@@ -19,37 +19,35 @@ class CoTraining:
1919 Add multinomial functions and unit tests.
2020 Add functionality for regression(?)
2121 Inputs:
22- params: dictionary of logistic regression input functions.
23- keys max_iter, tol, and C supported.
22+ kwargs: logistic regression input functions.
23+ keys random_state, seed, max_iter, tol, C, and n_samples supported.
2424 random_state: int/float for reproducible initialization.
2525 '''
2626
2727 # only binary so far
28- def __init__ (self , params = None , random_state = 0 ):
28+ def __init__ (self , ** kwargs ):
29+ # supported keys = ['max_iter', 'tol', 'C', 'random_state']
2930 # defaults to a fixed value for reproducibility
30- self .random_state = random_state
31- # dictionary of parameters for logistic regression model
32- self .params = params
33- if self .params is None :
34- self .model1 = linear_model .LogisticRegression (
35- random_state = self .random_state )
36- self .model2 = linear_model .LogisticRegression (
37- random_state = self .random_state )
38- # default needed for training
39- self .params = {'n_samples' : 1 }
40- else :
41- self .model1 = linear_model .LogisticRegression (
42- random_state = self .random_state ,
43- max_iter = params ['max_iter' ],
44- tol = params ['tol' ],
45- C = params ['C' ]
46- )
47- self .model2 = linear_model .LogisticRegression (
48- random_state = self .random_state ,
49- max_iter = params ['max_iter' ],
50- tol = params ['tol' ],
51- C = params ['C' ]
52- )
31+ self .random_state = kwargs .pop ('random_state' , 0 )
32+ self .seed = kwargs .pop ('seed' , 0 )
33+ # parameters for cotraining logistic regression models:
34+ # defaults to sklearn.linear_model.LogisticRegression default vals
35+ self .max_iter = kwargs .pop ('max_iter' , 100 )
36+ self .tol = kwargs .pop ('tol' , 0.0001 )
37+ self .C = kwargs .pop ('C' , 1.0 )
38+ self .n_samples = kwargs .pop ('n_samples' , 1 )
39+ self .model1 = linear_model .LogisticRegression (
40+ random_state = self .random_state ,
41+ max_iter = self .max_iter ,
42+ tol = self .tol ,
43+ C = self .C
44+ )
45+ self .model2 = linear_model .LogisticRegression (
46+ random_state = self .random_state ,
47+ max_iter = self .max_iter ,
48+ tol = self .tol ,
49+ C = self .C
50+ )
5351
5452 def training_loop (self , slr1 , slr2 , L_lr1 , L_lr2 ,
5553 Ly_lr1 , Ly_lr2 , U_lr , n_samples ,
@@ -155,7 +153,7 @@ def fresh_start(self, params, data_dict):
155153 # unlabeled co-training data
156154 Ux = data_dict ['Ux' ]
157155
158- clf = CoTraining (params = params , random_state = self .random_state )
156+ clf = CoTraining (** params , random_state = self .random_state )
159157 # training and testing
160158 model1_accs , model2_accs = clf .train (trainx , trainy , Ux , testx , testy )
161159 # uses balanced_accuracy accounts for class imbalanced data
@@ -239,10 +237,7 @@ def train(self, trainx, trainy, Ux,
239237 U_lr = Ux .copy ()
240238
241239 # set the random seed of training splits for reproducibility
242- # This can be ignored by excluding params['seed']
243- # in the hyperopt space dictionary
244- if 'seed' in self .params .keys ():
245- np .random .seed (self .params ['seed' ])
240+ np .random .seed (self .seed )
246241
247242 # TODO: allow a user to specify uneven splits between the two models
248243 split_frac = 0.5
@@ -262,7 +257,7 @@ def train(self, trainx, trainy, Ux,
262257 self .model1 , self .model2 ,
263258 L_lr1 , L_lr2 ,
264259 Ly_lr1 , Ly_lr2 ,
265- U_lr , self .params [ ' n_samples' ] ,
260+ U_lr , self .n_samples ,
266261 testx , testy ,
267262 )
268263
0 commit comments