logging.getLogger().setLevel(logging.INFO)


-def train(model_name: str, dataset: str) -> None:
+def train(dataset: str, model_name: str = "lr") -> None:
    """
    Train models using X_train and y_train with a specific classifier.

@@ -46,25 +46,24 @@ def train(model_name: str, dataset: str) -> None:

    # preprocessing
    scaler = RobustScaler()
-    X = scaler.fit_transform(X)
-    rus = RandomUnderSampler(replacement=False)
-    X, y = rus.fit_resample(X, y)

    # In this specific example logistic regression was chosen as
    # the most optimal model after running several experiments.
    classifier = LogisticRegression(max_iter=4000, penalty="l2", C=0.01)

+    # create pipeline
+    predict_pipeline = make_pipeline(scaler, classifier)
+
    # training
-    classifier.fit(X, y)
-    training_score = cross_val_score(classifier, X, y, cv=5, scoring="roc_auc")
-    logger.info(f"Classifier: {classifier.__class__.__name__}")
+    predict_pipeline.fit(X, y)
+    training_score = cross_val_score(predict_pipeline, X, y, cv=5, scoring="roc_auc")
+    logger.info(f"Classifier: {predict_pipeline.__class__.__name__}")
    logger.info(
        "Has a training score "
+        f"of {round(training_score.mean(), 2) * 100}% roc_auc"
    )

    # saving
-    predict_pipeline = make_pipeline(scaler, classifier)
    pred_result = {
        "clf": model_name,
        "training score roc_auc": training_score.mean(),