@@ -12,6 +12,17 @@ def __init__(
12
12
lgb_config : Dict [str , Union [str , int ]] = lgb_config ,
13
13
xgb_config : Dict [str , Union [str , int ]] = xgb_config ,
14
14
) -> None :
15
+ """Initialize the Evaluation class.
16
+
17
+ Args:
18
+ _data: A dictionary containing the data.
19
+ all_possible_variations: A dictionary containing all possible variations.
20
+ labels: An array containing the labels.
21
+ metric: The evaluation metric to use (default: "accuracy").
22
+ sklearn_config: A dictionary containing the sklearn configuration.
23
+ lgb_config: A dictionary containing the lightgbm configuration.
24
+ xgb_config: A dictionary containing the xgboost configuration.
25
+ """
15
26
self .sklearn_config = sklearn_config
16
27
self .lgb_config = lgb_config
17
28
self .xgb_config = xgb_config
@@ -30,9 +41,22 @@ def sklearn(
30
41
results : Dict = {},
31
42
dimred_technique : str = None ,
32
43
) -> Tuple [Dict [str , Union [str , int ]], Dict [str , int ]]:
44
+ """Perform evaluation using sklearn models.
45
+
46
+ Args:
47
+ X_train: The training data.
48
+ X_test: The testing data.
49
+ y_train: The training labels.
50
+ y_test: The testing labels.
51
+ inner_iterator: The inner iterator.
52
+ results: A dictionary to store the results.
53
+ dimred_technique: The dimensionality reduction technique.
54
+
55
+ Returns:
56
+ A tuple containing the results and the best model.
57
+ """
33
58
best_model = [0 , {}]
34
59
for model in tqdm (self .sklearn_config ):
35
- print (model )
36
60
name = dimred_technique + model ().__class__ .__name__
37
61
inner_iterator .set_description (name )
38
62
model_config = self .sklearn_config [model ]
@@ -56,18 +80,18 @@ def sklearn(
56
80
metrics = classification_report (y_test , y_preds , output_dict = True )
57
81
results [model .__class__ .__name__ ] = metrics
58
82
wandb .log (metrics )
59
- wandb .sklearn .plot_classifier (
60
- model ,
61
- X_train ,
62
- X_test ,
63
- y_train ,
64
- y_test ,
65
- y_preds ,
66
- y_probas ,
67
- range (min (y_probas .shape )),
68
- model_name = name ,
69
- feature_names = None ,
70
- )
83
+ # wandb.sklearn.plot_classifier(
84
+ # model,
85
+ # X_train,
86
+ # X_test,
87
+ # y_train,
88
+ # y_test,
89
+ # y_preds,
90
+ # y_probas,
91
+ # range(min(y_probas.shape)),
92
+ # model_name=name,
93
+ # feature_names=None,
94
+ # )
71
95
if metrics [self .metric ] > best_model [0 ]:
72
96
best_model [0 ] = metrics [self .metric ]
73
97
best_model [1 ] = metrics
@@ -85,6 +109,19 @@ def xgb(
85
109
results : Dict = {},
86
110
dimred_technique : str = None ,
87
111
) -> Tuple [Dict [str , Union [str , int ]], Dict [str , int ]]:
112
+ """Perform evaluation using xgboost model.
113
+
114
+ Args:
115
+ X_train: The training data.
116
+ X_test: The testing data.
117
+ y_train: The training labels.
118
+ y_test: The testing labels.
119
+ results: A dictionary to store the results.
120
+ dimred_technique: The dimensionality reduction technique.
121
+
122
+ Returns:
123
+ A tuple containing the results and the metrics.
124
+ """
88
125
model = xgb .XGBClassifier (** self .xgb_config )
89
126
name = dimred_technique + model .__class__ .__name__
90
127
wandb .init (
@@ -102,7 +139,7 @@ def xgb(
102
139
cp .asarray (X_train ),
103
140
cp .asarray (y_train ),
104
141
eval_set = [(cp .asarray (X_test ), cp .asarray (y_test ))],
105
- callbacks = [WandbCallback (log_model = True )],
142
+ # callbacks=[WandbCallback(log_model=True)],
106
143
)
107
144
y_preds = model .predict (X_test )
108
145
metrics = classification_report (y_test , y_preds , output_dict = True )
@@ -122,6 +159,19 @@ def lgb(
122
159
results : Dict = {},
123
160
dimred_technique : str = None ,
124
161
) -> Tuple [Dict [str , Union [str , int ]], Dict [str , int ]]:
162
+ """Perform evaluation using lightgbm model.
163
+
164
+ Args:
165
+ X_train: The training data.
166
+ X_test: The testing data.
167
+ y_train: The training labels.
168
+ y_test: The testing labels.
169
+ results: A dictionary to store the results.
170
+ dimred_technique: The dimensionality reduction technique.
171
+
172
+ Returns:
173
+ A tuple containing the results and the metrics.
174
+ """
125
175
name = dimred_technique + "LGBClf"
126
176
wandb .init (
127
177
project = PROJECT_NAME ,
@@ -139,14 +189,14 @@ def lgb(
139
189
self .lgb_config ,
140
190
train_data ,
141
191
valid_sets = [test_data ],
142
- callbacks = [wandb_callback ()],
192
+ # callbacks=[wandb_callback()],
143
193
)
144
194
y_preds = model .predict (X_test )
145
195
metrics = classification_report (
146
196
y_test , np .argmax (y_preds , axis = 1 ), output_dict = True
147
197
)
148
198
results [name ] = metrics
149
- log_summary (model , save_model_checkpoint = True )
199
+ # log_summary(model, save_model_checkpoint=True)
150
200
wandb .log (metrics )
151
201
wandb .finish ()
152
202
dirs = director_exist (os .path .join (os .getenv ("MODEL_PATH" ), run ))
@@ -155,6 +205,11 @@ def lgb(
155
205
return results , metrics
156
206
157
207
def evaluate (self ) -> Dict [str , Dict [str , Dict [str , Union [str , int ]]]]:
208
+ """Perform evaluation of all pipeline variations.
209
+
210
+ Returns:
211
+ A tuple of two dictionaries: all pipeline performances and the best performances.
212
+ """
158
213
all_pipeline_performance = {}
159
214
outer_iterator = tqdm (self .all_variations )
160
215
best_performances = {
@@ -168,7 +223,7 @@ def evaluate(self) -> Dict[str, Dict[str, Dict[str, Union[str, int]]]]:
168
223
specific_pipeline_variations = self .all_variations [pipeline_variation_name ]
169
224
inner_iterator = tqdm (specific_pipeline_variations , leave = False )
170
225
for pipeline_variation in inner_iterator :
171
- name_of_pipeline = pipeline_variation .__class__ .__name__
226
+ name_of_pipeline = pipeline_variation .steps [ - 1 ][ - 1 ]. __class__ .__name__
172
227
pipeline_performance = {}
173
228
X_train = pipeline_variation .fit_transform (self ._data ["X_train" ])
174
229
X_test = pipeline_variation .transform (self ._data ["X_test" ])
@@ -207,19 +262,17 @@ def evaluate(self) -> Dict[str, Dict[str, Dict[str, Union[str, int]]]]:
207
262
avg_var = average_metric (
208
263
self .metric , [sklearn_metrics , xgb_metrics , lgb_metrics ]
209
264
)
210
- if avg_var > best_performing_pipeline [0 ]:
211
- best_performing_pipeline [0 ] = avg_var
212
- best_performing_pipeline [1 ] = str (pipeline_variation ).strip
213
- # best_performing_pipeline[2] = pipeline_performance
265
+ if float (avg_var ) > float (best_performing_pipeline [0 ]):
266
+ best_performing_pipeline [0 ] = str (avg_var )
267
+ best_performing_pipeline [1 ] = name_of_pipeline
214
268
inner_iterator .set_description (f"{ name_of_pipeline } Done :)" )
215
- # best_performances[pipeline_variation_name] = best_performing_pipeline
216
269
best_performances = add_to_dictionary (
217
270
best_performances , best_performing_pipeline
218
271
)
219
- with open (f'{ os .getenv ("DATA_PATH" )} /all_performance_data.json' , "w" ) as f :
220
- json .dump (all_pipeline_performance , f )
221
- with open (
222
- f'{ os .getenv ("DATA_PATH" )} /best_performance_dimred.json' , "w"
223
- ) as json_f :
224
- json .dump (best_performances , json_f )
272
+ with open (f'{ os .getenv ("DATA_PATH" )} /all_performance_data.json' , "w" ) as f :
273
+ json .dump (all_pipeline_performance , f )
274
+ with open (
275
+ f'{ os .getenv ("DATA_PATH" )} /best_performance_dimred.json' , "w"
276
+ ) as json_f :
277
+ json .dump (best_performances , json_f )
225
278
return all_pipeline_performance , best_performances
0 commit comments