Skip to content

Commit ef0ab17

Browse files
committed
much better run through
1 parent c0f857e commit ef0ab17

File tree

2 files changed

+1660
-676
lines changed

2 files changed

+1660
-676
lines changed

auto_ph.py

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,7 @@ def gbm_forward_select_train(orig_x_names, y_name, train, valid, seed_, next_lis
165165
# init loop var
166166
selected = orig_x_names
167167

168-
for j, name in enumerate(next_list):
168+
for j in range(0, len(next_list) + 1):
169169

170170
# init or clear local dict of monotone constraints
171171
mc = None
@@ -190,6 +190,10 @@ def gbm_forward_select_train(orig_x_names, y_name, train, valid, seed_, next_lis
190190
hvalid = h2o.H2OFrame(valid[selected + [y_name]])
191191

192192
# train model and calculate Shapley values
193+
print('Starting grid search %i/%i ...' % (j + 1, len(next_list)+1))
194+
print('Input features =', selected)
195+
if mc is not None:
196+
print('Monotone constraints =', mc)
193197
model_list.append(gbm_grid(selected, y_name, htrain, hvalid, seed_,
194198
monotone_constraints_=mc, hyper_params_=hyper_params_,
195199
search_criteria_=search_criteria_))
@@ -203,11 +207,13 @@ def gbm_forward_select_train(orig_x_names, y_name, train, valid, seed_, next_lis
203207

204208
# retrieve AUC and update progress
205209
auc_ = model_list[j].auc(valid=True)
206-
print('Completed grid search %i/%i with AUC: %.2f ...' % (j + 1, len(next_list), auc_))
210+
print('Completed grid search %i/%i with AUC: %.2f ...' % (j + 1, len(next_list)+1, auc_))
211+
print('--------------------------------------------------------------------------------')
207212

208213
# add the next most y-correlated feature
209214
# for the next modeling iteration
210-
selected = selected + [next_list[j]]
215+
if j < len(next_list):
216+
selected = selected + [next_list[j]]
211217

212218
print('Done.')
213219

@@ -283,7 +289,8 @@ def cv_model_rank(valid, seed_, model_name_list, nfolds=5):
283289
# dynamically generate and run code statements
284290
# to calculate metrics for each fold and model
285291
for model in sorted(model_name_list):
286-
code = 'h2o.get_model("%s").model_performance(h2o.H2OFrame(temp_df[temp_df["fold"] == %d])).%s()' % (model, fold, metric)
292+
code = 'h2o.get_model("%s").model_performance(h2o.H2OFrame(temp_df[temp_df["fold"] == %d])).%s()' \
293+
% (model, fold, metric)
287294
key_ = model + ' Value'
288295
val_ = eval(code)
289296

@@ -343,6 +350,7 @@ def cv_model_rank_select(valid, seed_, coef_list, model_list, model_prefix,
343350

344351
best_idx = 0
345352
rank = len(compare_model_ids) + 1
353+
best_model_frame = None
346354

347355
for i in range(0, len(model_list)):
348356

@@ -453,9 +461,7 @@ def get_percentile_dict(yhat_name, valid, id_):
453461
sort_df.reset_index(inplace=True)
454462

455463
# find top and bottom percentiles
456-
percentiles_dict = {}
457-
percentiles_dict[0] = sort_df.loc[0, id_]
458-
percentiles_dict[99] = sort_df.loc[sort_df.shape[0] - 1, id_]
464+
percentiles_dict = {0: sort_df.loc[0, id_], 99: sort_df.loc[sort_df.shape[0] - 1, id_]}
459465

460466
# find 10th-90th percentiles
461467
inc = sort_df.shape[0] // 10
@@ -498,9 +504,9 @@ def plot_pd_ice(x_name, par_dep_frame, ax=None):
498504
else:
499505

500506
# plot ICE
501-
par_dep_frame.plot(x=x_name,
502-
colormap='gnuplot',
503-
ax=ax)
507+
par_dep_frame.drop('partial_dependence', axis=1).plot(x=x_name,
508+
colormap='gnuplot',
509+
ax=ax)
504510

505511
# overlay partial dependence, annotate plot
506512
par_dep_frame.plot(title='Partial Dependence with ICE: ' + x_name,

0 commit comments

Comments
 (0)