@@ -117,13 +117,15 @@ def run(args):
                 performance_value_at_anchor = row[learning_curve_column][-1]
                 performance_values_new.append(performance_value_at_anchor)
             performance_values_new = np.array(performance_values_new, dtype=float)
-            frame_workflow_job_task[performance_column] = pd.Series(performance_values_new)
 
-            id_results[(workflow_ids[0], openml_task_ids[0], current_anchor_value)].append(frame_workflow_job_task)
+            # make a copy
+            frame_copy = frame_workflow_job_task.copy(deep=True)
+            frame_copy[performance_column] = pd.Series(performance_values_new)
+            id_results[(workflow_ids[0], openml_task_ids[0], current_anchor_value)].append(frame_copy)
 
-            load_count += 1
-            if args.max_load and load_count >= args.max_load:
-                break
+        load_count += 1
+        if args.max_load and load_count >= args.max_load:
+            break
 
     task_ids = set()
     for idx, (workflow_name, task_id, current_anchor_value) in enumerate(id_results):
@@ -133,7 +135,11 @@ def run(args):
         relevant_columns = list(workflow_hyperparameter_mapping.values()) + [performance_column]
         task_results = task_results[relevant_columns]
 
-        logging.info("Starting with task %d anchor %d (%d/%d)" % (task_id, current_anchor_value, idx + 1, len(id_results)))
+        nan_count = task_results[performance_column].isna().sum()
+        logging.info("Starting with task %d anchor %d (%d/%d), shape %s %d nans" % (
+            task_id, current_anchor_value, idx + 1, len(id_results), task_results.shape, nan_count)
+        )
+
         fanova_task_results = fanova_on_task(
             task_results, performance_column, current_anchor_value, config_space, args.n_trees
         )
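
Context on the first hunk: appending `frame_workflow_job_task` directly stores a reference, so a later in-place write would also alter the frames already collected in `id_results`; the `copy(deep=True)` decouples them. A minimal standalone sketch of the aliasing behavior (toy data, not this script's frames):

```python
# Sketch: appending a DataFrame stores a reference, not a snapshot,
# so mutating it later changes what was already "collected".
import pandas as pd

results = []
frame = pd.DataFrame({"score": [0.1, 0.2]})

results.append(frame)                    # reference to the same object
frame["score"] = pd.Series([0.9, 0.8])   # in-place write
print(results[0]["score"].tolist())      # [0.9, 0.8] -- stored frame changed too

results.clear()
frame_copy = frame.copy(deep=True)       # independent snapshot, as in the diff
frame_copy["score"] = pd.Series([0.3, 0.4])
results.append(frame_copy)
print(frame["score"].tolist())           # [0.9, 0.8] -- original untouched
```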
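
On the new `nan_count` logging in the second hunk: assigning a bare `pd.Series` to a DataFrame column aligns on index labels, so a filtered frame with non-contiguous labels can silently pick up NaNs; counting them before `fanova_on_task` makes that visible. A small sketch of the pitfall (hypothetical data, not from this repository):

```python
# Sketch: Series assignment aligns on the frame's index labels;
# rows whose label has no match in the Series become NaN.
import numpy as np
import pandas as pd

frame = pd.DataFrame({"score": [0.1, 0.2, 0.3]}).iloc[[0, 2]].copy()  # index [0, 2]
frame["score"] = pd.Series(np.array([0.5, 0.6]))                      # index [0, 1]
print(frame["score"].tolist())           # [0.5, nan] -- label 2 had no match

nan_count = frame["score"].isna().sum()  # same check the new logging reports
print(nan_count)                         # 1
```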