diff --git a/utils/simple_impute.py b/utils/simple_impute.py index 2e9d729..c72f521 100644 --- a/utils/simple_impute.py +++ b/utils/simple_impute.py @@ -18,12 +18,17 @@ def simple_imputer(df,train_subj): df_out.rename(columns={'count': 'mask'}, level='Aggregation Function', inplace=True) is_absent = (1 - df_out.loc[:, idx[:, 'mask']]) - hours_of_absence = is_absent.cumsum() - time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].fillna(method='ffill') +# hours_of_absence = is_absent.cumsum() +# time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].fillna(method='ffill') + + hours_of_absence = is_absent.groupby(ID_COLS).cumsum() + hours_of_absence.columns.set_names(['LEVEL2', 'Aggregation Function'], inplace=True) + time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].groupby(ID_COLS).fillna(method='ffill') #.fillna(100) + time_since_measured.rename(columns={'mask': 'time_since_measured'}, level='Aggregation Function', inplace=True) df_out = pd.concat((df_out, time_since_measured), axis=1) df_out.loc[:, idx[:, 'time_since_measured']] = df_out.loc[:, idx[:, 'time_since_measured']].fillna(100) df_out.sort_index(axis=1, inplace=True) - return df_out \ No newline at end of file + return df_out