Skip to content

Commit

Permalink
bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
sundy1994 committed Feb 28, 2025
1 parent 09ccd7d commit 48925b9
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 10 deletions.
2 changes: 1 addition & 1 deletion src/team_comm_tools/feature_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def preprocess_chat_data(self) -> None:

# create the appropriate grouping variables and assert the columns are present
self.chat_data = preprocess_conversation_columns(self.chat_data, self.column_names, self.grouping_keys, self.cumulative_grouping, self.within_task)
- # assert_key_columns_present(self.chat_data, self.column_names)
+ assert_key_columns_present(self.chat_data, self.column_names)
self.chat_data = remove_unhashable_cols(self.chat_data, self.column_names)

# save original column with no preprocessing
Expand Down
9 changes: 0 additions & 9 deletions src/team_comm_tools/utils/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,6 @@ def preprocess_conversation_columns(df: pd.DataFrame, column_names: dict, groupi
else: # case 2: grouping multiple keys, or case 3 but not 3 layers
df['conversation_num'] = df.groupby(grouping_keys).ngroup()
df = df[df.columns.tolist()[-1:] + df.columns.tolist()[0:-1]] # make the new column first
- # assert key columns are present
- for role, col in column_names.items():
- if role == 'timestamp_col':
- continue # skip timestamp column
- if col not in df.columns:
- raise KeyError(f"Missing required columns in DataFrame: '{col}' (expected for {role})\n Columns available: {df.columns}")
- else:
- print(f"Confirmed that data has {role} column: {col}!")
- df[col] = df[col].fillna('')

return df

Expand Down

0 comments on commit 48925b9

Please sign in to comment.