Skip to content

Commit 674b31d

Browse files
committed
adjust function order
1 parent 253286c commit 674b31d

File tree

1 file changed

+2
-10
lines changed

1 file changed

+2
-10
lines changed

src/team_comm_tools/utils/preprocess.py

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,7 @@ def assert_key_columns_present(df: pd.DataFrame, column_names: dict) -> None:
2121
# message_col = column_names['message_col']
2222

2323
# remove all special characters from df
24-
# df.columns = df.columns.str.replace('[^A-Za-z0-9_]', '', regex=True) # NOTE: This is moved to preprocess_conversation_columns
25-
24+
df.columns = df.columns.str.replace('[^A-Za-z0-9_]', '', regex=True)
2625
# Assert that key columns are present
2726
for role, col in column_names.items():
2827
if role == 'timestamp_col':
@@ -31,6 +30,7 @@ def assert_key_columns_present(df: pd.DataFrame, column_names: dict) -> None:
3130
raise KeyError(f"Missing required columns in DataFrame: '{col}' (expected for {role})\n Columns available: {df.columns}")
3231
else:
3332
print(f"Confirmed that data has {role} column: {col}!")
33+
df[col] = df[col].fillna('')
3434

3535
# if {conversation_id_col, speaker_id_col, message_col}.issubset(df.columns):
3636
# print(f"Confirmed that data has conversation_id: {conversation_id_col}, speaker_id: {speaker_id_col} and message: {message_col} columns!")
@@ -66,14 +66,6 @@ def preprocess_conversation_columns(df: pd.DataFrame, column_names: dict, groupi
6666
:return: The preprocessed DataFrame with a conversation number column.
6767
:rtype: pd.DataFrame
6868
"""
69-
70-
# remove all special characters from df
71-
df.columns = df.columns.str.replace('[^A-Za-z0-9_]', '', regex=True)
72-
# fillna
73-
for role, col in column_names.items():
74-
if role == 'timestamp_col':
75-
continue
76-
df[col] = df[col].fillna('')
7769

7870
if not grouping_keys: # case 1: single identifier
7971
return df

0 commit comments

Comments
 (0)