Skip to content

Commit d8291db

Browse files
committed
updates to message_original error
1 parent 5b2b9f0 commit d8291db

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

src/team_comm_tools/utils/check_embeddings.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,14 @@ def check_embeddings(chat_data: pd.DataFrame, vect_path: str, bert_path: str, or
7272
# check that message in vector data matches chat data
7373
preprocessed_chat = chat_data[message_col].astype(str).apply(preprocess_text)
7474

75-
# preprocess vector data
75+
# preprocess vector data; if message_col contains '_original', remove it before preprocessing the text
76+
if '_original' in message_col:
77+
message_col = message_col.replace('_original', '')
78+
79+
print(message_col, message_col[:-9])
7680
preprocessed_vector = vector_df[message_col].astype(str).apply(preprocess_text)
7781

82+
7883
mismatches = chat_data[preprocessed_chat != preprocessed_vector]
7984
if len(mismatches) != 0:
8085
print("Messages in the vector data do not match the chat data. Regenerating...")

0 commit comments

Comments
 (0)