Skip to content

Commit

Permalink
reverse preprocess_text_lowercase_but_retain_punctuation
Browse files Browse the repository at this point in the history
  • Loading branch information
sundy1994 committed Feb 28, 2025
1 parent 2bf6896 commit 8a352ec
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
2 changes: 1 addition & 1 deletion src/team_comm_tools/feature_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ def preprocess_chat_data(self) -> None:
self.chat_data[self.message_col + "_original"] = self.chat_data[self.message_col]

# create new column that retains punctuation
self.chat_data["message_lower_with_punc"] = self.chat_data[self.message_col].astype(str).apply(lambda x: x.lower())
self.chat_data["message_lower_with_punc"] = self.chat_data[self.message_col].astype(str).apply(preprocess_text_lowercase_but_retain_punctuation)

# Preprocessing the text in `message_col` and then overwriting the column `message_col`.
# TODO: We should probably use classes to abstract preprocessing module as well?
Expand Down
24 changes: 12 additions & 12 deletions src/team_comm_tools/utils/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,18 +119,18 @@ def remove_unhashable_cols(df: pd.DataFrame, column_names: dict) -> pd.DataFrame
df = df.drop(columns=removable_cols)
return df

# def preprocess_text_lowercase_but_retain_punctuation(text): #NOTE: This function is deprecated
# """Convert the input text to lowercase while retaining punctuation.

# This function takes a string and converts all characters to lowercase,
# keeping any punctuation marks intact.

# :param text: The input text to process.
# :type text: str
# :return: The processed text with all characters in lowercase.
# :rtype: str
# """
# return(text.lower())
def preprocess_text_lowercase_but_retain_punctuation(text: str) -> str:
    """Convert the input text to lowercase while retaining punctuation.

    Only the letter case is changed; punctuation, digits, and whitespace are
    returned exactly as they appear in the input.

    :param text: The input text to process.
    :type text: str
    :return: The processed text with all characters in lowercase.
    :rtype: str
    """
    # str.lower() leaves non-cased characters untouched, so punctuation
    # survives without any extra handling.
    return text.lower()

def preprocess_text(text: str) -> str:
"""Preprocess text by removing non-alphanumeric characters and converting to lowercase.
Expand Down

0 comments on commit 8a352ec

Please sign in to comment.