Skip to content

Commit 8a352ec

Browse files
committed
reverse preprocess_text_lowercase_but_retain_punctuation
1 parent 2bf6896 commit 8a352ec

File tree

2 files changed

+13
-13
lines changed

2 files changed

+13
-13
lines changed

src/team_comm_tools/feature_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,7 @@ def preprocess_chat_data(self) -> None:
571571
self.chat_data[self.message_col + "_original"] = self.chat_data[self.message_col]
572572

573573
# create new column that retains punctuation
574-
self.chat_data["message_lower_with_punc"] = self.chat_data[self.message_col].astype(str).apply(lambda x: x.lower())
574+
self.chat_data["message_lower_with_punc"] = self.chat_data[self.message_col].astype(str).apply(preprocess_text_lowercase_but_retain_punctuation)
575575

576576
# Preprocessing the text in `message_col` and then overwriting the column `message_col`.
577577
# TODO: We should probably use classes to abstract preprocessing module as well?

src/team_comm_tools/utils/preprocess.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -119,18 +119,18 @@ def remove_unhashable_cols(df: pd.DataFrame, column_names: dict) -> pd.DataFrame
119119
df = df.drop(columns=removable_cols)
120120
return df
121121

122-
# def preprocess_text_lowercase_but_retain_punctuation(text): #NOTE: This function is deprecated
123-
# """Convert the input text to lowercase while retaining punctuation.
124-
125-
# This function takes a string and converts all characters to lowercase,
126-
# keeping any punctuation marks intact.
127-
128-
# :param text: The input text to process.
129-
# :type text: str
130-
# :return: The processed text with all characters in lowercase.
131-
# :rtype: str
132-
# """
133-
# return(text.lower())
122+
def preprocess_text_lowercase_but_retain_punctuation(text: str) -> str:
    """Convert the input text to lowercase while retaining punctuation.

    Only the letter case is changed; punctuation, whitespace, digits and
    every other character are returned exactly as they were passed in.

    :param text: The input text to process.
    :type text: str
    :return: The processed text with all characters in lowercase.
    :rtype: str
    """
    # `return` is a statement, not a function call, so no wrapping parentheses.
    return text.lower()
134134

135135
def preprocess_text(text: str) -> str:
136136
"""Preprocess text by removing non-alphanumeric characters and converting to lowercase.

0 commit comments

Comments
 (0)