diff --git a/examples/featurize.py b/examples/featurize.py index adfa781a..328c7e7e 100644 --- a/examples/featurize.py +++ b/examples/featurize.py @@ -41,7 +41,6 @@ tiny_juries_feature_builder = FeatureBuilder( input_df = tiny_juries_df, grouping_keys = ["batch_num", "round_num"], - vector_directory = "./vector_data/", output_file_base = "jury_TINY_output", # Naming output files using the output_file_base parameter (recommended) turns = False, custom_features = [ @@ -56,7 +55,6 @@ tiny_multi_task_feature_builder = FeatureBuilder( input_df = tiny_multi_task_df, conversation_id_col = "stageId", - vector_directory = "./vector_data/", # alternatively, you can name each output file separately. NOTE, however, that we don't directly use this path; # we modify the path to place outputs within the `output/chat`, `output/conv`, and `output/user` folders. output_file_path_chat_level = "./multi_task_TINY_output_chat_level_stageId_cumulative.csv", diff --git a/pyproject.toml b/pyproject.toml index e10f2581..9461e0a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "team_comm_tools" -version = "0.1.4.post1" +version = "0.1.4.post2" requires-python = ">= 3.10" dependencies = [ "chardet>=3.0.4", diff --git a/src/team_comm_tools/feature_builder.py b/src/team_comm_tools/feature_builder.py index cb3500d9..bd600064 100644 --- a/src/team_comm_tools/feature_builder.py +++ b/src/team_comm_tools/feature_builder.py @@ -99,14 +99,14 @@ class FeatureBuilder: def __init__( self, input_df: pd.DataFrame, - vector_directory: "./vector_data/", - output_file_base = "output", - output_file_path_chat_level = None, - output_file_path_user_level = None, - output_file_path_conv_level = None, + vector_directory: str = "./vector_data/", + output_file_base: str = "output", + output_file_path_chat_level: str = None, + output_file_path_user_level: str = None, + output_file_path_conv_level: str = None, custom_features: list = [], analyze_first_pct: list = [1.0], - turns: bool=False, + turns: bool = False, conversation_id_col: str = "conversation_num", speaker_id_col: str = "speaker_nickname", message_col: str = "message",