Skip to content

Commit b2ed12a

Browse files
committed
updates to gini dependency
1 parent e3ad8d1 commit b2ed12a

File tree

3 files changed

+72
-46
lines changed

3 files changed

+72
-46
lines changed

examples/featurize.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,10 @@
9494
output_file_path_conv_level = "./jury_TINY_output_conversation_level_custom_agg.csv",
9595
convo_methods = ['mean'], # This will aggregate ONLY the "positive_bert" at the conversation level, using mean; it will aggregate ONLY "negative_bert" at the speaker/user level, using max.
9696
convo_columns = ['positive_bert'],
97-
user_methods = ['mean', 'max'],
98-
user_columns = ['positive_bert', 'negative_bert'],
99-
# user_methods = ['max'],
100-
# user_columns = ['negative_bert'],
97+
# user_methods = ['mean', 'max'],
98+
# user_columns = ['positive_bert', 'negative_bert'],
99+
user_methods = ['max'],
100+
user_columns = ['negative_bert'],
101101
turns = False,
102102
)
103103
tiny_juries_feature_builder_custom_aggregation.featurize(col="message")

src/team_comm_tools/feature_builder.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,9 @@ def conv_level_features(self) -> None:
645645
convo_aggregation = self.convo_aggregation,
646646
convo_methods = self.convo_methods,
647647
convo_columns = self.convo_columns,
648-
user_aggregation = self.user_aggregation
648+
user_aggregation = self.user_aggregation,
649+
user_methods = self.user_methods,
650+
user_columns = self.user_columns
649651
)
650652
# Calling the driver inside this class to create the features.
651653
self.conv_data = conv_feature_builder.calculate_conversation_level_features(self.feature_methods_conv)

src/team_comm_tools/utils/calculate_conversation_level_features.py

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ class ConversationLevelFeaturesCalculator:
3535
:type convo_columns: list
3636
:param user_aggregation: If true, will aggregate features at the user level
3737
:type user_aggregation: bool
38+
:param user_methods: Specifies which functions users want to aggregate with (e.g., mean, std...) at the user level
39+
:type user_methods: list
40+
:param user_columns: Specifies which columns (at the chat level) users want aggregated for the user level
41+
:type user_columns: list
3842
"""
3943
def __init__(self, chat_data: pd.DataFrame,
4044
user_data: pd.DataFrame,
@@ -49,7 +53,9 @@ def __init__(self, chat_data: pd.DataFrame,
4953
convo_aggregation: bool,
5054
convo_methods: list,
5155
convo_columns: list,
52-
user_aggregation: bool
56+
user_aggregation: bool,
57+
user_methods: list,
58+
user_columns: list
5359
) -> None:
5460

5561
# Initializing variables
@@ -227,53 +233,71 @@ def get_user_level_aggregates(self) -> None:
227233
- Minimum of averaged user-level features
228234
- Maximum of averaged user-level features
229235
236+
230237
:return: None
231238
:rtype: None
232239
"""
233240

234241
if self.convo_aggregation == True and self.user_aggregation == True:
235-
236-
# Sum Columns were created using self.get_user_level_summed_features()
237-
for column in self.columns_to_summarize:
238-
239-
if 'mean' in self.convo_methods:
240-
# Average/Mean of User-Level Feature
241-
self.conv_data = pd.merge(
242-
left=self.conv_data,
243-
right=get_average(self.user_data.copy(), "sum_"+column, 'average_user_sum_'+column, self.conversation_id_col),
244-
on=[self.conversation_id_col],
245-
how="inner"
246-
)
247-
248-
if 'std' in self.convo_methods:
249-
# Standard Deviation of User-Level Feature
250-
self.conv_data = pd.merge(
251-
left=self.conv_data,
252-
right=get_stdev(self.user_data.copy(), "sum_"+column, 'stdev_user_sum_'+column, self.conversation_id_col),
253-
on=[self.conversation_id_col],
254-
how="inner"
255-
)
256-
257-
if 'min' in self.convo_methods:
258-
# Minima of User-Level Feature
259-
self.conv_data = pd.merge(
260-
left=self.conv_data,
261-
right=get_min(self.user_data.copy(), "sum_"+column, 'min_user_sum_'+column, self.conversation_id_col),
262-
on=[self.conversation_id_col],
263-
how="inner"
264-
)
242+
243+
# TODO(review): confirm that merging one aggregate per (user_column, user_method) pair is the intended behavior
244+
if 'mean' in self.convo_methods:
245+
for user_column in self.user_columns:
246+
for user_method in self.user_methods:
247+
# Average/Mean of User-Level Feature
248+
self.conv_data = pd.merge(
249+
left=self.conv_data,
250+
right=get_average(self.user_data.copy(), user_method + "_" +user_column, 'average_user_' + user_method + "_" +user_column, self.conversation_id_col),
251+
on=[self.conversation_id_col],
252+
how="inner"
253+
)
254+
255+
if 'std' in self.convo_methods:
256+
for user_column in self.user_columns:
257+
for user_method in self.user_methods:
258+
# Standard Deviation of User-Level Feature
259+
self.conv_data = pd.merge(
260+
left=self.conv_data,
261+
right=get_stdev(self.user_data.copy(), user_method + "_" + user_column, 'stdev_user_' + user_method + "_" + user_column, self.conversation_id_col),
262+
on=[self.conversation_id_col],
263+
how="inner"
264+
)
265+
266+
if 'min' in self.convo_methods:
267+
for user_column in self.user_columns:
268+
for user_method in self.user_methods:
269+
# Minima of User-Level Feature
270+
self.conv_data = pd.merge(
271+
left=self.conv_data,
272+
right=get_min(self.user_data.copy(), user_method + "_" + user_column, 'min_user_sum_' + user_method + "_" + user_column, self.conversation_id_col),
273+
on=[self.conversation_id_col],
274+
how="inner"
275+
)
265276

266-
if 'max' in self.convo_methods:
267-
# Maxima of User-Level Feature
268-
self.conv_data = pd.merge(
269-
left=self.conv_data,
270-
right=get_max(self.user_data.copy(), "sum_"+column, 'max_user_sum_'+column, self.conversation_id_col),
271-
on=[self.conversation_id_col],
272-
how="inner"
273-
)
277+
if 'max' in self.convo_methods:
278+
for user_column in self.user_columns:
279+
for user_method in self.user_methods:
280+
# Maxima of User-Level Feature
281+
self.conv_data = pd.merge(
282+
left=self.conv_data,
283+
right=get_max(self.user_data.copy(), user_method + "_" + user_column, 'max_user_sum_' + user_method + "_" + user_column, self.conversation_id_col),
284+
on=[self.conversation_id_col],
285+
how="inner"
286+
)
274287

275-
# # temp checking for error
276-
# for column in self.summable_columns:
288+
289+
# Sum Columns were created using self.get_user_level_summed_features()
290+
# for column in self.columns_to_summarize:
291+
# # change to self.user_columns
292+
# # should be summable_columns
293+
294+
# # for method in self.user_methods:
295+
# # self.conv_data = pd.merge(
296+
# # left=self.conv_data,
297+
# # right=get_average(self.user_data.copy(), method+"_"+column, 'average_user_' + method + "_" +column, self.conversation_id_col),
298+
# # on=[self.conversation_id_col],
299+
# # how="inner"
300+
# # )
277301

278302
# if 'mean' in self.convo_methods:
279303
# # Average/Mean of User-Level Feature

0 commit comments

Comments
 (0)