@@ -35,6 +35,10 @@ class ConversationLevelFeaturesCalculator:
35
35
:type convo_columns: list
36
36
:param user_aggregation: If true, will aggregate features at the user level
37
37
:type user_aggregation: bool
38
+ :param user_methods: Specifies which functions users want to aggregate with (e.g., mean, std...) at the user level
39
+ :type user_methods: list
40
+ :param user_columns: Specifies which columns (at the chat level) users want aggregated for the user level
41
+ :type user_columns: list
38
42
"""
39
43
def __init__ (self , chat_data : pd .DataFrame ,
40
44
user_data : pd .DataFrame ,
@@ -49,7 +53,9 @@ def __init__(self, chat_data: pd.DataFrame,
49
53
convo_aggregation : bool ,
50
54
convo_methods : list ,
51
55
convo_columns : list ,
52
- user_aggregation : bool
56
+ user_aggregation : bool ,
57
+ user_methods : list ,
58
+ user_columns : list
53
59
) -> None :
54
60
55
61
# Initializing variables
@@ -227,53 +233,71 @@ def get_user_level_aggregates(self) -> None:
227
233
- Minimum of averaged user-level features
228
234
- Maximum of averaged user-level features
229
235
236
+
230
237
:return: None
231
238
:rtype: None
232
239
"""
233
240
234
241
if self .convo_aggregation == True and self .user_aggregation == True :
235
-
236
- # Sum Columns were created using self.get_user_level_summed_features()
237
- for column in self .columns_to_summarize :
238
-
239
- if 'mean' in self .convo_methods :
240
- # Average/Mean of User-Level Feature
241
- self .conv_data = pd .merge (
242
- left = self .conv_data ,
243
- right = get_average (self .user_data .copy (), "sum_" + column , 'average_user_sum_' + column , self .conversation_id_col ),
244
- on = [self .conversation_id_col ],
245
- how = "inner"
246
- )
247
-
248
- if 'std' in self .convo_methods :
249
- # Standard Deviation of User-Level Feature
250
- self .conv_data = pd .merge (
251
- left = self .conv_data ,
252
- right = get_stdev (self .user_data .copy (), "sum_" + column , 'stdev_user_sum_' + column , self .conversation_id_col ),
253
- on = [self .conversation_id_col ],
254
- how = "inner"
255
- )
256
-
257
- if 'min' in self .convo_methods :
258
- # Minima of User-Level Feature
259
- self .conv_data = pd .merge (
260
- left = self .conv_data ,
261
- right = get_min (self .user_data .copy (), "sum_" + column , 'min_user_sum_' + column , self .conversation_id_col ),
262
- on = [self .conversation_id_col ],
263
- how = "inner"
264
- )
242
+
243
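+ # NOTE(review): unverified -- each branch below is gated on self.convo_methods but loops over self.user_columns and self.user_methods; confirm this is the intended behavior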
244
+ if 'mean' in self .convo_methods :
245
+ for user_column in self .user_columns :
246
+ for user_method in self .user_methods :
247
+ # Average/Mean of User-Level Feature
248
+ self .conv_data = pd .merge (
249
+ left = self .conv_data ,
250
+ right = get_average (self .user_data .copy (), user_method + "_" + user_column , 'average_user_' + user_method + "_" + user_column , self .conversation_id_col ),
251
+ on = [self .conversation_id_col ],
252
+ how = "inner"
253
+ )
254
+
255
+ if 'std' in self .convo_methods :
256
+ for user_column in self .user_columns :
257
+ for user_method in self .user_methods :
258
+ # Standard Deviation of User-Level Feature
259
+ self .conv_data = pd .merge (
260
+ left = self .conv_data ,
261
+ right = get_stdev (self .user_data .copy (), user_method + "_" + user_column , 'stdev_user_' + user_method + "_" + user_column , self .conversation_id_col ),
262
+ on = [self .conversation_id_col ],
263
+ how = "inner"
264
+ )
265
+
266
+ if 'min' in self .convo_methods :
267
+ for user_column in self .user_columns :
268
+ for user_method in self .user_methods :
269
+ # Minima of User-Level Feature
270
+ self .conv_data = pd .merge (
271
+ left = self .conv_data ,
272
+ right = get_min (self .user_data .copy (), user_method + "_" + user_column , 'min_user_sum_' + user_method + "_" + user_column , self .conversation_id_col ),
273
+ on = [self .conversation_id_col ],
274
+ how = "inner"
275
+ )
265
276
266
- if 'max' in self .convo_methods :
267
- # Maxima of User-Level Feature
268
- self .conv_data = pd .merge (
269
- left = self .conv_data ,
270
- right = get_max (self .user_data .copy (), "sum_" + column , 'max_user_sum_' + column , self .conversation_id_col ),
271
- on = [self .conversation_id_col ],
272
- how = "inner"
273
- )
277
+ if 'max' in self .convo_methods :
278
+ for user_column in self .user_columns :
279
+ for user_method in self .user_methods :
280
+ # Maxima of User-Level Feature
281
+ self .conv_data = pd .merge (
282
+ left = self .conv_data ,
283
+ right = get_max (self .user_data .copy (), user_method + "_" + user_column , 'max_user_sum_' + user_method + "_" + user_column , self .conversation_id_col ),
284
+ on = [self .conversation_id_col ],
285
+ how = "inner"
286
+ )
274
287
275
- # # temp checking for error
276
- # for column in self.summable_columns:
288
+
289
+ # Sum Columns were created using self.get_user_level_summed_features()
290
+ # for column in self.columns_to_summarize:
291
+ # # change to self.user_columns
292
+ # # should be summable_columns
293
+
294
+ # # for method in self.user_methods:
295
+ # # self.conv_data = pd.merge(
296
+ # # left=self.conv_data,
297
+ # # right=get_average(self.user_data.copy(), method+"_"+column, 'average_user_' + method + "_" +column, self.conversation_id_col),
298
+ # # on=[self.conversation_id_col],
299
+ # # how="inner"
300
+ # # )
277
301
278
302
# if 'mean' in self.convo_methods:
279
303
# # Average/Mean of User-Level Feature
0 commit comments