@@ -321,6 +321,32 @@ def iter_utterances(self, selector: Optional[Callable[[Utterance], bool]] = lamb
321
321
if selector (v ):
322
322
yield v
323
323
324
def view_utterances(self, selector: Optional[Callable[[Utterance], bool]] = lambda utt: True,
                    exclude_meta: bool = False) -> pd.DataFrame:
    """
    View utterances in the Corpus as a dataframe, with an optional selector that filters for Utterances
    that should be included.

    Each row is built from a copy of the Utterance's attribute dictionary. Metadata entries are flattened
    into one column per key, named 'meta.<key>'.

    :param selector: a (lambda) function that takes an Utterance and returns True or False
        (i.e. include / exclude). By default, every Utterance is included.
    :param exclude_meta: if True, no 'meta.<key>' columns are added to the dataframe
    :return: a pandas DataFrame indexed by utterance id (index name 'id') with the columns 'timestamp',
        'text', 'speaker' (the speaker's id), 'reply_to' and 'conversation_id', followed by any
        'meta.<key>' columns. If no Utterance is selected, an empty DataFrame with the five core
        columns is returned.
    """
    core_columns = ['timestamp', 'text', 'speaker', 'reply_to', 'conversation_id']

    rows = dict()
    for utt in self.iter_utterances(selector):
        row = utt.__dict__.copy()
        # The raw metadata object never becomes a column; it is either flattened or discarded.
        meta = row.pop('meta')
        if not exclude_meta:
            for k, v in meta.items():
                row['meta.' + k] = v
        rows[utt.id] = row

    if not rows:
        # Without this guard, the '_id' lookup below raises KeyError on an empty frame.
        empty = pd.DataFrame(columns=core_columns)
        empty.index.name = 'id'
        return empty

    # One column per utterance, transposed so that each utterance becomes a row.
    df = pd.DataFrame(rows).T
    df['id'] = df['_id']
    df = df.set_index('id')
    # Replace Speaker objects with their ids so the column holds plain values.
    df['speaker'] = df['speaker'].map(lambda spkr: spkr.id)
    meta_columns = [k for k in df.columns if k.startswith('meta.')]
    # The explicit selection leaves out every private/internal attribute column.
    return df[core_columns + meta_columns]
349
+
324
350
def iter_conversations (self , selector : Optional [Callable [[Conversation ], bool ]] = lambda convo : True ) -> Generator [
325
351
Conversation , None , None ]:
326
352
"""
@@ -334,6 +360,28 @@ def iter_conversations(self, selector: Optional[Callable[[Conversation], bool]]
334
360
if selector (v ):
335
361
yield v
336
362
363
def view_conversations(self, selector: Optional[Callable[[Conversation], bool]] = lambda convo: True,
                       exclude_meta: bool = False) -> pd.DataFrame:
    """
    View conversations in the Corpus as a dataframe, with an optional selector that filters for
    Conversations that should be included.

    Each row is built from a copy of the Conversation's attribute dictionary. Metadata entries are
    flattened into one column per key, named 'meta.<key>'.

    :param selector: a (lambda) function that takes a Conversation and returns True or False
        (i.e. include / exclude). By default, every Conversation is included.
    :param exclude_meta: if True, no 'meta.<key>' columns are added to the dataframe
    :return: a pandas DataFrame indexed by conversation id (index name 'id'), with the internal
        attribute columns removed. If no Conversation is selected, an empty DataFrame is returned.
    """
    internal_columns = ['_owner', 'obj_type', '_utterance_ids', '_speaker_ids', 'tree', '_id']

    rows = dict()
    for convo in self.iter_conversations(selector):
        row = convo.__dict__.copy()
        # The raw metadata object never becomes a column; it is either flattened or discarded.
        meta = row.pop('meta')
        if not exclude_meta:
            for k, v in meta.items():
                row['meta.' + k] = v
        rows[convo.id] = row

    if not rows:
        # Without this guard, the '_id' lookup below raises KeyError on an empty frame.
        empty = pd.DataFrame()
        empty.index.name = 'id'
        return empty

    # One column per conversation, transposed so that each conversation becomes a row.
    df = pd.DataFrame(rows).T
    df['id'] = df['_id']
    df = df.set_index('id')
    # errors='ignore' keeps the view working if an internal attribute is absent on these objects.
    return df.drop(internal_columns, axis=1, errors='ignore')
384
+
337
385
def iter_speakers (self , selector : Optional [Callable [[Speaker ], bool ]] = lambda speaker : True ) -> Generator [Speaker , None , None ]:
338
386
"""
339
387
Get Speakers in the Corpus, with an optional selector that filters for Speakers that should be included
@@ -347,6 +395,28 @@ def iter_speakers(self, selector: Optional[Callable[[Speaker], bool]] = lambda s
347
395
if selector (speaker ):
348
396
yield speaker
349
397
398
def view_speakers(self, selector: Optional[Callable[[Speaker], bool]] = lambda speaker: True,
                  exclude_meta: bool = False) -> pd.DataFrame:
    """
    View speakers in the Corpus as a dataframe, with an optional selector that filters for Speakers
    that should be included.

    Each row is built from a copy of the Speaker's attribute dictionary. Metadata entries are
    flattened into one column per key, named 'meta.<key>'.

    :param selector: a (lambda) function that takes a Speaker and returns True or False
        (i.e. include / exclude). By default, every Speaker is included.
    :param exclude_meta: if True, no 'meta.<key>' columns are added to the dataframe
    :return: a pandas DataFrame indexed by speaker id (index name 'id'), with the internal
        attribute columns removed. If no Speaker is selected, an empty DataFrame is returned.
    """
    internal_columns = ['_owner', 'obj_type', 'utterances', 'conversations', '_id']

    rows = dict()
    for spkr in self.iter_speakers(selector):
        row = spkr.__dict__.copy()
        # The raw metadata object never becomes a column; it is either flattened or discarded.
        meta = row.pop('meta')
        if not exclude_meta:
            for k, v in meta.items():
                row['meta.' + k] = v
        rows[spkr.id] = row

    if not rows:
        # Without this guard, the '_id' lookup below raises KeyError on an empty frame.
        empty = pd.DataFrame()
        empty.index.name = 'id'
        return empty

    # One column per speaker, transposed so that each speaker becomes a row.
    df = pd.DataFrame(rows).T
    df['id'] = df['_id']
    df = df.set_index('id')
    # errors='ignore' keeps the view working if an internal attribute is absent on these objects.
    return df.drop(internal_columns, axis=1, errors='ignore')
419
+
350
420
def iter_users(self, selector=lambda speaker: True):
    """
    Older name for iter_speakers(), kept for callers that still use it.

    Calls deprecation("iter_users()", "iter_speakers()") first (presumably to point callers at the
    replacement; the helper is defined outside this view), then delegates to iter_speakers().

    :param selector: passed through unchanged to iter_speakers(); the default accepts everything
    :return: whatever iter_speakers(selector) returns
    """
    deprecation("iter_users()", "iter_speakers()")
    speakers = self.iter_speakers(selector)
    return speakers
@@ -455,7 +525,7 @@ def reindex_conversations(self, new_convo_roots: List[str], preserve_corpus_meta
455
525
456
526
:param new_convo_roots: List of utterance ids to use as conversation ids
457
527
:param preserve_corpus_meta: set as True to copy original Corpus metadata to new Corpus
458
- :param preserve_convo_meta: set as True to copy original Conversation metadata to new Conversation metadata
528
+ :param preserve_convo_meta: set as True to copy original Conversation metadata to new Conversation metadata
459
529
(For each new conversation, use the metadata of the conversation that the utterance belonged to.)
460
530
:param verbose: whether to print a warning when
461
531
:return: new Corpus with reindexed Conversations
0 commit comments