Skip to content

Commit 010c844

Browse files
committed
implemented view() functions
1 parent 22d69b9 commit 010c844

File tree

1 file changed

+71
-1
lines changed

1 file changed

+71
-1
lines changed

convokit/model/corpus.py

+71-1
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,32 @@ def iter_utterances(self, selector: Optional[Callable[[Utterance], bool]] = lamb
321321
if selector(v):
322322
yield v
323323

324+
def view_utterances(self, selector: Optional[Callable[[Utterance], bool]] = lambda utt: True,
                    exclude_meta: bool = False) -> pd.DataFrame:
    """
    View utterances in the Corpus as a DataFrame, with an optional selector that filters for Utterances
    that should be included.

    Each row is one Utterance, indexed by utterance id (index name ``id``). The columns are ``timestamp``,
    ``text``, ``speaker`` (the speaker's id, not the Speaker object), ``reply_to`` and ``conversation_id``,
    followed by one ``meta.<key>`` column per metadata key unless ``exclude_meta`` is set.

    :param selector: a (lambda) function that takes an Utterance and returns True or False (i.e. include / exclude).
        By default, the selector includes all Utterances in the Corpus.
    :param exclude_meta: set as True to leave the ``meta.<key>`` columns out of the DataFrame
    :return: a pandas DataFrame of the selected Utterances; an empty DataFrame (with the five data columns)
        when no Utterance is selected
    """
    core_columns = ['timestamp', 'text', 'speaker', 'reply_to', 'conversation_id']

    rows = dict()
    for utt in self.iter_utterances(selector):
        record = utt.__dict__.copy()
        # The nested metadata mapping never becomes a column itself; it is either flattened or discarded.
        meta = record.pop('meta')
        if not exclude_meta:
            for key, value in meta.items():
                record['meta.' + str(key)] = value
        rows[utt.id] = record

    if not rows:
        # pd.DataFrame({}).T has no columns at all, so the '_id' lookup below would raise KeyError.
        return pd.DataFrame(columns=core_columns, index=pd.Index([], name='id'))

    # One column per utterance after construction; transpose so that each utterance becomes a row.
    df = pd.DataFrame(rows).T
    df = df.set_index('_id')
    df.index.name = 'id'
    df['speaker'] = df['speaker'].map(lambda spkr: spkr.id)
    meta_columns = [col for col in df.columns if col.startswith('meta.')]
    # The projection keeps only the public data columns, so internal fields need no explicit drop.
    return df[core_columns + meta_columns]
349+
324350
def iter_conversations(self, selector: Optional[Callable[[Conversation], bool]] = lambda convo: True) -> Generator[
325351
Conversation, None, None]:
326352
"""
@@ -334,6 +360,28 @@ def iter_conversations(self, selector: Optional[Callable[[Conversation], bool]]
334360
if selector(v):
335361
yield v
336362

363+
def view_conversations(self, selector: Optional[Callable[[Conversation], bool]] = lambda convo: True,
                       exclude_meta: bool = False) -> pd.DataFrame:
    """
    View conversations in the Corpus as a DataFrame, with an optional selector that filters for
    Conversations that should be included.

    Each row is one Conversation, indexed by conversation id (index name ``id``). Internal fields
    (``_owner``, ``obj_type``, ``_utterance_ids``, ``_speaker_ids``, ``tree``) are dropped, and each
    metadata key becomes a ``meta.<key>`` column unless ``exclude_meta`` is set.

    :param selector: a (lambda) function that takes a Conversation and returns True or False
        (i.e. include / exclude). By default, the selector includes all Conversations in the Corpus.
    :param exclude_meta: set as True to leave the ``meta.<key>`` columns out of the DataFrame
    :return: a pandas DataFrame of the selected Conversations; an empty DataFrame when no Conversation
        is selected
    """
    rows = dict()
    for convo in self.iter_conversations(selector):
        record = convo.__dict__.copy()
        # The nested metadata mapping never becomes a column itself; it is either flattened or discarded.
        meta = record.pop('meta')
        if not exclude_meta:
            for key, value in meta.items():
                record['meta.' + str(key)] = value
        rows[convo.id] = record

    if not rows:
        # pd.DataFrame({}).T has no columns at all, so the '_id' lookup below would raise KeyError.
        return pd.DataFrame(index=pd.Index([], name='id'))

    # One column per conversation after construction; transpose so that each conversation becomes a row.
    df = pd.DataFrame(rows).T
    df = df.set_index('_id')
    df.index.name = 'id'
    return df.drop(['_owner', 'obj_type', '_utterance_ids', '_speaker_ids', 'tree'], axis=1)
384+
337385
def iter_speakers(self, selector: Optional[Callable[[Speaker], bool]] = lambda speaker: True) -> Generator[Speaker, None, None]:
338386
"""
339387
Get Speakers in the Corpus, with an optional selector that filters for Speakers that should be included
@@ -347,6 +395,28 @@ def iter_speakers(self, selector: Optional[Callable[[Speaker], bool]] = lambda s
347395
if selector(speaker):
348396
yield speaker
349397

398+
def view_speakers(self, selector: Optional[Callable[[Speaker], bool]] = lambda speaker: True,
                  exclude_meta: bool = False) -> pd.DataFrame:
    """
    View speakers in the Corpus as a DataFrame, with an optional selector that filters for Speakers
    that should be included.

    Each row is one Speaker, indexed by speaker id (index name ``id``). Internal fields (``_owner``,
    ``obj_type``, ``utterances``, ``conversations``) are dropped, and each metadata key becomes a
    ``meta.<key>`` column unless ``exclude_meta`` is set.

    :param selector: a (lambda) function that takes a Speaker and returns True or False
        (i.e. include / exclude). By default, the selector includes all Speakers in the Corpus.
    :param exclude_meta: set as True to leave the ``meta.<key>`` columns out of the DataFrame
    :return: a pandas DataFrame of the selected Speakers; an empty DataFrame when no Speaker is selected
    """
    rows = dict()
    for spkr in self.iter_speakers(selector):
        record = spkr.__dict__.copy()
        # The nested metadata mapping never becomes a column itself; it is either flattened or discarded.
        meta = record.pop('meta')
        if not exclude_meta:
            for key, value in meta.items():
                record['meta.' + str(key)] = value
        rows[spkr.id] = record

    if not rows:
        # pd.DataFrame({}).T has no columns at all, so the '_id' lookup below would raise KeyError.
        return pd.DataFrame(index=pd.Index([], name='id'))

    # One column per speaker after construction; transpose so that each speaker becomes a row.
    df = pd.DataFrame(rows).T
    df = df.set_index('_id')
    df.index.name = 'id'
    return df.drop(['_owner', 'obj_type', 'utterances', 'conversations'], axis=1)
419+
350420
def iter_users(self, selector=lambda speaker: True):
    """
    Deprecated alias of iter_speakers().

    Reports the deprecation, then delegates to iter_speakers() with the same selector.

    :param selector: passed through unchanged to iter_speakers(); the default accepts every item
    :return: whatever iter_speakers(selector) returns
    """
    deprecation("iter_users()", "iter_speakers()")
    speakers = self.iter_speakers(selector)
    return speakers
@@ -455,7 +525,7 @@ def reindex_conversations(self, new_convo_roots: List[str], preserve_corpus_meta
455525
456526
:param new_convo_roots: List of utterance ids to use as conversation ids
457527
:param preserve_corpus_meta: set as True to copy original Corpus metadata to new Corpus
458-
:param preserve_convo_meta: set as True to copy original Conversation metadata to new Conversation metadata
528+
:param preserve_convo_meta: set as True to copy original Conversation metadata to new Conversation metadata
459529
(For each new conversation, use the metadata of the conversation that the utterance belonged to.)
460530
:param verbose: whether to print a warning when
461531
:return: new Corpus with reindexed Conversations

0 commit comments

Comments
 (0)