Skip to content

Commit 7c9c64e

Browse files
committed
Add default behavior for missing speaker metadata
1 parent e1cc169 commit 7c9c64e

File tree

2 files changed

+14
-5
lines changed

2 files changed

+14
-5
lines changed

convokit/model/corpusHelper.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
import os
66
import json
77
from collections import defaultdict
8+
from typing import Dict
89
import pickle
10+
911
from .speaker import Speaker
1012
from .utterance import Utterance
1113
from .conversation import Conversation
12-
from typing import Dict
1314
from .convoKitMeta import ConvoKitMeta
15+
from convokit.util import warn
1416

1517
BIN_DELIM_L, BIN_DELIM_R = "<##bin{", "}&&@**>"
1618
KeyId = "id"
@@ -197,6 +199,10 @@ def initialize_speakers_and_utterances_objects(corpus, utt_dict, utterances, spe
197199
u = defaultdict(lambda: None, u)
198200
speaker_key = u[KeySpeaker]
199201
if speaker_key not in speakers_dict:
202+
if u[KeySpeaker] not in speakers_data:
203+
warn("CorpusLoadWarning: Missing speaker metadata for speaker ID: {}. "
204+
"Initializing default empty metadata instead.".format(u[KeySpeaker]))
205+
speakers_data[u[KeySpeaker]] = {}
200206
if KeyMeta in speakers_data[u[KeySpeaker]]:
201207
speakers_dict[speaker_key] = Speaker(owner=corpus, id=u[KeySpeaker],
202208
meta=speakers_data[u[KeySpeaker]][KeyMeta])

convokit/tests/general/test_corpora_load_and_dump.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,24 @@ class CorpusLoadAndDump(unittest.TestCase):
88
Load a variety of existing (small) corpora to verify that there are no backward compatibility issues
99
"""
1010

11-
def test_load_subreddit(self):
11+
def test_load_dump_subreddit(self):
1212
corpus = Corpus(download('subreddit-hey'))
1313
corpus.dump('subreddit')
1414

15-
def test_load_tennis(self):
15+
def test_load_dump_tennis(self):
1616
corpus = Corpus(download('tennis-corpus'))
1717
corpus.dump('tennis-corpus')
1818

19-
def test_load_politeness(self):
19+
def test_load_dump_politeness(self):
2020
corpus = Corpus(download('wikipedia-politeness-corpus'))
2121
corpus.dump('wikipedia-politeness-corpus')
2222

23-
def test_load_switchboard(self):
23+
def test_load_dump_switchboard(self):
2424
corpus = Corpus(download("switchboard-corpus"))
2525
corpus.dump('switchboard-corpus')
2626

27+
def test_load_wikiconv(self):
28+
corpus = Corpus(download('wikiconv-2004'))
29+
2730
if __name__ == '__main__':
2831
unittest.main()

0 commit comments

Comments
 (0)