Skip to content

Commit 38334b6

Browse files
add compatibility for newer spacy versions and further 1.0.0 updates
1 parent 86efba1 commit 38334b6

File tree

2 files changed

+24
-18
lines changed

2 files changed

+24
-18
lines changed

displacy/__init__.py

+14-8
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,19 @@ def entity_dict(mmif, view, document_id, app_root):
3030
displacy_dict['ents'] = []
3131
for ann in view['annotations']:
3232
if ann.at_type == Uri.NE:
33-
displacy_dict['ents'].append(entity(ann))
33+
displacy_dict['ents'].append(entity(view, ann))
3434
return displacy_dict
3535

3636

3737
def get_text_documents(mmif):
3838
"""Return a dictionary indexed on document identifiers (with the view identifier
3939
if needed) with text documents as the values."""
40-
tds = [d for d in mmif.documents if str(d.at_type).endswith('TextDocument')]
40+
tds = [d for d in mmif.documents if "TextDocument" in str(d.at_type)]
4141
tds = {td.id:td for td in tds}
4242
for view in mmif.views:
4343
# TODO: add check for TextDocument in metadata.contains (saves time)
4444
for annotation in view.annotations:
45-
if str(annotation.at_type).endswith('TextDocument'):
45+
if "TextDocument" in str(annotation.at_type):
4646
tds["%s:%s" % (view.id, annotation.id)] = annotation
4747
return tds
4848

@@ -74,7 +74,7 @@ def mmif_to_dict(mmif: Mmif):
7474
# to a TextDocument in the views or a set of TextDocuments in the views.
7575
transcript_location = None
7676
for document in mmif.documents:
77-
if document.at_type.endswith('TextDocument'):
77+
if "TextDocument" in document.at_type:
7878
transcript_location = document.location
7979
transcript_location = transcript_location
8080
displacy_dict = {}
@@ -85,13 +85,19 @@ def mmif_to_dict(mmif: Mmif):
8585
displacy_dict['ents'] = []
8686
for ann in ne_view['annotations']:
8787
if ann.at_type == Uri.NE:
88-
displacy_dict['ents'].append(entity(ann))
88+
displacy_dict['ents'].append(entity(ne_view, ann))
8989
return displacy_dict
9090

9191

92-
def entity(annotation: Annotation):
93-
return {'start': annotation.properties['start'],
94-
'end': annotation.properties['end'],
92+
def entity(view: View, annotation: Annotation):
93+
if "targets" in annotation.properties:
94+
start = min([view.annotations[target].properties["start"] for target in annotation.properties["targets"]])
95+
end = max([view.annotations[target].properties["end"] for target in annotation.properties["targets"]])
96+
else:
97+
start = annotation.properties['start']
98+
end = annotation.properties['end']
99+
return {'start': start,
100+
'end': end,
95101
'label': annotation.properties['category']}
96102

97103

utils.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ def get_alignments(alignment_view):
3131
annotations = alignment_view.annotations
3232
# TODO: wanted to use "mmif.get_alignments(AnnotationTypes.TimeFrame, Uri.TOKEN)"
3333
# but that gave errors so I gave up on it
34-
token_idx = {a.id:a for a in annotations if str(a.at_type).endswith('Token')}
35-
timeframe_idx = {a.id:a for a in annotations if str(a.at_type).endswith('TimeFrame')}
36-
alignments = [a for a in annotations if str(a.at_type).endswith('Alignment')]
34+
token_idx = {a.id:a for a in annotations if "Token" in str(a.at_type)}
35+
timeframe_idx = {a.id:a for a in annotations if "TimeFrame" in str(a.at_type)}
36+
alignments = [a for a in annotations if "Alignment" in str(a.at_type)]
3737
vtt_start = None
3838
texts = []
3939
for alignment in alignments:
@@ -212,7 +212,7 @@ def get_document_ids(view, annotation_type):
212212
metadata = view.metadata.contains.get(annotation_type)
213213
ids = set([metadata['document']]) if 'document' in metadata else set()
214214
for annotation in view.annotations:
215-
if str(annotation.at_type).endswith(str(annotation_type)):
215+
if str(annotation_type) in str(annotation.at_type):
216216
try:
217217
ids.add(annotation.properties["document"])
218218
except KeyError:
@@ -298,15 +298,15 @@ def get_aligned_views(mmif):
298298
"""Return list of properly aligned views (for tree display)"""
299299
aligned_views = []
300300
for view in mmif.views:
301-
if any([str(at_type).endswith('Alignment') for at_type in view.metadata.contains]):
301+
if any(["Alignment" in str(at_type) for at_type in view.metadata.contains]):
302302
if check_view_alignment(view.annotations) == True:
303303
aligned_views.append(view.id)
304304
return aligned_views
305305

306306
def check_view_alignment(annotations):
307307
anno_stack = []
308308
for annotation in annotations:
309-
if str(annotation.at_type).endswith('Alignment'):
309+
if "Alignment" in str(annotation.at_type):
310310
anno_stack.insert(0, annotation.properties)
311311
else:
312312
anno_stack.append(annotation.id)
@@ -332,7 +332,7 @@ def create_ner_visualization(mmif, view):
332332
# all the view's named entities refer to the same text document (kaldi)
333333
document_ids = get_document_ids(view, Uri.NE)
334334
return displacy.visualize_ner(mmif, view, document_ids[0], app.root_path)
335-
except KeyError:
335+
except KeyError as e:
336336
# the view's entities refer to more than one text document (tesseract)
337337
pass
338338
def get_status(view):
@@ -357,17 +357,17 @@ def prepare_ocr_visualization(mmif, view):
357357
frames, text_docs, alignments = {}, {}, {}
358358
for anno in view.annotations:
359359
try:
360-
if str(anno.at_type).endswith('BoundingBox'):
360+
if "BoundingBox" in str(anno.at_type):
361361
frames = add_bounding_box(anno, frames)
362362

363-
elif str(anno.at_type).endswith('TextDocument'):
363+
elif "TextDocument" in str(anno.at_type):
364364
t = anno.properties["text_value"]
365365
if t:
366366
text_id = anno.properties["id"]
367367
# Format string so it is JSON-readable
368368
text_docs[text_id] = re.sub(r'([\\\/\|\"\'])', r'\1 ', t)
369369

370-
elif str(anno.at_type).endswith('Alignment'):
370+
elif "Alignment" in str(anno.at_type):
371371
source = anno.properties["source"]
372372
target = anno.properties["target"]
373373
alignments[source] = target

0 commit comments

Comments
 (0)