Add compatibility for newer spaCy versions and further 1.0.0 updates (entity spans derived from targets; substring matching of at_type)

haydenmccormick · haydenmccormick · commit 38334b619fba · 2023-05-26T12:46:21.000-04:00
diff --git a/displacy/__init__.py b/displacy/__init__.py
@@ -30,19 +30,19 @@ def entity_dict(mmif, view, document_id, app_root):
     displacy_dict['ents'] = []
     for ann in view['annotations']:
         if ann.at_type == Uri.NE:
-            displacy_dict['ents'].append(entity(ann))
+            displacy_dict['ents'].append(entity(view, ann))
     return displacy_dict
 
 
 def get_text_documents(mmif):
     """Return a dictionary indexed on document identifiers (with the view identifier
     if needed) with text documents as the values."""
-    tds = [d for d in mmif.documents if str(d.at_type).endswith('TextDocument')]
+    tds = [d for d in mmif.documents if "TextDocument" in str(d.at_type)]
     tds = {td.id:td for td in tds}
     for view in mmif.views:
         # TODO: add check for TextDocument in metadata.contains (saves time)
         for annotation in view.annotations:
-            if str(annotation.at_type).endswith('TextDocument'):
+            if "TextDocument" in str(annotation.at_type):
                 tds["%s:%s" % (view.id, annotation.id)] = annotation
     return tds
 
@@ -74,7 +74,7 @@ def mmif_to_dict(mmif: Mmif):
     # to a TextDocument in the views or a set of TextDocuments in the views.
     transcript_location = None
     for document in mmif.documents:
-        if document.at_type.endswith('TextDocument'):
+        if "TextDocument" in document.at_type:
             transcript_location = document.location
     transcript_location = transcript_location
     displacy_dict = {}
@@ -85,13 +85,19 @@ def mmif_to_dict(mmif: Mmif):
         displacy_dict['ents'] = []
         for ann in ne_view['annotations']:
             if ann.at_type == Uri.NE:
-                displacy_dict['ents'].append(entity(ann))
+                displacy_dict['ents'].append(entity(ne_view, ann))
     return displacy_dict
 
 
-def entity(annotation: Annotation):
-    return {'start': annotation.properties['start'],
-            'end': annotation.properties['end'],
+def entity(view: View, annotation: Annotation):
+    if "targets" in annotation.properties:
+        start = min([view.annotations[target].properties["start"] for target in annotation.properties["targets"]])
+        end = max([view.annotations[target].properties["end"] for target in annotation.properties["targets"]])
+    else:
+        start = annotation.properties['start']
+        end = annotation.properties['end']
+    return {'start': start,
+            'end': end,
             'label': annotation.properties['category']}
 
 
diff --git a/utils.py b/utils.py
@@ -31,9 +31,9 @@ def get_alignments(alignment_view):
     annotations = alignment_view.annotations
     # TODO: wanted to use "mmif.get_alignments(AnnotationTypes.TimeFrame, Uri.TOKEN)"
     # but that gave errors so I gave up on it
-    token_idx = {a.id:a for a in annotations if str(a.at_type).endswith('Token')}
-    timeframe_idx = {a.id:a for a in annotations if str(a.at_type).endswith('TimeFrame')}
-    alignments = [a for a in annotations if str(a.at_type).endswith('Alignment')]
+    token_idx = {a.id:a for a in annotations if "Token" in str(a.at_type)}
+    timeframe_idx = {a.id:a for a in annotations if "TimeFrame" in str(a.at_type)}
+    alignments = [a for a in annotations if "Alignment" in str(a.at_type)]
     vtt_start = None
     texts = []
     for alignment in alignments:
@@ -212,7 +212,7 @@ def get_document_ids(view, annotation_type):
     metadata = view.metadata.contains.get(annotation_type)
     ids = set([metadata['document']]) if 'document' in metadata else set()
     for annotation in view.annotations:
-        if str(annotation.at_type).endswith(str(annotation_type)):
+        if str(annotation_type) in str(annotation.at_type):
             try:
                 ids.add(annotation.properties["document"])
             except KeyError:
@@ -298,15 +298,15 @@ def get_aligned_views(mmif):
     """Return list of properly aligned views (for tree display)"""
     aligned_views = []
     for view in mmif.views:
-        if any([str(at_type).endswith('Alignment') for at_type in view.metadata.contains]):
+        if any(["Alignment" in str(at_type) for at_type in view.metadata.contains]):
             if check_view_alignment(view.annotations) == True:
                 aligned_views.append(view.id)
     return aligned_views
 
 def check_view_alignment(annotations):
     anno_stack = []
     for annotation in annotations:
-        if str(annotation.at_type).endswith('Alignment'):
+        if "Alignment" in str(annotation.at_type):
             anno_stack.insert(0, annotation.properties)
         else:
             anno_stack.append(annotation.id)
@@ -332,7 +332,7 @@ def create_ner_visualization(mmif, view):
         # all the view's named entities refer to the same text document (kaldi)
         document_ids = get_document_ids(view, Uri.NE)
         return displacy.visualize_ner(mmif, view, document_ids[0], app.root_path)
-    except KeyError:
+    except KeyError as e:
         # the view's entities refer to more than one text document (tessearct)
         pass
 def get_status(view):
@@ -357,17 +357,17 @@ def prepare_ocr_visualization(mmif, view):
     frames, text_docs, alignments = {}, {}, {}
     for anno in view.annotations:
         try:
-            if str(anno.at_type).endswith('BoundingBox'):
+            if "BoundingBox" in str(anno.at_type):
                 frames = add_bounding_box(anno, frames)
 
-            elif str(anno.at_type).endswith('TextDocument'):
+            elif "TextDocument" in str(anno.at_type):
                 t = anno.properties["text_value"]
                 if t:
                     text_id = anno.properties["id"]
                     # Format string so it is JSON-readable
                     text_docs[text_id] = re.sub(r'([\\\/\|\"\'])', r'\1 ', t)
 
-            elif str(anno.at_type).endswith('Alignment'):
+            elif "Alignment" in str(anno.at_type):
                 source = anno.properties["source"]
                 target = anno.properties["target"]
                 alignments[source] = target