Update ocr.py

1192119703jzx · web-flow · commit 9b5ac32a7a6a · 2024-06-23T01:18:40.000-04:00
diff --git a/ocr.py b/ocr.py
@@ -13,6 +13,13 @@
 
 import cache
 
+"""
+Helper function for showing debug information
+
+def some_function(x):
+    from utils import app  # import inside function
+    app.logger.debug(x)
+"""
 
 class OCRFrame():
     """
@@ -62,10 +69,10 @@ def add_bounding_box(self, anno, mmif: Mmif):
         else:
             for alignment_anns in mmif.get_alignments(AnnotationTypes.BoundingBox, AnnotationTypes.TimePoint).values():
                 for alignment_ann in alignment_anns:
-                    if alignment_ann.get('source') == anno.id:
+                    if alignment_ann.get('source') == anno.long_id:
                         timepoint_anno = mmif[alignment_ann.get('target')]
                         break
-                    elif alignment_ann.get('target') == anno.id:
+                    elif alignment_ann.get('target') == anno.long_id:
                         timepoint_anno = mmif[alignment_ann.get('source')]
                         break
         if timepoint_anno:
@@ -90,7 +97,7 @@ def add_timeframe(self, anno, mmif):
         if "targets" in anno.properties:
             start_id, end_id = anno.properties.get("targets")[0], anno.properties.get("targets")[-1]
             anno_parent = mmif.get_view_by_id(anno.parent)
-            start_anno, end_anno = mmif[start_id], mmif[end_id]
+            start_anno, end_anno = anno_parent.get_annotation_by_id(start_id), anno_parent.get_annotation_by_id(end_id)
             start = convert_timepoint(mmif, start_anno, "frames")
             end = convert_timepoint(mmif, end_anno, "frames")
             start_secs = convert_timepoint(mmif, start_anno, "seconds")
@@ -121,10 +128,9 @@ def add_timepoint(self, anno, mmif, skip_if_view_has_frames=True):
                 self.frametype = anno.properties.get("label")
 
     def add_text_document(self, anno):
-        t = anno.properties.get("text_value") or anno.properties.get("text").value
-        if t:
-            text_val = re.sub(r'([\\\/\|\"\'])', r'\1 ', t)
-            self.text = self.text + [text_val] if text_val not in self.text else self.text
+        t = anno.properties.get("text_value") or anno.text_value
+        text_val = re.sub(r'([\\\/\|\"\'])', r'\1 ', t)
+        self.text = self.text + [text_val] if text_val not in self.text else self.text
 
 
 def get_ocr_frames(view, mmif):
@@ -139,6 +145,8 @@ def get_ocr_frames(view, mmif):
 
             # Account for alignment in either direction
             frame = OCRFrame(source, mmif)
+            if target.at_type == DocumentTypes.TextDocument:
+                frame.add_timepoint(source, mmif, skip_if_view_has_frames=False)
             frame.update(target, mmif)
 
             i = frame.frame_num if frame.frame_num is not None else frame.range