Show EAST annotations in OCR view & updated documentation

haydenmccormick · haydenmccormick · commit c5bd58085634 · 2023-07-20T14:42:20.000-04:00
diff --git a/Containerfile b/Containerfile
diff --git a/README.md b/README.md
@@ -1,19 +1,28 @@
 # The MMIF Visualization Server
 
-This application creates an HTML server that visualizes annotation components in a [MMIF](https://mmif.clams.ai) file. Supported annotations are:
+This application creates an HTML server that visualizes annotation components in a [MMIF](https://mmif.clams.ai) file. It contains the following visualizations for any valid MMIF:
 
-- Video or Audio file player with HTML5.
-- [WebVTT](https://www.w3.org/TR/webvtt1/) for showing alignments.
+- Video or audio file player with HTML5 (assuming the file refers to a video and/or audio document).
 - Pretty-printed MMIF contents.
-- Javascript for bounding boxes.
-- Named entity annotations with [displaCy.](https://explosion.ai/demos/displacy-ent)
+- Interactive, searchable MMIF tree view with [JSTree](https://www.jstree.com/).
+- Embedded [Universal Viewer](https://universalviewer.io/) (assuming the file refers to a video and/or image document).
+
+
+The application also includes tailored visualizations depending on the annotations present in the input MMIF:
+| Visualization | Supported CLAMS apps |
+|---|---|
+| [WebVTT](https://www.w3.org/TR/webvtt1/) for showing alignments of video captions. | [Whisper](https://github.com/clamsproject/app-whisper-wrapper), [Kaldi](https://github.com/clamsproject/app-aapb-pua-kaldi-wrapper) |
+| Javascript bounding boxes for image and OCR annotations. | [Tesseract](https://github.com/clamsproject/app-tesseractocr-wrapper), [EAST](https://github.com/clamsproject/app-east-textdetection) |
+| Named entity annotations with [displaCy](https://explosion.ai/demos/displacy-ent). | [spaCy](https://github.com/clamsproject/app-spacy-wrapper) |
+
+
 
 Requirements:
 
 - A command line interface.
 - Git (to get the code).
-- [Docker](https://www.docker.com/)  (if you run the visualizer using Docker).
-- Python 3.6 or later (if you want to run the server without Docker).
+- [Docker](https://www.docker.com/) or [Podman](https://podman.io/) (if you run the visualizer in a container).
+- Python 3.6 or later (if you want to run the server containerless).
 
 To get this code if you don't already have it:
 
@@ -23,12 +32,12 @@ $ git clone https://github.com/clamsproject/mmif-visualizer
 
 
 
-## Running the server in a Docker container
+## Running the server in a container
 
-Download or clone this repository and build an image using the `Dockerfile` (you may use another name for the -t parameter, for this example we use `clams-mmif-visualizer` throughout).
+Download or clone this repository and build an image using the `Containerfile` (you may use another name for the -t parameter; for this example we use `clams-mmif-visualizer` throughout). **NOTE**: if using Podman, just replace `docker` with `podman` in the following commands.
 
 ```bash
-$ docker build -t clams-mmif-visualizer .
+$ docker build . -f Containerfile -t clams-mmif-visualizer
 ```
 
 In these notes we assume that the data are in a local directory named `/Users/Shared/archive` with sub directories `audio`, `image`, `text` and `video` (those subdirectories are standard in CLAMS, but the parent directory could be any directory depending on your local set up). We can now run a Docker container with
@@ -56,7 +65,7 @@ With this, the mounted directory `/data` in the container is accessable from ins
 
 
 
-## Running the server without Docker
+## Running the server without Docker/Podman
 
 First install the python dependencies listed in `requirements.txt`:
 
diff --git a/app.py b/app.py
@@ -19,13 +19,11 @@ def index():
 def ocrpage():
     data = request.form
     try:
-        # print(html.unescape(data['frames_pages']))
         frames_pages = eval(html.unescape(data['frames_pages']))
         page_number = int(data['page_number'])
 
         return (render_ocr(data['vid_path'], frames_pages, page_number))
     except Exception as e:
-        print(html.unescape(data['frames_pages']))
         return f'<p class="error">Unexpected error of type {type(e)}: {e}</h1>'
         pass
 
diff --git a/ocr.py b/ocr.py
@@ -6,8 +6,8 @@
 from flask import render_template
 
 
-def add_bounding_box(anno, frames):
-    frame_num = anno.properties["frame"]
+def add_bounding_box(anno, frames, fps):
+    frame_num = anno.properties.get("frame") or anno.properties.get("timePoint")
     box_id = anno.properties["id"]
     boxType = anno.properties["boxType"]
     coordinates = anno.properties["coordinates"]
@@ -21,17 +21,18 @@ def add_bounding_box(anno, frames):
         frames[frame_num]["bb_ids"].append(box_id)
     else:
         frames[frame_num] = {"boxes": [box], "text": [], "bb_ids": [box_id], "timestamp": None, "secs": None, "repeat": False}
+    if fps:
+        secs = int(frame_num/fps)
+        frames[frame_num]["timestamp"] = str(datetime.timedelta(seconds=secs))
+        frames[frame_num]["secs"] = secs
+
     return frames
 
 
-def align_annotations(frames_list, alignments, text_docs, fps):
+def align_annotations(frames_list, alignments, text_docs):
     """Link alignments with frames"""
     prev_frame = None
     for frame_num, frame in frames_list:
-        if fps:
-            secs = int(frame_num/fps)
-            frame["timestamp"] = str(datetime.timedelta(seconds=secs))
-            frame["secs"] = secs
         for box_id in frame["bb_ids"]:
             text_id = alignments[box_id]
             frame["text"].append(text_docs[text_id])
@@ -98,9 +99,8 @@ def round_boxes(boxes):
 def get_ocr_views(mmif):
     """Return OCR views, which have TextDocument, BoundingBox, and Alignment annotations"""
     views = []
-    needed_types = ["TextDocument", "BoundingBox", "Alignment"]
+    ocr_apps = ["east-textdetection", "tesseract"]
     for view in mmif.views:
-        annotation_types = [str(url).split("/")[-1] for url in view.metadata.contains.keys()]
-        if needed_types == annotation_types:
+        if any([view.metadata.app.find(ocr_app) for ocr_app in ocr_apps]):
             views.append(view)
     return views
diff --git a/templates/ocr.html b/templates/ocr.html
@@ -13,10 +13,12 @@
                     <h4>
                         frame: {{frame_num}}<br>
                         timestamp: <a class="timestamp" onclick="SetCurTime('{{secs}}')">{{frame["timestamp"]}}</a><br>
-                        text detected:<br>
-                        {% for text in frame["text"] %}
-                        &emsp;{{text}}<br>
-                        {% endfor %}
+                        {% if frame["text"] %}
+                            text detected:<br>
+                            {% for text in frame["text"] %}
+                                &emsp;{{text}}<br>
+                            {% endfor %}
+                        {% endif %}
                     </h4>
                 </div>
             </div>
diff --git a/utils.py b/utils.py
@@ -357,10 +357,13 @@ def get_properties(annotation):
 def prepare_ocr_visualization(mmif, view):
     """ Visualize OCR by extracting image frames with BoundingBoxes from video"""
     frames, text_docs, alignments = {}, {}, {}
+    vid_path = get_video_path(mmif)
+    cv2_vid = cv2.VideoCapture(vid_path)
+    fps = cv2_vid.get(cv2.CAP_PROP_FPS)
     for anno in view.annotations:
         try:
             if anno.at_type.shortname == "BoundingBox":
-                frames = add_bounding_box(anno, frames)
+                frames = add_bounding_box(anno, frames, fps)
 
             elif anno.at_type.shortname == "TextDocument":
                 t = anno.properties["text_value"]
@@ -379,10 +382,8 @@ def prepare_ocr_visualization(mmif, view):
             pass
 
     # Generate pages (necessary to reduce IO cost) and render
-    vid_path = get_video_path(mmif)
-    cv2_vid = cv2.VideoCapture(vid_path)
-    fps = cv2_vid.get(cv2.CAP_PROP_FPS)
     frames_list = [(k, v) for k, v in frames.items()]
-    frames_list = align_annotations(frames_list, alignments, text_docs, fps)
+    if any(at_type.shortname == "Alignment" for at_type in view.metadata.contains):
+        frames_list = align_annotations(frames_list, alignments, text_docs)
     frames_pages = paginate(frames_list)
     return render_ocr(vid_path, frames_pages, 0)