From 773cff44754ad36bda4cf9ae0dbe11480865b8a4 Mon Sep 17 00:00:00 2001
From: Hayden McCormick <haydenwmccormick@gmail.com>
Date: Sat, 8 Jun 2024 15:02:50 -0400
Subject: [PATCH] Migrate from function-based to class-based rendering

---
 app.py    |  20 ++-
 ocr.py    |  15 ++
 render.py | 409 +++++++++++++++++++++++++++++++-----------------------
 3 files changed, 262 insertions(+), 182 deletions(-)
diff --git a/app.py b/app.py
index 91b5149..75a19be 100644
--- a/app.py
+++ b/app.py
@@ -7,11 +7,12 @@
 
 from flask import Flask, request, render_template, flash, send_from_directory, redirect
 from mmif.serialize import Mmif
+from mmif.vocabulary import DocumentTypes
 
 import cache
 from cache import set_last_access, cleanup
 import traceback
-from render import render_documents, render_annotations, prepare_and_render_ocr, render_ocr_page
+from render import render_documents, render_annotations, prepare_ocr, render_ocr_page
 
 # these two static folder-related params are important, do not remove
 app = Flask(__name__, static_folder='static', static_url_path='')
@@ -26,9 +27,11 @@ def index():
 @app.route('/ocr', methods=['POST'])
 def ocr():
     if "page_number" not in request.json:
-        return serve_first_ocr_page(request.json)
-    else:
-        return serve_ocr_page(request.json)
+        error_html = build_ocr_tab(request.json)  # None unless preparation failed
+        if error_html is not None:
+            return error_html
+        request.json["page_number"] = 0
+    return serve_ocr_page(request.json)
 
 
 @app.route('/upload', methods=['GET', 'POST'])
@@ -102,9 +105,9 @@ def render_mmif(mmif_str, viz_id):
                            annotations=rendered_annotations)
 
 
-def serve_first_ocr_page(data):
+def build_ocr_tab(data):
     """
-    Prepares OCR (at load time, due to lazy loading) and serves the first page
+    Prepares OCR (at load time, due to lazy loading)
     """
     try:
         data = dict(request.json)
@@ -112,7 +115,10 @@ def serve_first_ocr_page(data):
                         data["mmif_id"] / "file.mmif").read()
         mmif = Mmif(mmif_str)
         ocr_view = mmif.get_view_by_id(data["view_id"])
-        return prepare_and_render_ocr(mmif, ocr_view, data["mmif_id"])
+        prepare_ocr(mmif, ocr_view, data["mmif_id"])
+        request.json["vid_path"] = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[
+                0].location_path()
+
     except Exception as e:
         app.logger.error(f"{e}\n{traceback.format_exc()}")
         return f'<p class="error">Error: {e} Check the server log for more information.</h1>'
diff --git a/ocr.py b/ocr.py
index dc21f02..03a4012 100644
--- a/ocr.py
+++ b/ocr.py
@@ -127,6 +127,21 @@ def add_text_document(self, anno):
                 [text_val] if text_val not in self.text else self.text
 
 
+def prepare_ocr(mmif, view, viz_id):
+    """
+    Prepares list of frames that will be passed back and forth between server
+    and client, and saves it in pages. Rendering is left to the caller.
+    """
+    ocr_frames = get_ocr_frames(view, mmif)
+
+    # Generate pages (necessary to reduce IO cost)
+    frames_list = [(k, vars(v)) for k, v in ocr_frames.items()]
+    frames_list = find_duplicates(frames_list)
+    frames_pages = paginate(frames_list)
+    # Save page list as temp file
+    save_json(frames_pages, view.id, viz_id)
+
+
 def find_annotation(anno_id, mmif):
     if mmif.id_delimiter in anno_id:
         view_id, anno_id = anno_id.split(mmif.id_delimiter)
diff --git a/render.py b/render.py
index 2020550..13c769c 100644
--- a/render.py
+++ b/render.py
@@ -11,7 +11,7 @@
 import traceback
 
 from utils import get_status, get_properties, get_abstract_view_type, url2posix, get_vtt_file
-from ocr import get_ocr_frames, paginate, find_duplicates, save_json, make_image_directory, is_duplicate_image
+from ocr import prepare_ocr, make_image_directory, is_duplicate_image
 import cv2
 import json
 import tempfile
@@ -22,7 +22,7 @@
 Methods to render MMIF documents and their annotations in various formats.
 """
 
-# -- Documents --
+# -- Render methods --
 
 
 def render_documents(mmif, viz_id):
@@ -31,204 +31,263 @@ def render_documents(mmif, viz_id):
     """
     tabs = []
     for document in mmif.documents:
-        try:
-            # Add symbolic link to document to static folder, so it can be accessed
-            # by the browser.
-            doc_path = document.location_path()
-            doc_symlink_path = pathlib.Path(
-                current_app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}")
-            os.symlink(doc_path, doc_symlink_path)
-            doc_symlink_rel_path = '/' + \
-                doc_symlink_path.relative_to(
-                    current_app.static_folder).as_posix()
+        if document.at_type == DocumentTypes.TextDocument:
+            tabs.append(TextTab(document, viz_id))
+        elif document.at_type == DocumentTypes.ImageDocument:
+            tabs.append(ImageTab(document, viz_id))
+        elif document.at_type == DocumentTypes.AudioDocument:
+            tabs.append(AudioTab(document, viz_id))
+        elif document.at_type == DocumentTypes.VideoDocument:
+            tabs.append(VideoTab(document, mmif, viz_id))
 
-            if document.at_type == DocumentTypes.TextDocument:
-                html_tab = render_text(doc_path)
-            elif document.at_type == DocumentTypes.ImageDocument:
-                html_tab = render_image(doc_symlink_rel_path)
-            elif document.at_type == DocumentTypes.AudioDocument:
-                html_tab = render_audio(doc_symlink_rel_path)
-            elif document.at_type == DocumentTypes.VideoDocument:
-                html_tab = render_video(doc_symlink_rel_path, mmif, viz_id)
-
-            tabs.append({"id": document.id,
-                        "tab_name": document.at_type.shortname,
-                         "html": html_tab})
-
-        except Exception:
-            tabs.append({"id": document.id,
-                        "tab_name": document.at_type.shortname,
-                         "html": f"Error rendering document: <br><br> <pre>{traceback.format_exc()}</pre>"})
     return tabs
 
 
-def render_text(text_path):
-    """Return the content of the text document, but with some HTML tags added."""
-    text_path = url2posix(text_path)
-    if not os.path.isfile(text_path):
-        raise FileNotFoundError(f"File not found: {text_path}")
-    with open(text_path) as t_file:
-        content = t_file.read().replace("\n", "<br/>\n")
-        return f"{content}\n"
-
-
-def render_image(img_path):
-    img_path = url2posix(img_path)
-    html = StringIO()
-    html.write(
-        f'<img src=\"{img_path}\" alt="Image" style="max-width: 100%">\n')
-    return html.getvalue()
-
-
-def render_audio(audio_path):
-    audio_path = url2posix(audio_path)
-    html = StringIO()
-    html.write('<audio id="audioplayer" controls crossorigin="anonymous">\n')
-    html.write(f'    <source src=\"{audio_path}\">\n')
-    html.write("</audio>\n")
-    return html.getvalue()
-
-
-def render_video(vid_path, mmif, viz_id):
-    vid_path = url2posix(vid_path)
-    html = StringIO()
-    html.write('<video id="vid" controls crossorigin="anonymous" >\n')
-    html.write(f'    <source src=\"{vid_path}\">\n')
-    for view in mmif.views:
-        if get_abstract_view_type(view, mmif) == "ASR":
-            vtt_path = get_vtt_file(view, viz_id)
-            rel_vtt_path = re.search("mmif-viz-cache/.*", vtt_path).group(0)
-            html.write(
-                f'    <track kind="captions" srclang="en" src="/{rel_vtt_path}" label="transcript" default/>\n')
-    html.write("</video>\n")
-    return html.getvalue()
-
-# -- Annotations --
-
-
 def render_annotations(mmif, viz_id):
     """
     Returns HTML Tab representation of all annotations in the MMIF object.
     """
     tabs = []
     # These tabs should always be present
-    tabs.append({"id": "info", "tab_name": "Info", "html": render_info(mmif)})
-    tabs.append({"id": "annotations", "tab_name": "Annotations",
-                "html": render_annotation_table(mmif)})
-    tabs.append({"id": "tree", "tab_name": "Tree",
-                "html": render_jstree(mmif)})
+    tabs.append(InfoTab(mmif))
+    tabs.append(AnnotationTableTab(mmif))
+    tabs.append(JSTreeTab(mmif))
     # These tabs are optional
     for view in mmif.views:
-        try:
-            abstract_view_type = get_abstract_view_type(view, mmif)
-            # Workaround to deal with the fact that some apps have a version number in the URL
-            app_url = view.metadata.app if re.search(
-                r"\/v\d+\.?\d?$", view.metadata.app) else view.metadata.app + "/v1"
-            app_shortname = app_url.split("/")[-2]
-            if abstract_view_type == "NER":
-                tabs.append(
-                    {"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_ner(mmif, view)})
-            elif abstract_view_type == "ASR":
-                tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}",
-                            "html": render_asr_vtt(view, viz_id)})
-            elif abstract_view_type == "OCR":
-                tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}",
-                            "html": prepare_and_render_ocr(mmif, view, viz_id)})
-
-        except Exception as e:
-            tabs.append({"id": view.id,
-                         "tab_name": view.id,
-                         "html": f"Error rendering annotations: <br><br> <pre>{traceback.format_exc()}</pre>"})
+        abstract_view_type = get_abstract_view_type(view, mmif)
+        if abstract_view_type == "NER":
+            tabs.append(NERTab(mmif, view))
+        elif abstract_view_type == "ASR":
+            tabs.append(VTTTab(mmif, view, viz_id))
+        elif abstract_view_type == "OCR":
+            tabs.append(OCRTab(mmif, view, viz_id))
 
     return tabs
 
 
-def render_info(mmif):
-    s = StringIO('Howdy')
-    s.write("<pre>")
-    for document in mmif.documents:
-        at_type = document.at_type.shortname
-        location = document.location
-        s.write("%s  %s\n" % (at_type, location))
-    s.write('\n')
-    for view in mmif.views:
-        app = view.metadata.app
-        status = get_status(view)
-        s.write('%s  %s  %s  %d\n' %
-                (view.id, app, status, len(view.annotations)))
-        if len(view.annotations) > 0:
-            s.write('\n')
-            types = Counter([a.at_type.shortname
-                             for a in view.annotations])
-            for attype, count in types.items():
-                s.write('    %4d %s\n' % (count, attype))
-        s.write('\n')
-    s.write("</pre>")
-    return s.getvalue()
+# -- Base Tab Class --
 
+class DocumentTab():
+    def __init__(self, document, viz_id):
+        self.id = document.id
+        self.tab_name = document.at_type.shortname
+        self.viz_id = viz_id
 
-def render_annotation_table(mmif):
-    s = StringIO('Howdy')
-    for view in mmif.views:
-        status = get_status(view)
-        s.write('<p><b>%s  %s</b>  %s  %d annotations</p>\n'
-                % (view.id, view.metadata.app, status, len(view.annotations)))
-        s.write("<blockquote>\n")
-        s.write("<table cellspacing=0 cellpadding=5 border=1>\n")
-        def limit_len(str): return str[:500] + \
-            "  . . .  }" if len(str) > 500 else str
-        for annotation in view.annotations:
-            s.write('  <tr>\n')
-            s.write('    <td>%s</td>\n' % annotation.id)
-            s.write('    <td>%s</td>\n' % annotation.at_type.shortname)
-            s.write('    <td>%s</td>\n' %
-                    limit_len(get_properties(annotation)))
-            s.write('  </tr>\n')
-        s.write("</table>\n")
-        s.write("</blockquote>\n")
-    return s.getvalue()
-
-
-def render_jstree(mmif):
-    return render_template('interactive.html', mmif=mmif, aligned_views=[])
-
-
-def render_asr_vtt(view, viz_id):
-    vtt_filename = get_vtt_file(view, viz_id)
-    with open(vtt_filename) as vtt_file:
-        vtt_content = vtt_file.read()
-    return f"<pre>{vtt_content}</pre>"
-
-
-def render_ner(mmif, view):
-    metadata = view.metadata.contains.get(Uri.NE)
-    ner_document = metadata.get('document')
-    return displacy.visualize_ner(mmif, view, ner_document, current_app.root_path)
-
-
-def prepare_and_render_ocr(mmif, view, viz_id):
-    """
-    Prepares list of frames that will be passed back and forth between server
-    and client, and renders the first page of the OCR.
-    """
-    vid_path = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[
-        0].location_path()
+        try:
+            # Add symbolic link to document to static folder, so it can be accessed
+            # by the browser.
+            self.doc_path = document.location_path()
+            self.doc_symlink_path = pathlib.Path(
+                current_app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{self.doc_path.split('.')[-1]}")
+            os.symlink(self.doc_path, self.doc_symlink_path)
+            self.doc_symlink_rel_path = '/' + \
+                self.doc_symlink_path.relative_to(
+                    current_app.static_folder).as_posix()
+
+            self.html = self.render()
 
-    ocr_frames = get_ocr_frames(view, mmif)
+        except Exception as e:
+            self.html = f"Error rendering document: <br><br> <pre>{traceback.format_exc()}</pre>"
+
+    def __str__(self):
+        return f"Tab: {self.tab_name} ({self.id})"
+
+
+class AnnotationTab():
+    def __init__(self, mmif, view=None):
+        self.mmif = mmif
+        # Some AnnotationTab sub-classes don't refer to a specific view, and so
+        # they specify their own ids and tab names. For ones that do refer to
+        # a specific view, we set the ids/tab names based on view properties.
+        if view:
+            self.view = view
+            # Workaround to deal with the fact that some apps have a version number
+            # in the URL
+            app_url = view.metadata.app if re.search(
+                r"\/v\d+\.?\d?$", view.metadata.app) else view.metadata.app + "/v1"
+            app_shortname = app_url.split("/")[-2]
 
-    # Generate pages (necessary to reduce IO cost) and render
-    frames_list = [(k, vars(v)) for k, v in ocr_frames.items()]
-    frames_list = find_duplicates(frames_list)
-    frames_pages = paginate(frames_list)
-    # Save page list as temp file
-    save_json(frames_pages, view.id, viz_id)
-    return render_ocr_page(viz_id, vid_path, view.id, 0)
+            self.id = view.id
+            self.tab_name = f"{app_shortname}-{view.id}"
+        try:
+            self.html = self.render()
+        except Exception as e:
+            self.html = f"Error rendering view: <br><br> <pre>{traceback.format_exc()}</pre>"
+
+
+# -- Document Classes --
+
+class TextTab(DocumentTab):
+    def __init__(self, document, viz_id):
+        super().__init__(document, viz_id)
+
+    def render(self):
+        with open(self.doc_path) as t_file:
+            content = t_file.read().replace("\n", "<br/>\n")
+            return f"{content}\n"
+
+
+class ImageTab(DocumentTab):
+    """Tab that shows an image document in an <img> element."""
+    def __init__(self, document, viz_id):
+        super().__init__(document, viz_id)
+
+    def render(self):
+        # Use the static-folder symlink (as AudioTab/VideoTab do), not the raw file path.
+        img_path = url2posix(self.doc_symlink_rel_path)
+        return (
+            f'<img src=\"{img_path}\" alt="Image" style="max-width: 100%">\n')
+
+
+class AudioTab(DocumentTab):
+    def __init__(self, document, viz_id):
+        super().__init__(document, viz_id)
+
+    def render(self):
+        audio_path = url2posix(self.doc_symlink_rel_path)
+        html = StringIO()
+        html.write('<audio id="audioplayer" controls crossorigin="anonymous">\n')
+        html.write(f'    <source src=\"{audio_path}\">\n')
+        html.write("</audio>\n")
+        return html.getvalue()
+
+
+class VideoTab(DocumentTab):
+    def __init__(self, document, mmif, viz_id):
+        # VideoTab needs access to the MMIF object to get the VTT file
+        self.mmif = mmif
+        super().__init__(document, viz_id)
+
+    def render(self):
+        vid_path = url2posix(self.doc_symlink_rel_path)
+        html = StringIO()
+        html.write('<video id="vid" controls crossorigin="anonymous" >\n')
+        html.write(f'    <source src=\"{vid_path}\">\n')
+        for view in self.mmif.views:
+            if get_abstract_view_type(view, self.mmif) == "ASR":
+                vtt_path = get_vtt_file(view, self.viz_id)
+                rel_vtt_path = re.search(
+                    "mmif-viz-cache/.*", vtt_path).group(0)
+                html.write(
+                    f'    <track kind="captions" srclang="en" src="/{rel_vtt_path}" label="transcript" default/>\n')
+        html.write("</video>\n")
+        return html.getvalue()
+
+
+# -- Annotation Classes --
+
+class InfoTab(AnnotationTab):
+    def __init__(self, mmif):
+        self.id = "info"
+        self.tab_name = "Info"
+        super().__init__(mmif)
+
+    def render(self):
+        mmif = self.mmif
+        s = StringIO('Howdy')
+        s.write("<pre>")
+        for document in mmif.documents:
+            at_type = document.at_type.shortname
+            location = document.location
+            s.write("%s  %s\n" % (at_type, location))
+        s.write('\n')
+        for view in mmif.views:
+            app = view.metadata.app
+            status = get_status(view)
+            s.write('%s  %s  %s  %d\n' %
+                    (view.id, app, status, len(view.annotations)))
+            if len(view.annotations) > 0:
+                s.write('\n')
+                types = Counter([a.at_type.shortname
+                                for a in view.annotations])
+                for attype, count in types.items():
+                    s.write('    %4d %s\n' % (count, attype))
+            s.write('\n')
+        s.write("</pre>")
+        return s.getvalue()
+
+
+class AnnotationTableTab(AnnotationTab):
+    """Tab listing every annotation of every view in an HTML table."""
+    def __init__(self, mmif):
+        self.id = "annotations"
+        self.tab_name = "Annotations"
+        super().__init__(mmif)
+
+    def render(self):
+        # Start empty: a seeded buffer leaks its seed text when there are no views.
+        s = StringIO()
+        for view in self.mmif.views:
+            s.write('<p><b>%s  %s</b>  %s  %d annotations</p>\n'
+                    % (view.id, view.metadata.app, get_status(view), len(view.annotations)))
+            s.write("<blockquote>\n")
+            s.write("<table cellspacing=0 cellpadding=5 border=1>\n")
+            for annotation in view.annotations:
+                props = get_properties(annotation)
+                if len(props) > 500:  # truncate very long property dumps
+                    props = props[:500] + "  . . .  }"
+                s.write('  <tr>\n')
+                s.write('    <td>%s</td>\n' % annotation.id)
+                s.write('    <td>%s</td>\n' % annotation.at_type.shortname)
+                s.write('    <td>%s</td>\n' % props)
+                s.write('  </tr>\n')
+            s.write("</table>\n")
+            s.write("</blockquote>\n")
+        return s.getvalue()
+
+
+class JSTreeTab(AnnotationTab):
+    def __init__(self, mmif):
+        self.id = "tree"
+        self.tab_name = "Tree"
+        super().__init__(mmif)
+
+    def render(self):
+        mmif = self.mmif
+        return render_template('interactive.html', mmif=mmif, aligned_views=[])
+
+
+class NERTab(AnnotationTab):
+    def __init__(self, mmif, view):
+        super().__init__(mmif, view)
+
+    def render(self):
+        metadata = self.view.metadata.contains.get(Uri.NE)
+        ner_document = metadata.get('document')
+        return displacy.visualize_ner(self.mmif, self.view, ner_document, current_app.root_path)
+
+
+class VTTTab(AnnotationTab):
+    def __init__(self, mmif, view, viz_id):
+        self.viz_id = viz_id
+        super().__init__(mmif, view)
+
+    def render(self):
+        vtt_filename = get_vtt_file(self.view, self.viz_id)
+        with open(vtt_filename) as vtt_file:
+            vtt_content = vtt_file.read()
+        return f"<pre>{vtt_content}</pre>"
+
+
+class OCRTab(AnnotationTab):
+    def __init__(self, mmif, view, viz_id):
+        self.viz_id = viz_id
+        # NOTE(review): the [0] below is not guarded; presumably fails with no VideoDocument.
+        self.vid_path = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[
+            0].location_path()
+        super().__init__(mmif, view)
+
+    def render(self):
+        # Renders only the "pre-ocr.html" template; prepare_ocr and
+        # render_ocr_page are not called from here.
+        return render_template("pre-ocr.html", view_id=self.view.id, tabname=self.tab_name, mmif_id=self.viz_id)
 
 
 def render_ocr_page(mmif_id, vid_path, view_id, page_number):
     """
     Renders a single OCR page by iterating through frames and displaying the 
-    contents/alignments.
+    contents/alignments. Note: this needs to be a separate function (not a method
+    in OCRTab) because it is called by the server when the page is changed.
     """
     # Path for storing temporary images generated by cv2
     cv2_vid = cv2.VideoCapture(vid_path)