diff --git a/.gitignore b/.gitignore
index 0a9b11b..11c7e30 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,4 +73,6 @@ tags
 
 # VSCode
 .devcontainer
-devcontainer.json
\ No newline at end of file
+devcontainer.json
+
+static/mmif-viz-cache
\ No newline at end of file
diff --git a/app.py b/app.py
index 3d8f887..a92f92c 100644
--- a/app.py
+++ b/app.py
@@ -3,16 +3,20 @@
 import secrets
 import sys
 from threading import Thread
+from shutil import rmtree
 
-from flask import request, render_template, flash, send_from_directory, redirect
+from flask import Flask, request, render_template, flash, send_from_directory, redirect
 from mmif.serialize import Mmif
+from mmif.vocabulary import DocumentTypes
 
 import cache
 from cache import set_last_access, cleanup
-from utils import app, render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization
 import traceback
-import utils
-from utils import app
+from render import render_documents, render_annotations, prepare_ocr, render_ocr_page
+
+# these two static folder-related params are important, do not remove
+app = Flask(__name__, static_folder='static', static_url_path='')
+app.secret_key = os.environ.get('FLASK_SECRET_KEY') or secrets.token_hex(32)  # never ship a fixed key
 
 
 @app.route('/')
@@ -22,24 +26,12 @@ def index():
 
 @app.route('/ocr', methods=['POST'])
 def ocr():
-    try:
-        data = dict(request.json)
-        mmif_str = open(cache.get_cache_root() / data["mmif_id"] / "file.mmif").read()
-        mmif = Mmif(mmif_str)
-        ocr_view = mmif.get_view_by_id(data["view_id"])
-        return utils.prepare_ocr_visualization(mmif, ocr_view, data["mmif_id"])
-    except Exception as e:
-        app.logger.error(f"{e}\n{traceback.format_exc()}")
-        return f'<p class="error">Error: {e} Check the server log for more information.</h1>'
-
-
-@app.route('/ocrpage', methods=['POST'])
-def ocrpage():
-    data = request.json
-    try:
-        return utils.render_ocr(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"])
-    except Exception as e:
-        return f'<p class="error">Unexpected error of type {type(e)}: {e}</h1>'
+    # The first request for a tab carries no page number: build the page cache,
+    # and if that fails return its error HTML rather than a later KeyError.
+    if "page_number" not in request.json:
+        request.json["page_number"] = 0
+        return build_ocr_tab(request.json) or serve_ocr_page(request.json)
+    return serve_ocr_page(request.json)
 
 
 @app.route('/upload', methods=['GET', 'POST'])
@@ -93,7 +85,7 @@ def display(viz_id):
         return html_file
     else:
         app.logger.debug(f"Visualization {viz_id} not found in cache.")
-        os.remove(path)
+        rmtree(path)
         flash("File not found -- please upload again (it may have been deleted to clear up cache space).")
         return redirect("/upload")
 
@@ -103,6 +95,45 @@ def send_js(path):
     return send_from_directory("uv", path)
 
 
+def render_mmif(mmif_str, viz_id):
+    """Parse a MMIF string and return the rendered 'player.html' page."""
+    mmif = Mmif(mmif_str)
+    return render_template(
+        'player.html',
+        docs=render_documents(mmif, viz_id),
+        viz_id=viz_id,
+        annotations=render_annotations(mmif, viz_id))
+
+
+def build_ocr_tab(data):
+    """
+    Prepare the paginated OCR data for a view (run lazily on first tab load).
+
+    Reads the cached MMIF for data["mmif_id"], prepares the OCR pages of view
+    data["view_id"], and stores the first video's path in data["vid_path"].
+    Returns None on success, or an HTML error snippet if anything fails.
+    """
+    try:
+        with open(cache.get_cache_root() / data["mmif_id"] / "file.mmif") as f:
+            mmif = Mmif(f.read())
+        prepare_ocr(mmif, mmif.get_view_by_id(data["view_id"]), data["mmif_id"])
+        videos = mmif.get_documents_by_type(DocumentTypes.VideoDocument)
+        data["vid_path"] = videos[0].location_path()
+    except Exception as e:
+        app.logger.error(f"{e}\n{traceback.format_exc()}")
+        return f'<p class="error">Error: {e} Check the server log for more information.</p>'
+
+
+def serve_ocr_page(data):
+    """Render the OCR page data["page_number"]; return error HTML on failure."""
+    try:
+        return render_ocr_page(data["mmif_id"], data['vid_path'],
+                               data["view_id"], data["page_number"])
+    except Exception as e:
+        app.logger.error(f"{e}\n{traceback.format_exc()}")
+        return f'<p class="error">Unexpected error of type {type(e)}: {e}</p>'
+
+
 def upload_file(in_mmif):
     # Save file locally
     in_mmif_bytes = in_mmif if isinstance(in_mmif, bytes) else in_mmif.read()
@@ -117,13 +148,7 @@ def upload_file(in_mmif):
         with open(path / 'file.mmif', 'w') as in_mmif_file:
             app.logger.debug(f"Writing original MMIF to {path / 'file.mmif'}")
             in_mmif_file.write(in_mmif_str)
-        mmif = Mmif(in_mmif_str)
-        htmlized_docs = utils.documents_to_htmls(mmif, viz_id)
-        app.logger.debug(f"Prepared document: {[d[0] for d in htmlized_docs]}")
-        annotations = utils.prep_annotations(mmif, viz_id)
-        app.logger.debug(f"Prepared Annotations: {[annotation[0] for annotation in annotations]}")
-        html_page = render_template('player.html',
-                               docs=htmlized_docs, viz_id=viz_id, annotations=annotations)
+        html_page = render_mmif(in_mmif_str, viz_id)
         with open(os.path.join(path, "index.html"), "w") as f:
             f.write(html_page)
     except FileExistsError:
@@ -133,7 +158,6 @@ def upload_file(in_mmif):
         t = Thread(target=cleanup)
         t.daemon = True
         t.run()
-
     agent = request.headers.get('User-Agent')
     if 'curl' in agent.lower():
         return f"Visualization ID is {viz_id}\nYou can access the visualized file at {request.url_root}display/{viz_id}\n"
@@ -143,7 +167,8 @@ def upload_file(in_mmif):
 if __name__ == '__main__':
     # Make path for temp files
     cache_path = cache.get_cache_root()
-    cache_symlink_path = os.path.join(app.static_folder, cache._CACHE_DIR_SUFFIX)
+    cache_symlink_path = os.path.join(
+        app.static_folder, cache._CACHE_DIR_SUFFIX)
     if os.path.islink(cache_symlink_path):
         os.unlink(cache_symlink_path)
     elif os.path.exists(cache_symlink_path):
@@ -158,5 +183,5 @@ def upload_file(in_mmif):
     port = 5000
     if len(sys.argv) > 2 and sys.argv[1] == '-p':
         port = int(sys.argv[2])
-        
+
     app.run(port=port, host='0.0.0.0', debug=True, use_reloader=True)
diff --git a/displacy/__init__.py b/displacy/__init__.py
index 0ae0ffe..5a76760 100644
--- a/displacy/__init__.py
+++ b/displacy/__init__.py
@@ -48,11 +48,11 @@ def read_text(textdoc, app_root):
         # container, see the comment in html_text() in ../app.py)
         if not os.path.isfile(location):
             if location.startswith('file:///'):
-                location = location[8:]
+                location = location[7:]
             else:
                 # this should not happen anymore, but keeping it anyway
                 location = location[1:]
-            location = os.path.join(app_root, 'static', location)
+            # location = os.path.join(app_root, 'static', location)
         with open(location) as fh:
             text = fh.read()
     else:
diff --git a/examples/whisper-spacy.json b/examples/whisper-spacy.json
index 967a3d4..9a164e6 100644
--- a/examples/whisper-spacy.json
+++ b/examples/whisper-spacy.json
@@ -8,7 +8,7 @@
             "properties": {
                 "mime": "video",
                 "id": "d1",
-                "location": "file:///data/video/service-mbrs-ntscrm-01181182.mp4"
+                "location": "file:///data/service-mbrs-ntscrm-01181182.mp4"
             }
         },
         {
@@ -16,7 +16,7 @@
             "properties": {
                 "mime": "audio",
                 "id": "d2",
-                "location": "file:///data/audio/service-mbrs-ntscrm-01181182.wav"
+                "location": "file:///data/service-mbrs-ntscrm-01181182.wav"
             }
         },
         {
@@ -24,7 +24,7 @@
             "properties": {
                 "mime": "text",
                 "id": "d3",
-                "location": "file:///data/text/service-mbrs-ntscrm-01181182.txt"
+                "location": "file:///data/service-mbrs-ntscrm-01181182.txt"
             }
         }
     ],
diff --git a/ocr.py b/ocr.py
index a964296..dc1bbc6 100644
--- a/ocr.py
+++ b/ocr.py
@@ -1,14 +1,13 @@
 import datetime
-import pathlib
 
 import cv2
-import tempfile
 import json
 import re
-import os, shutil
+import os
+import shutil
+from mmif.vocabulary.annotation_types import AnnotationTypes
+from mmif.vocabulary.document_types import DocumentTypes
 
-from flask import render_template
-from mmif import AnnotationTypes, DocumentTypes, Mmif
 from mmif.utils.video_document_helper import convert_timepoint, convert_timeframe
 
 import cache
@@ -51,14 +50,17 @@ def update(self, anno, mmif):
 
         elif anno.at_type.shortname == "Paragraph":
             view = mmif.get_view_by_id(anno.parent)
-            text_anno = mmif[anno.properties.get("document")]
+            text_anno = view.get_annotation_by_id(
+                anno.properties.get("document"))
             self.add_text_document(text_anno)
 
-    def add_bounding_box(self, anno, mmif: Mmif):
-        timepoint_anno = None
+    def add_bounding_box(self, anno, mmif):
         if "timePoint" in anno.properties:
             timepoint_anno = mmif[anno.get("timePoint")]
-
+            
+            if timepoint_anno:
+                self.add_timepoint(timepoint_anno, mmif,
+                                   skip_if_view_has_frames=False)
         else:
             for alignment_anns in mmif.get_alignments(AnnotationTypes.BoundingBox, AnnotationTypes.TimePoint).values():
                 for alignment_ann in alignment_anns:
@@ -88,9 +90,11 @@ def add_bounding_box(self, anno, mmif: Mmif):
     def add_timeframe(self, anno, mmif):
         # If annotation has multiple targets, pick the first and last as start and end
         if "targets" in anno.properties:
-            start_id, end_id = anno.properties.get("targets")[0], anno.properties.get("targets")[-1]
+            start_id, end_id = anno.properties.get(
+                "targets")[0], anno.properties.get("targets")[-1]
             anno_parent = mmif.get_view_by_id(anno.parent)
-            start_anno, end_anno = mmif[start_id], mmif[end_id]
+            start_anno, end_anno = anno_parent.get_annotation_by_id(
+                start_id), anno_parent.get_annotation_by_id(end_id)
             start = convert_timepoint(mmif, start_anno, "frames")
             end = convert_timepoint(mmif, end_anno, "frames")
             start_secs = convert_timepoint(mmif, start_anno, "seconds")
@@ -99,7 +103,8 @@ def add_timeframe(self, anno, mmif):
             start, end = convert_timeframe(mmif, anno, "frames")
             start_secs, end_secs = convert_timeframe(mmif, anno, "seconds")
         self.range = (start, end)
-        self.timestamp_range = (str(datetime.timedelta(seconds=start_secs)), str(datetime.timedelta(seconds=end_secs)))
+        self.timestamp_range = (str(datetime.timedelta(seconds=start_secs)), str(
+            datetime.timedelta(seconds=end_secs)))
         self.sec_range = (start_secs, end_secs)
         if anno.properties.get("frameType"):
             self.frametype = str(anno.properties.get("frameType"))
@@ -107,24 +112,43 @@ def add_timeframe(self, anno, mmif):
             self.frametype = str(anno.properties.get("label"))
 
     def add_timepoint(self, anno, mmif, skip_if_view_has_frames=True):
-            parent = mmif.get_view_by_id(anno.parent)
-            other_annotations = [k for k in parent.metadata.contains.keys() if k != anno.id]
-            # If there are TimeFrames in the same view, they most likely represent
-            # condensed information about representative frames (e.g. SWT). In this 
-            # case, only render the TimeFrames and ignore the TimePoints.
-            if any([anno == AnnotationTypes.TimeFrame for anno in other_annotations]) and skip_if_view_has_frames:
-                return
-            self.frame_num = convert_timepoint(mmif, anno, "frames")
-            self.secs = convert_timepoint(mmif, anno, "seconds")
-            self.timestamp = str(datetime.timedelta(seconds=self.secs))
-            if anno.properties.get("label"):
-                self.frametype = anno.properties.get("label")
+        parent = mmif.get_view_by_id(anno.parent)
+        other_annotations = [
+            k for k in parent.metadata.contains.keys() if k != anno.id]
+        # If there are TimeFrames in the same view, they most likely represent
+        # condensed information about representative frames (e.g. SWT). In this
+        # case, only render the TimeFrames and ignore the TimePoints.
+        if any([anno.shortname == "TimeFrame" for anno in other_annotations]) and skip_if_view_has_frames:
+            return
+        self.frame_num = convert_timepoint(mmif, anno, "frames")
+        self.secs = convert_timepoint(mmif, anno, "seconds")
+        self.timestamp = str(datetime.timedelta(seconds=self.secs))
+        if anno.properties.get("label"):
+            self.frametype = anno.properties.get("label")
 
     def add_text_document(self, anno):
-        t = anno.properties.get("text_value") or anno.properties.get("text").value
+        t = anno.properties.get(
+            "text_value") or anno.properties.get("text").value
         if t:
             text_val = re.sub(r'([\\\/\|\"\'])', r'\1 ', t)
-            self.text = self.text + [text_val] if text_val not in self.text else self.text
+            self.text = self.text + \
+                [text_val] if text_val not in self.text else self.text
+
+
+def prepare_ocr(mmif, view, viz_id):
+    """
+    Collects the OCR frames of a view, runs them through find_duplicates,
+    paginates them and saves the pages as JSON. It does not render anything.
+    """
+    ocr_frames = get_ocr_frames(view, mmif)
+
+    # Convert frames to (key, attribute dict) pairs so they can be paginated
+    frames_list = [(k, vars(v)) for k, v in ocr_frames.items()]
+    frames_list = find_duplicates(frames_list)
+    frames_pages = paginate(frames_list)
+    # Save the page list in the cache as "<view id>-pages.json"
+    save_json(frames_pages, view.id, viz_id)
+
 
 
 def get_ocr_frames(view, mmif):
@@ -149,7 +173,7 @@ def get_ocr_frames(view, mmif):
                 frames[i].update(target, mmif)
             else:
                 frames[i] = frame
-            
+
     else:
         for annotation in view.get_annotations():
             frame = OCRFrame(annotation, mmif)
@@ -160,7 +184,6 @@ def get_ocr_frames(view, mmif):
                 frames[i].update(annotation, mmif)
             else:
                 frames[i] = frame
-    print(frames)
     return frames
 
 
@@ -183,45 +206,9 @@ def paginate(frames_list):
     return {i: page for (i, page) in enumerate(pages)}
 
 
-def render_ocr(mmif_id, vid_path, view_id, page_number):
-    """
-    Iterate through frames and display the contents/alignments.
-    """
-    # Path for storing temporary images generated by cv2
-    cv2_vid = cv2.VideoCapture(vid_path)
-    tn_data_fname = cache.get_cache_root() / mmif_id / f"{view_id}-pages.json"
-    thumbnail_pages = json.load(open(tn_data_fname))
-    page = thumbnail_pages[str(page_number)]
-    prev_frame_cap = None
-    path = make_image_directory(mmif_id)
-    for frame_num, frame in page:
-        # If index is range instead of frame...
-        if frame.get("range"):
-            frame_num = (int(frame["range"][0]) + int(frame["range"][1])) / 2
-        cv2_vid.set(1, frame_num)
-        _, frame_cap = cv2_vid.read()
-        if frame_cap is None:
-            raise FileNotFoundError(f"Video file {vid_path} not found!")
-
-        # Double check histogram similarity of "repeat" frames -- if they're significantly different, un-mark as repeat
-        if prev_frame_cap is not None and frame["repeat"] and not is_duplicate_image(prev_frame_cap, frame_cap,
-                                                                                     cv2_vid):
-            frame["repeat"] = False
-        with tempfile.NamedTemporaryFile(dir=str(path), suffix=".jpg", delete=False) as tf:
-            cv2.imwrite(tf.name, frame_cap)
-            # "id" is just the name of the temp image file
-            frame["id"] = pathlib.Path(tf.name).name
-        prev_frame_cap = frame_cap
-
-    tn_page_html = render_template(
-        'ocr.html', vid_path=vid_path, view_id=view_id, page=page,
-        n_pages=len(thumbnail_pages), page_number=str(page_number), mmif_id=mmif_id)
-    return tn_page_html
-
-
-def make_image_directory(mmif_id):
+def make_image_directory(mmif_id, view_id):
     # Make path for temp OCR image files or clear image files if it exists
-    path = cache.get_cache_root() / mmif_id / "img"
+    path = cache.get_cache_root() / mmif_id / "img" / view_id
     if os.path.exists(path):
         shutil.rmtree(path)
     os.makedirs(path)
@@ -266,10 +253,14 @@ def is_duplicate_image(prev_frame, frame, cv2_vid):
     img2_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
 
     # Calculate the histogram and normalize it
-    hist_img1 = cv2.calcHist([img1_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
-    cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX);
-    hist_img2 = cv2.calcHist([img2_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
-    cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX);
+    hist_img1 = cv2.calcHist([img1_hsv], [0, 1], None, [
+                             180, 256], [0, 180, 0, 256])
+    cv2.normalize(hist_img1, hist_img1, alpha=0,
+                  beta=1, norm_type=cv2.NORM_MINMAX)
+    hist_img2 = cv2.calcHist([img2_hsv], [0, 1], None, [
+                             180, 256], [0, 180, 0, 256])
+    cv2.normalize(hist_img2, hist_img2, alpha=0,
+                  beta=1, norm_type=cv2.NORM_MINMAX)
 
     # Find the metric value
     metric_val = cv2.compareHist(hist_img1, hist_img2, cv2.HISTCMP_CHISQR)
@@ -289,29 +280,6 @@ def round_boxes(boxes):
     return rounded_boxes
 
 
-def get_ocr_views(mmif):
-    """Returns all CV views, which contain timeframes or bounding boxes"""
-    views = []
-    required_types = ["TimeFrame", "BoundingBox", "TimePoint"]
-    for view in mmif.views:
-        for anno_type, anno in view.metadata.contains.items():
-            # Annotation belongs to a CV view if it is a TimeFrame/BB and it refers to a VideoDocument
-            # if anno.get("document") is None:
-            #     continue
-            # if anno_type.shortname in required_types and mmif.get_document_by_id(
-            #         anno["document"]).at_type.shortname == "VideoDocument":
-            #     views.append(view)
-            #     continue
-            if anno_type.shortname in required_types:
-                views.append(view)
-                break
-            # TODO: Couldn't find a simple way to show if an alignment view is a CV/Frames-type view
-            elif "parseq" in view.metadata.app:
-                views.append(view)
-                break
-    return views
-
-
 def save_json(data, view_id, mmif_id):
     path = cache.get_cache_root() / mmif_id / f"{view_id}-pages.json"
     with open(path, 'w') as f:
diff --git a/render.py b/render.py
new file mode 100644
index 0000000..dc4b758
--- /dev/null
+++ b/render.py
@@ -0,0 +1,321 @@
+import os
+import pathlib
+from io import StringIO
+from collections import Counter
+from flask import render_template, current_app
+import re
+
+from mmif import DocumentTypes
+from lapps.discriminators import Uri
+import displacy
+import traceback
+
+from utils import get_status, get_properties, get_abstract_view_type, url2posix, get_vtt_file
+from ocr import prepare_ocr, make_image_directory, is_duplicate_image
+import cv2
+import json
+import tempfile
+
+import cache
+
+"""
+Methods to render MMIF documents and their annotations in various formats.
+"""
+
+# -- Render methods --
+
+
+def render_documents(mmif, viz_id):
+    """Builds a tab object for every supported document, in MMIF order."""
+    rendered = []
+    for doc in mmif.documents:
+        doc_type = doc.at_type
+        if doc_type == DocumentTypes.TextDocument:
+            rendered.append(TextTab(doc, viz_id))
+        elif doc_type == DocumentTypes.ImageDocument:
+            rendered.append(ImageTab(doc, viz_id))
+        elif doc_type == DocumentTypes.AudioDocument:
+            rendered.append(AudioTab(doc, viz_id))
+        elif doc_type == DocumentTypes.VideoDocument:
+            # VideoTab is the only tab that also takes the MMIF object
+            rendered.append(VideoTab(doc, mmif, viz_id))
+
+    return rendered
+
+
+def render_annotations(mmif, viz_id):
+    """
+    Builds the annotation tabs: three fixed tabs (info, annotation table,
+    tree) followed by one tab per view recognized as NER, ASR or OCR.
+    """
+    # Fixed tabs, constructed in display order
+    rendered = [InfoTab(mmif), AnnotationTableTab(mmif), JSTreeTab(mmif)]
+    # View-specific tabs; views of any other kind get no tab
+    for view in mmif.views:
+        kind = get_abstract_view_type(view, mmif)
+        if kind == "NER":
+            tab = NERTab(mmif, view)
+        elif kind == "ASR":
+            tab = VTTTab(mmif, view, viz_id)
+        elif kind == "OCR":
+            tab = OCRTab(mmif, view, viz_id)
+        else:
+            continue
+        rendered.append(tab)
+    return rendered
+
+
+# -- Base Tab Class --
+
+class DocumentTab():
+    """Base class for document tabs; subclasses supply render()."""
+
+    def __init__(self, document, viz_id):
+        self.id = document.id
+        self.tab_name = document.at_type.shortname
+        self.viz_id = viz_id
+        try:
+            self.doc_path = document.location_path()
+            # Symlink the document into the static cache folder so the
+            # browser can request it; the link is named <doc id>.<extension>.
+            static_root = pathlib.Path(current_app.static_folder)
+            extension = self.doc_path.split('.')[-1]
+            link_name = f"{document.id}.{extension}"
+            self.doc_symlink_path = static_root / cache._CACHE_DIR_SUFFIX / viz_id / link_name
+            os.symlink(self.doc_path, self.doc_symlink_path)
+            rel_path = self.doc_symlink_path.relative_to(current_app.static_folder)
+            self.doc_symlink_rel_path = '/' + rel_path.as_posix()
+            self.html = self.render()
+        except Exception:
+            self.html = f"Error rendering document: <br><br> <pre>{traceback.format_exc()}</pre>"
+
+    def __str__(self):
+        return f"Tab: {self.tab_name} ({self.id})"
+
+
+class AnnotationTab():
+    """Base class for annotation tabs; subclasses supply render()."""
+
+    def __init__(self, mmif, view=None):
+        self.mmif = mmif
+        # View-less tabs set their own id/tab_name; view tabs derive them here.
+        if view:
+            self.view = view
+            app_url = view.metadata.app
+            # The app name is the second-to-last URL segment when the URL ends
+            # in a version; pad unversioned URLs with "/v1" so that still holds.
+            if not re.search(r"\/v\d+\.?\d?$", app_url):
+                app_url = app_url + "/v1"
+            app_shortname = app_url.split("/")[-2]
+            self.id = view.id
+            self.tab_name = f"{app_shortname}-{view.id}"
+        try:
+            self.html = self.render()
+        except Exception:
+            self.html = f"Error rendering view: <br><br> <pre>{traceback.format_exc()}</pre>"
+
+
+# -- Document Classes --
+
+class TextTab(DocumentTab):
+    """Tab that shows a text document as HTML, one <br/> per line break."""
+
+    def render(self):
+        from html import escape  # local import keeps this block self-contained
+        with open(self.doc_path) as t_file:
+            # Escape first: the template inserts this HTML without escaping.
+            return escape(t_file.read()).replace("\n", "<br/>\n") + "\n"
+
+
+class ImageTab(DocumentTab):
+    """Tab that shows an image document in an <img> element."""
+
+    def render(self):
+        # Serve the image through the symlink under the static folder, as
+        # AudioTab and VideoTab do; DocumentTab creates that link so that the
+        # browser can access the document.
+        img_path = url2posix(self.doc_symlink_rel_path)
+        img_tag = f'<img src=\"{img_path}\" alt="Image" style="max-width: 100%">\n'
+        return img_tag
+
+
+class AudioTab(DocumentTab):
+    """Tab that plays an audio document through an <audio> element."""
+
+    def render(self):
+        audio_path = url2posix(self.doc_symlink_rel_path)
+        lines = [
+            '<audio id="audioplayer" controls crossorigin="anonymous">\n',
+            f'    <source src=\"{audio_path}\">\n',
+            "</audio>\n",
+        ]
+        return "".join(lines)
+
+
+class VideoTab(DocumentTab):
+    """Tab that plays a video document, with a caption track per ASR view."""
+
+    def __init__(self, document, mmif, viz_id):
+        # Must be set before the base __init__, which calls render()
+        self.mmif = mmif
+        super().__init__(document, viz_id)
+
+    def render(self):
+        vid_path = url2posix(self.doc_symlink_rel_path)
+        parts = ['<video id="vid" controls crossorigin="anonymous" >\n',
+                 f'    <source src=\"{vid_path}\">\n']
+        for view in self.mmif.views:
+            if get_abstract_view_type(view, self.mmif) != "ASR":
+                continue
+            vtt_path = get_vtt_file(view, self.viz_id)
+            rel_vtt_path = re.search("mmif-viz-cache/.*", vtt_path).group(0)
+            parts.append(f'    <track kind="captions" srclang="en" src="/{rel_vtt_path}" label="transcript" default/>\n')
+        parts.append("</video>\n")
+        return "".join(parts)
+
+
+# -- Annotation Classes --
+
+class InfoTab(AnnotationTab):
+    """Plain-text summary tab: the documents, then per-view annotation counts."""
+
+    def __init__(self, mmif):
+        # Not tied to a view, so the id and tab name are fixed here
+        self.id = "info"
+        self.tab_name = "Info"
+        super().__init__(mmif)
+
+    def render(self):
+        mmif = self.mmif
+        out = ["<pre>"]
+        # One line per document: type and location
+        for document in mmif.documents:
+            out.append("%s  %s\n" % (document.at_type.shortname, document.location))
+        out.append('\n')
+        for view in mmif.views:
+            n_annotations = len(view.annotations)
+            # Header line for the view: id, app, status, annotation count
+            out.append('%s  %s  %s  %d\n' %
+                       (view.id, view.metadata.app, get_status(view), n_annotations))
+            if n_annotations > 0:
+                out.append('\n')
+                # Count the view's annotations per annotation type
+                per_type = Counter(a.at_type.shortname for a in view.annotations)
+                out.extend('    %4d %s\n' % (count, attype) for attype, count in per_type.items())
+            out.append('\n')
+        out.append("</pre>")
+        return "".join(out)
+
+
+class AnnotationTableTab(AnnotationTab):
+    """Tab with one HTML table per view listing id, type and properties."""
+
+    def __init__(self, mmif):
+        # Not tied to a view, so the id and tab name are fixed here
+        self.id = "annotations"
+        self.tab_name = "Annotations"
+        super().__init__(mmif)
+
+    def render(self):
+        # Start empty: a StringIO seeded with text returns that text verbatim
+        # when nothing is written, i.e. for a MMIF without views.
+        s = StringIO()
+        for view in self.mmif.views:
+            s.write('<p><b>%s  %s</b>  %s  %d annotations</p>\n'
+                    % (view.id, view.metadata.app, get_status(view), len(view.annotations)))
+            s.write("<blockquote>\n<table cellspacing=0 cellpadding=5 border=1>\n")
+            for annotation in view.annotations:
+                props = get_properties(annotation)
+                if len(props) > 500:  # keep very long property dumps readable
+                    props = props[:500] + "  . . .  }"
+                s.write('  <tr>\n')
+                s.write('    <td>%s</td>\n' % annotation.id)
+                s.write('    <td>%s</td>\n' % annotation.at_type.shortname)
+                s.write('    <td>%s</td>\n  </tr>\n' % props)
+            s.write("</table>\n</blockquote>\n")
+        return s.getvalue()
+
+
+class JSTreeTab(AnnotationTab):
+    """Tab that renders the MMIF through the 'interactive.html' template."""
+
+    def __init__(self, mmif):
+        self.id, self.tab_name = "tree", "Tree"
+        super().__init__(mmif)
+
+    def render(self):
+        return render_template('interactive.html', mmif=self.mmif, aligned_views=[])
+
+
+class NERTab(AnnotationTab):
+    """Tab that visualizes a view's named entities with displacy."""
+    def __init__(self, mmif, view):
+        super().__init__(mmif, view)
+
+    def render(self):
+        ner_document = self.view.metadata.contains.get(Uri.NE).get('document')
+        return displacy.visualize_ner(self.mmif, self.view, ner_document, current_app.root_path)
+
+
+class VTTTab(AnnotationTab):
+    """Tab that shows the VTT file of a view inside a <pre> element."""
+    def __init__(self, mmif, view, viz_id):
+        # Must be set before the base __init__, which calls render()
+        self.viz_id = viz_id
+        super().__init__(mmif, view)
+
+    def render(self):
+        with open(get_vtt_file(self.view, self.viz_id)) as vtt_file:
+            return f"<pre>{vtt_file.read()}</pre>"
+
+
+class OCRTab(AnnotationTab):
+    """Tab holding the 'pre-ocr.html' placeholder for a view's OCR results."""
+
+    def __init__(self, mmif, view, viz_id):
+        self.viz_id = viz_id
+        videos = mmif.get_documents_by_type(DocumentTypes.VideoDocument)
+        self.vid_path = videos[0].location_path()
+        super().__init__(mmif, view)
+
+    def render(self):
+        # The OCR pages themselves are not rendered at this point
+        return render_template("pre-ocr.html", view_id=self.view.id, tabname=self.tab_name, mmif_id=self.viz_id)
+
+
+def render_ocr_page(mmif_id, vid_path, view_id, page_number):
+    """
+    Renders a single OCR page: grabs a thumbnail from the video for every frame
+    on the page and fills the 'ocr.html' template. This is a module-level
+    function (not an OCRTab method) because the server calls it on page changes.
+
+    Raises FileNotFoundError when a frame cannot be read from the video.
+    """
+    tn_data_fname = cache.get_cache_root() / mmif_id / f"{view_id}-pages.json"
+    with open(tn_data_fname) as pages_file:  # close the handle promptly
+        thumbnail_pages = json.load(pages_file)
+    page = thumbnail_pages[str(page_number)]
+    prev_frame_cap = None
+    path = make_image_directory(mmif_id, view_id)
+    cv2_vid = cv2.VideoCapture(vid_path)
+    try:
+        for frame_num, frame in page:
+            if frame.get("range"):  # index is a range: use its midpoint
+                frame_num = (int(frame["range"][0]) + int(frame["range"][1])) / 2
+            cv2_vid.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
+            _, frame_cap = cv2_vid.read()
+            if frame_cap is None:
+                raise FileNotFoundError(f"Video file {vid_path} not found!")
+            # Un-mark "repeat" frames whose image differs from the previous one
+            if prev_frame_cap is not None and frame["repeat"] and not is_duplicate_image(
+                    prev_frame_cap, frame_cap, cv2_vid):
+                frame["repeat"] = False
+            with tempfile.NamedTemporaryFile(dir=str(path), suffix=".jpg", delete=False) as tf:
+                cv2.imwrite(tf.name, frame_cap)
+                frame["id"] = pathlib.Path(tf.name).name  # temp image file name
+            prev_frame_cap = frame_cap
+    finally:
+        cv2_vid.release()  # free the video handle even when a frame fails
+    return render_template(
+        'ocr.html', vid_path=vid_path, view_id=view_id, page=page,
+        n_pages=len(thumbnail_pages), page_number=str(page_number), mmif_id=mmif_id)
diff --git a/templates/ocr.html b/templates/ocr.html
index 7daea2b..9bf370f 100644
--- a/templates/ocr.html
+++ b/templates/ocr.html
@@ -1,7 +1,7 @@
 
 <div id="ocr_tab_{{view_id}}">
     {% for frame_num, frame in page %}
-        {% set filename = "/mmif-viz-cache/" + mmif_id + "/img/" + frame["id"] %}
+        {% set filename = "/mmif-viz-cache/" + mmif_id + "/img/" + view_id + "/" + frame["id"] %}
         {% set id = frame["id"] %}
         {% set boxes = frame["boxes"] %}
         {% set secs = frame["secs"] %}
@@ -142,7 +142,7 @@ <h4>
         if (data["page_number"] >= 0 && data["page_number"] < parseInt("{{n_pages}}")) {
             $.ajax({
             type:'POST',
-            url:'/ocrpage',
+            url:'/ocr',
             contentType: "application/json",
             data: JSON.stringify(data),
             success: function(res_html){
diff --git a/templates/player.html b/templates/player.html
index 3f56ea8..af62b47 100644
--- a/templates/player.html
+++ b/templates/player.html
@@ -117,27 +117,27 @@ <h1 class="title">Visualizing MMIF</h1>
         <ul class="nav nav-tabs">
           <!-- printing the first one out of the loop so it can be made the active link -->
           <li class="nav-item">
-            <a class="nav-link active" data-toggle="tab" href="#{{ docs[0][0] }}">{{ docs[0][0] }}</a>
+            <a class="nav-link active" data-toggle="tab" href="#{{ docs[0]['tab_name'] }}">{{ docs[0]['tab_name'] }}</a>
           </li>
           {% for medium in docs[1:] %}
-          <li class="nav-item {{medium[0]}}">
-            <a class="nav-link" data-toggle="tab" href="#{{ medium[0] }}">{{ medium[0] }}</a>
+          <li class="nav-item {{medium['tab_name']}}">
+            <a class="nav-link" data-toggle="tab" href="#{{ medium['tab_name'] }}">{{ medium['tab_name'] }}</a>
           </li>
           {%  endfor %}
         </ul>
 
         <!-- contents of the documents -->
         <div class="tab-content">
-          <div id="{{ docs[0][0] }}" class="tab-pane fade show active">
+          <div id="{{ docs[0]['tab_name'] }}" class="tab-pane fade show active">
             <br/>
-            <p>{{ docs[0][2] }}</p>
-            {{ docs[0][3] | safe }}
+            {# <p>{{ docs[0][2] }}</p> #}
+            {{ docs[0]['html'] | safe }}
           </div>
           {% for medium in docs[1:] %}
-          <div id="{{ medium[0] }}" class="tab-pane fade">
+          <div id="{{ medium['tab_name'] }}" class="tab-pane fade">
             <br/>
-            <p>{{ medium[2] }}</p>
-            {{ medium[3] | safe }}
+            {# <p>{{ medium[2] }}</p> #}
+            {{ medium['html'] | safe }}
           </div>
           {% endfor %}
         </div>
@@ -148,8 +148,8 @@ <h1 class="title">Visualizing MMIF</h1>
         <!-- navigation tabs for the visualizations (WebVTT, Entities, etcetera) -->
         <ul class="nav nav-tabs">
           {% for annotation in annotations %}
-          <li class="nav-item {{ annotation[0] }}">
-            <a class="nav-link" data-toggle="tab" href="#{{ annotation[0] }}">{{ annotation[0] }}</a>
+          <li class="nav-item {{ annotation['tab_name'] }}">
+            <a class="nav-link" data-toggle="tab" href="#{{ annotation['tab_name'] }}">{{ annotation['tab_name'] }}</a>
           </li>
           {%  endfor %}
         </ul>
@@ -157,9 +157,9 @@ <h1 class="title">Visualizing MMIF</h1>
         <!-- visualization content -->
         <div class="tab-content">
           {% for annotation in annotations %}
-          <div id="{{ annotation[0] }}" class="tab-pane fade">
+          <div id="{{ annotation['tab_name'] }}" class="tab-pane fade">
             <br/>
-            {{ annotation[1] | safe }}
+            {{ annotation['html'] | safe }}
           </div>
           {% endfor %}
         </div>
diff --git a/templates/pre-ocr.html b/templates/pre-ocr.html
index eba08d3..c2bf537 100644
--- a/templates/pre-ocr.html
+++ b/templates/pre-ocr.html
@@ -1,3 +1,5 @@
+<!-- TODO: This behavior should be standardized across pages/tabs -->
+
 <div class="loader-container">
     <div class="loader"></div>
 </div>
diff --git a/templates/tab-placeholder.html b/templates/tab-placeholder.html
new file mode 100644
index 0000000..c2bf537
--- /dev/null
+++ b/templates/tab-placeholder.html
@@ -0,0 +1,33 @@
+<!-- TODO: This behavior should be standardized across pages/tabs -->
+
+<div class="loader-container">
+    <div class="loader"></div>
+</div>
+
+<script>
+    // Keep one list even if this template is rendered more than once on a page; it must stay an array
+    window.loaded_ocr_tabs = window.loaded_ocr_tabs || [];
+    // Lazy load OCR element (for very large files)
+    $(".nav-item.{{tabname}}").click(function() {
+        if (loaded_ocr_tabs.includes("{{tabname}}"))
+            return
+        loaded_ocr_tabs.push("{{tabname}}");
+        var data = {
+            "view_id": "{{view_id}}",
+            "mmif_id": "{{mmif_id}}"
+        }
+        $.ajax({
+            type:'POST',
+            url:'/ocr',
+            contentType: "application/json",
+            data: JSON.stringify(data),
+            success: function(res_html){
+              $('#{{tabname}}').html(res_html);
+            },
+            error: function(jqXHR){
+                // jQuery hands this callback the jqXHR object, not a message string
+                $('#{{tabname}}').html(jqXHR.responseText || jqXHR.statusText);
+            }
+        })
+})
+</script>
\ No newline at end of file
diff --git a/utils.py b/utils.py
index ce2df4c..5f44679 100644
--- a/utils.py
+++ b/utils.py
@@ -1,374 +1,17 @@
-from collections import Counter
-from datetime import timedelta
-from io import StringIO
-
-from flask import Flask, url_for
-from lapps.discriminators import Uri
-from mmif.serialize.annotation import Text, Document
-from mmif.utils.timeunit_helper import UNIT_NORMALIZATION
-
-import displacy
-import iiif_utils
-from ocr import *
-
-# Get Properties from MMIF file ---
-
-# these two static folder-related params are important, do not remove
-app = Flask(__name__, static_folder='static', static_url_path='')
-app.secret_key = 'your_secret_key_here'
-
-
-def normalize_timeunit(tu_str):
-    if tu_str in UNIT_NORMALIZATION:
-        return UNIT_NORMALIZATION[tu_str]
-    else:
-        return tu_str
-
-
-def asr_alignments_to_vtt(alignment_view, viz_id):
-    vtt_filename = cache.get_cache_root() / viz_id / f"{alignment_view.id.replace(':', '-')}.vtt" 
-    if vtt_filename.exists():
-        return str(vtt_filename)
-    vtt_file = open(vtt_filename, 'w')
-    vtt_file.write("WEBVTT\n\n")
-    annotations = alignment_view.annotations
-    timeframe_at_type = [at_type for at_type in alignment_view.metadata.contains if at_type.shortname == "TimeFrame"][0]
-    timeunit = normalize_timeunit(alignment_view.metadata.contains[timeframe_at_type]["timeUnit"])
-    # make plural so that this key can be used in timedelta init
-    if timeunit[-1] != 's':
-        timeunit += 's'
-    # TODO: wanted to use "mmif.get_alignments(AnnotationTypes.TimeFrame, Uri.TOKEN)"
-    # but that gave errors so I gave up on it
-    token_idx = {a.id: a for a in annotations if a.at_type.shortname == "Token"}
-    timeframe_idx = {a.id: a for a in annotations if a.at_type.shortname == "TimeFrame"}
-    alignments = [a for a in annotations if a.at_type.shortname == "Alignment"]
-    vtt_start = None
-    texts = []
-    for alignment in alignments:
-        start_end_text = build_alignment(alignment, token_idx, timeframe_idx)
-        if start_end_text is not None:
-            # VTT specifically requires timestamps expressed in miliseconds and
-            # must be be in one of these formats: mm:ss.ttt or hh:mm:ss.ttt
-            # (https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API)
-            # ISO format can have up to 6 below the decimal point, on the other hand
-            # Assuming here that start and end are in miliseconds
-            start, end, text = start_end_text
-            start_kwarg, end_kwarg = {timeunit: float(start)}, {timeunit: float(end)}
-            start, end = timedelta(**start_kwarg), timedelta(**end_kwarg)
-            s_mins, s_secs = divmod(start.seconds, 60)
-            e_mins, e_secs = divmod(end.seconds, 60)
-            if not vtt_start:
-                vtt_start = f'{s_mins:02d}:{s_secs:02d}.{((s_secs - int(s_secs)) * 1000):03d}'
-            texts.append(text)
-            if len(texts) > 8:
-                vtt_end = f'{e_mins:02d}:{e_secs:02d}.{((e_secs - int(e_secs)) * 1000):03d}'
-                vtt_file.write(f'{vtt_start} --> {vtt_end}\n{" ".join(texts)}\n\n')
-                vtt_start = None
-                texts = []
-    return vtt_file.name
-
-
-def build_alignment(alignment, token_idx, timeframe_idx):
-    target = alignment.get('target')
-    source = alignment.get('source')
-    timeframe = timeframe_idx.get(source)
-    token = token_idx.get(target)
-    if timeframe and token:
-        start = timeframe.get('start')
-        end = timeframe.get('end')
-        for text_key in ['text', 'word']:
-            if text_key in token:
-                text = token.get(text_key)
-                return start, end, text
-
-
-def get_src_media_symlink_basename(doc: Document):
-    doc_path = doc.location_path()
-    return f"{doc.id}.{doc_path.split('.')[-1]}"
-
-
-def get_symlink_relurl(viz_id, symlink_fname):
-    static_folder = pathlib.Path(app.static_folder)
-    symlink_path = pathlib.Path(cache._CACHE_DIR_SUFFIX) / viz_id / symlink_fname
-    return static_folder / symlink_path
-
-
-def symlink_to_static(viz_id, original_path, symlink_fname) -> str:
-    static_folder = pathlib.Path(app.static_folder)
-    symlink_path = pathlib.Path(cache._CACHE_DIR_SUFFIX) / viz_id / symlink_fname
-    app.logger.debug(f"Symlinking {original_path} to {symlink_path}")
-    try:
-        os.symlink(original_path, static_folder / symlink_path)
-    except Exception as e:
-        app.logger.error(f"SOME ERROR when symlinking: {str(e)}")
-    app.logger.debug(f"{original_path} is symlinked to {symlink_path}")
-    symlink_rel_path = url_for('static', filename=symlink_path)
-    app.logger.debug(f"and exposable as {symlink_rel_path}")
-    return symlink_rel_path
-
-
-def documents_to_htmls(mmif, viz_id):
-    """
-    Returns a list of tuples, one for each element in the documents list of
-    the MMIF object, following the order in that list. Each tuple has four
-    elements: document type, document identifier, document path and the HTML
-    visualization.
-    """
-    htmlized = []
-    for document in mmif.documents:
-        doc_path = document.location_path()
-        app.logger.debug(f"MMIF on AV asset: {doc_path}")
-        linked = symlink_to_static(viz_id, doc_path, get_src_media_symlink_basename(document))
-        if document.at_type == DocumentTypes.TextDocument:
-            html = html_text(linked)
-        elif document.at_type == DocumentTypes.VideoDocument:
-            fa_views = get_alignment_views(mmif)
-            fa_view = fa_views[0] if fa_views else None
-            html = html_video(viz_id, linked, fa_view)
-        elif document.at_type == DocumentTypes.AudioDocument:
-            html = html_audio(linked)
-        elif document.at_type == DocumentTypes.ImageDocument:
-            boxes = get_boxes(mmif)
-            html = html_img(linked, boxes)
-        htmlized.append((document.at_type.shortname, document.id, doc_path, html))
-    manifest_filename = iiif_utils.generate_iiif_manifest(mmif, viz_id)
-    app.logger.debug(f"Generated IIIF manifest: {manifest_filename}")
-    man = os.path.basename(manifest_filename)
-    app.logger.debug(f"Manifest filename: {man}")
-    symlink_to_static(viz_id, manifest_filename, man)
-    app.logger.debug(f"Symlinked IIIF manifest: {None}")
-    temp = render_template("uv_player.html", manifest=man, mmif_id=viz_id)
-    # TODO (krim @ 2024-03-12): Turning off IIIF added to the HTML page since
-    # 1. current IIIF manifest conversion is based on old version of manifest API, and quite brittle
-    # 2. the conversion code at the moment can only convert TimeFrame annotation to "jump-able" IIIF canvases, 
-    # but the case is already covered by `Thumbnails` tab (look for usage of `pre-ocr.html` template)
-    # htmlized.append(('UV', "", "", temp))
-    return htmlized
-
-
-def get_boxes(mmif):
-    # TODO: this gives you the last view with BoundingBoxes, should
-    # perhaps use get_views_contain() instead, should also select just
-    # the bounding boxes and add information from alignments to text
-    # documents.
-    tbox_view = mmif.get_view_contains(str(AnnotationTypes.BoundingBox))
-    tbox_annotations = tbox_view.annotations
-    # For the boxes we pull some information from the annotation: the
-    # identifier, boxType and the (x,y,w,h) coordinates used by the
-    # Javascript code that draws the rectangle.
-    boxes = []
-    for a in tbox_annotations:
-        coordinates = a.get("coordinates")
-        x = coordinates[0][0]
-        y = coordinates[0][1]
-        w = coordinates[1][0] - x
-        h = coordinates[2][1] - y
-        box = [a.get("id"), a.get("boxType"), [x, y, w, h]]
-        boxes.append(box)
-    return boxes
-
-
-def prep_annotations(mmif, viz_id):
-    """Prepare annotations from the views, and return a list of pairs of tabname
-    and tab content. The first tab is alway the full MMIF pretty print."""
-    tabs = []
-    tabs.append(("Info", "<pre>" + create_info(mmif) + "</pre>"))
-    app.logger.debug(f"Prepared INFO Tab: {tabs[-1][0]}")
-    # tabs.append(("MMIF", "<pre>" + mmif.serialize(pretty=True) + "</pre>"))
-    # app.logger.debug(f"Prepared RAW Tab: {tabs[-1][0]}")
-    tabs.append(("Annotations", create_annotation_tables(mmif)))
-    app.logger.debug(f"Prepared SUMMARY Tab: {tabs[-1][0]}")
-    tabs.append(("Tree", render_interactive_mmif(mmif)))
-    app.logger.debug(f"Prepared JSTREE Tab: {tabs[-1][0]}")
-    # TODO: since this uses the same tab-name this will only show the same
-    # stuff; it does a loop but for now we assume there is just one file with
-    # alignments (generated by Kaldi)
-    for fa_view in get_alignment_views(mmif):
-        vtt_file = asr_alignments_to_vtt(fa_view, viz_id)
-        tabs.append(("WebVTT", '<pre>' + open(vtt_file).read() + '</pre>'))
-        app.logger.debug(f"Prepared a VTT Tab: {tabs[-1][0]}")
-    ner_views = get_ner_views(mmif)
-    use_id = True if len(ner_views) > 1 else False
-    for ner_view in ner_views:
-        if not ner_view.annotations:
-            continue
-        visualization = create_ner_visualization(mmif, ner_view)
-        tabname = "Entities-%s" % ner_view.id if use_id else "Entities"
-        tabs.append((tabname, visualization))
-        app.logger.debug(f"Prepared a displaCy Tab: {tabs[-1][0]}")
-    # TODO: somewhat hackish
-    ocr_views = get_ocr_views(mmif)
-    use_id = True if len(ocr_views) > 1 else False
-    for ocr_view in ocr_views:
-        if not ocr_view.annotations:
-            continue
-        tabname = "Thumbnails-%s" % ocr_view.id
-        visualization = render_template("pre-ocr.html", view_id=ocr_view.id, tabname=tabname, mmif_id=viz_id)
-        tabs.append((tabname, visualization))
-        app.logger.debug(f"Prepared a Thumbnails Tab: {tabs[-1][0]}")
-    return tabs
-
-
-def create_info(mmif):
-    s = StringIO('Howdy')
-    for document in mmif.documents:
-        at_type = document.at_type.shortname
-        location = document.location
-        s.write("%s  %s\n" % (at_type, location))
-    s.write('\n')
-    for view in mmif.views:
-        app = view.metadata.app
-        status = get_status(view)
-        s.write('%s  %s  %s  %d\n' % (view.id, app, status, len(view.annotations)))
-        if len(view.annotations) > 0:
-            s.write('\n')
-            types = Counter([a.at_type.shortname
-                             for a in view.annotations])
-            for attype, count in types.items():
-                s.write('    %4d %s\n' % (count, attype))
-        s.write('\n')
-    return s.getvalue()
-
-
-def create_annotation_tables(mmif):
-    s = StringIO('Howdy')
-    for view in mmif.views:
-        status = get_status(view)
-        s.write('<p><b>%s  %s</b>  %s  %d annotations</p>\n'
-                % (view.id, view.metadata.app, status, len(view.annotations)))
-        s.write("<blockquote>\n")
-        s.write("<table cellspacing=0 cellpadding=5 border=1>\n")
-        limit_len = lambda str: str[:500] + "  . . .  }" if len(str) > 500 else str
-        for annotation in view.annotations:
-            s.write('  <tr>\n')
-            s.write('    <td>%s</td>\n' % annotation.id)
-            s.write('    <td>%s</td>\n' % annotation.at_type.shortname)
-            s.write('    <td>%s</td>\n' % limit_len(get_properties(annotation)))
-            s.write('  </tr>\n')
-        s.write("</table>\n")
-        s.write("</blockquote>\n")
-    return s.getvalue()
-
-
-def get_document_ids(view, annotation_type):
-    metadata = view.metadata.contains.get(annotation_type)
-    ids = set([metadata['document']]) if 'document' in metadata else set()
-    for annotation in view.annotations:
-        if annotation.at_type.shortname == str(annotation_type):
-            try:
-                ids.add(annotation.get("document"))
-            except KeyError:
-                pass
-    return list(ids)
-
-
-def get_alignment_views(mmif):
-    """Return alignment views which have at least TextDocument, Token, TimeFrame and
-    Alignment annotations."""
-    views = []
-    needed_types = set(['TextDocument', 'Token', 'TimeFrame', 'Alignment'])
-    for view in mmif.views:
-        annotation_types = view.metadata.contains.keys()
-        annotation_types = [at.shortname for at in annotation_types]
-        if needed_types.issubset(annotation_types):
-            views.append(view)
-    return views
-
-
-# Render documents as HTML ------------
-
-def html_video(viz_id, vpath, vtt_srcview=None):
-    vpath = url2posix(vpath)
-    html = StringIO()
-    html.write('<video id="vid" controls crossorigin="anonymous" >\n')
-    html.write(f'    <source src=\"{vpath}\">\n')
-    if vtt_srcview is not None:
-        vtt_path = asr_alignments_to_vtt(vtt_srcview, viz_id)
-        rel_vtt_path = str(vtt_path)[len(app.static_folder):]
-        app.logger.debug(f"VTT path: {vtt_path}")
-        html.write(f'    <track kind="captions" srclang="en" src="{rel_vtt_path}" label="transcript" default/>\n')
-    html.write("</video>\n")
-    return html.getvalue()
-
-
-def html_text(tpath):
-    """Return the content of the text document, but with some HTML tags added."""
-    if not os.path.isfile(tpath):
-        raise FileNotFoundError(f"File not found: {tpath}")
-    with open(tpath) as t_file:
-        content = t_file.read().replace("\n", "<br/>\n")
-        return f"{content}\n"
-
-
-def html_img(ipath, boxes=None, id="imgCanvas"):
-    ipath = url2posix(ipath)
-    boxes = [] if boxes is None else boxes
-    return render_template('image.html', filename=ipath, boxes=boxes, id=id)
-
-
-def html_audio(apath):
-    apath = url2posix(apath)
-    return f"<audio controls src={apath}></audio>"
+from mmif.serialize.annotation import Text
+from flask import current_app
+import cache
 
 
 def url2posix(path):
     """For the visualizer we often want a POSIX path and not a URL so we strip off
     the protocol if there is one."""
-    if path.startswith('file:///'):
+    if str(path).startswith('file:///'):
         path = path[7:]
     return path
 
 
-# Interactive MMIF Tab -----------
-
-def render_interactive_mmif(mmif):
-    return render_template('interactive.html', mmif=mmif, aligned_views=get_aligned_views(mmif))
-
-
-# Functions for checking if view can be rendered with alignment highlighting
-def get_aligned_views(mmif):
-    """Return list of properly aligned views (for tree display)"""
-    aligned_views = []
-    for view in mmif.views:
-        if any([at_type.shortname == "Alignment" for at_type in view.metadata.contains]):
-            if check_view_alignment(view.annotations) == True:
-                aligned_views.append(view.id)
-    return aligned_views
-
-
-def check_view_alignment(annotations):
-    anno_stack = []
-    for annotation in annotations:
-        if annotation.at_type.shortname == "Alignment":
-            anno_stack.insert(0, annotation.properties)
-        else:
-            anno_stack.append(annotation.id)
-        if len(anno_stack) == 3:
-            if type(anno_stack[0]) == str or not (
-                    anno_stack[0]["source"] in anno_stack and anno_stack[0]["target"] in anno_stack):
-                return False
-            anno_stack = []
-    return True
-
-
-# NER Tools ----------------------
-
-def get_ner_views(mmif):
-    return [v for v in mmif.views if Uri.NE in v.metadata.contains]
-
-
-def create_ner_visualization(mmif, view):
-    metadata = view.metadata.contains.get(Uri.NE)
-    try:
-        # all the view's named entities refer to the same text document (kaldi)
-        document_ids = get_document_ids(view, Uri.NE)
-        return displacy.visualize_ner(mmif, view, document_ids[0], app.root_path)
-    except KeyError as e:
-        # the view's entities refer to more than one text document (tessearct)
-        pass
-
-
-def get_status(view):
+def get_status(view): 
     return 'ERROR' if 'message' in view.metadata.error else 'OKAY'
 
 
@@ -384,19 +27,95 @@ def get_properties(annotation):
     return '{ %s }' % ', '.join(props_list)
 
 
-# OCR Tools ----------------------
+def get_abstract_view_type(view, mmif):
+    """
+    Classify `view` for rendering: "NER" if it contains NamedEntity, "ASR" if
+    it contains Token, TimeFrame and Alignment, "OCR" if its app identifier
+    contains a known OCR-related app name; otherwise None. `mmif` is unused.
+    """
+    contained = {at_type.shortname for at_type in view.metadata.contains.keys()}
+    if "NamedEntity" in contained:
+        return "NER"
+    if {"Token", "TimeFrame", "Alignment"} <= contained:
+        return "ASR"
+    ocr_apps = ("swt-detection", "doctr-wrapper", "pyscenedetect-wrapper", "easyocr-wrapper",
+                "slatedetection", "fewshotclassifier", "barsdetection", "east-textdetection",
+                "parseqocr-wrapper", "tesseractocr-wrapper", "chyron-detection", "paddleocr-wrapper")
+    # Not enabled: also calling a view OCR when it refers to a VideoDocument and
+    # contains neither Sentence nor Token annotations.
+    if any(app_name in view.metadata.app for app_name in ocr_apps):
+        return "OCR"
+    return None
+
+
+def get_vtt_file(view, viz_id):
+    """Return the path (as str) of the WebVTT file for `view`, creating it in the cache if missing."""
+    vtt_filename = cache.get_cache_root() / viz_id / f"{view.id.replace(':', '-')}.vtt"
+    if not vtt_filename.exists():
+        # The text is built before the file is opened (no empty file on failure); WebVTT is UTF-8.
+        vtt_filename.write_text(write_vtt(view, viz_id), encoding="utf-8")
+    return str(vtt_filename)
+
+
+def write_vtt(view, viz_id):
+    """
+    Return WebVTT text for the Token/TimeFrame alignments in `view`, nine
+    tokens per cue plus a final shorter cue for any leftover tokens. The time
+    unit is the first "timeUnit" found in the view's `contains` metadata
+    (default: milliseconds). `viz_id` is unused.
+    """
+    vtt = "WEBVTT\n\n"
+    timeunit = next((a["timeUnit"] for a in view.metadata.contains.values()
+                     if "timeUnit" in a), "milliseconds")
+    token_idx = {a.id: a for a in view.annotations if a.at_type.shortname == "Token"}
+    timeframe_idx = {a.id: a for a in view.annotations if a.at_type.shortname == "TimeFrame"}
+    alignments = [a for a in view.annotations if a.at_type.shortname == "Alignment"]
+    vtt_start, texts = None, []
+    for alignment in alignments:
+        start_end_text = build_alignment(alignment, token_idx, timeframe_idx)
+        if start_end_text is None:
+            continue
+        start, end, text = start_end_text
+        if not texts:
+            vtt_start = format_time(start, timeunit)
+        texts.append(text)
+        if len(texts) > 8:
+            vtt += f"{vtt_start} --> {format_time(end, timeunit)}\n{' '.join(texts)}\n\n"
+            texts = []
+    if texts:
+        # Leftover tokens (fewer than nine) form a final cue ending at the last aligned end time
+        vtt += f"{vtt_start} --> {format_time(end, timeunit)}\n{' '.join(texts)}\n\n"
+    return vtt
 
-def prepare_ocr_visualization(mmif, view, mmif_id):
-    """ Visualize OCR by extracting image frames with BoundingBoxes from video"""
-    # frames, text_docs, alignments = {}, {}, {}
-    vid_path = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[0].location_path()
 
-    ocr_frames = get_ocr_frames(view, mmif)
+def build_alignment(alignment, token_idx, timeframe_idx):
+    """Return (start, end, text) for an Alignment whose source is a TimeFrame and target a Token, else None."""
+    token = token_idx.get(alignment.properties['target'])
+    timeframe = timeframe_idx.get(alignment.properties['source'])
+    if timeframe and token:
+        start, end = timeframe.properties['start'], timeframe.properties['end']
+        # Accept the token string under 'word' or, failing that, 'text'; None if it has neither
+        for text_key in ('word', 'text'):
+            if text_key in token.properties:
+                return start, end, token.properties[text_key]
 
-    # Generate pages (necessary to reduce IO cost) and render
-    frames_list = [(k, vars(v)) for k, v in ocr_frames.items()]
-    frames_list = find_duplicates(frames_list)
-    frames_pages = paginate(frames_list)
-    # Save page list as temp file
-    save_json(frames_pages, view.id, mmif_id)
-    return render_ocr(mmif_id, vid_path, view.id, 0)
+
+def format_time(time, unit):
+    """
+    Format `time` as a WebVTT timestamp "hh:mm:ss.ttt".
+
+    `time` is read as seconds when `unit` is "seconds", "second", "sec" or "s",
+    and as milliseconds for any other unit. WebVTT timestamps are expressed
+    with millisecond precision, as mm:ss.ttt or hh:mm:ss.ttt
+    (https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API).
+    """
+    if unit in ("seconds", "second", "sec", "s"):
+        # Round rather than truncate: 1.001 * 1000 evaluates to
+        # 1000.9999999999999, which int() would turn into 1000 ms.
+        time_in_ms = round(time * 1000)
+    else:
+        time_in_ms = int(time)
+    seconds, millis = divmod(time_in_ms, 1000)
+    minutes, seconds = divmod(seconds, 60)
+    hours, minutes = divmod(minutes, 60)
+    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"