From 24e9e610e5cedd59520bfee4b916bfc8551234bf Mon Sep 17 00:00:00 2001 From: Hayden McCormick Date: Mon, 1 Apr 2024 22:21:17 -0400 Subject: [PATCH 01/11] Mostly implemented refactor with new code structure, minus OCR functionality --- app.py | 20 +++--- helpers.py | 105 +++++++++++++++++++++++++++++ ocr.py | 1 - render.py | 153 ++++++++++++++++++++++++++++++++++++++++++ start_visualizer.sh | 2 +- templates/player.html | 26 +++---- utils.py | 2 - 7 files changed, 284 insertions(+), 25 deletions(-) create mode 100644 helpers.py create mode 100644 render.py diff --git a/app.py b/app.py index b78e5e4..b4fae53 100644 --- a/app.py +++ b/app.py @@ -4,13 +4,18 @@ import sys from threading import Thread -from flask import request, render_template, flash, send_from_directory, redirect +from flask import Flask, request, render_template, flash, send_from_directory, redirect from mmif.serialize import Mmif import cache from cache import set_last_access, cleanup -from utils import app, render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization +from utils import render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization import traceback +from render import render_documents, render_annotations + +# these two static folder-related params are important, do not remove +app = Flask(__name__, static_folder='static', static_url_path='') +app.secret_key = 'your_secret_key_here' @app.route('/') @@ -103,13 +108,12 @@ def send_js(path): def render_mmif(mmif_str, viz_id): mmif = Mmif(mmif_str) - htmlized_docs = documents_to_htmls(mmif, viz_id) - app.logger.debug(f"Prepared document: {[d[0] for d in htmlized_docs]}") - annotations = prep_annotations(mmif, viz_id) - app.logger.debug(f"Prepared Annotations: {[annotation[0] for annotation in annotations]}") + rendered_documents = render_documents(mmif, viz_id) + rendered_annotations = render_annotations(mmif, viz_id) return render_template('player.html', - docs=htmlized_docs, viz_id=viz_id, annotations=annotations) - + docs=rendered_documents, + viz_id=viz_id, + annotations=rendered_annotations) def upload_file(in_mmif): # Save file locally diff --git a/helpers.py b/helpers.py new file mode 100644 index 0000000..be0141a --- /dev/null +++ b/helpers.py @@ -0,0 +1,105 @@ +from mmif.serialize.annotation import Text +from flask import current_app +import cache + +def url2posix(path): + """For the visualizer we often want a POSIX path and not a URL so we strip off + the protocol if there is one.""" + if path.startswith('file:///'): + path = path[7:] + return path + + +def get_doc_path(document): + doc_path = document.location_path() + return doc_path + # app.logger.debug(f"MMIF on AV asset: {doc_path}") + # doc_symlink_path = pathlib.Path(app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}") + # os.symlink(doc_path, doc_symlink_path) + # app.logger.debug(f"{doc_path} is symlinked to {doc_symlink_path}") + # doc_symlink_rel_path = '/' + doc_symlink_path.relative_to(app.static_folder).as_posix() + # app.logger.debug(f"and {doc_symlink_rel_path} will be used in HTML src attribute") + + +def get_status(view): + return 'ERROR' if 'message' in view.metadata.error else 'OKAY' + + +def get_properties(annotation): + props = annotation.properties._serialize() + props.pop('id') + props_list = [] + for prop in sorted(props): + val = props[prop] + if type(val) == Text: + val = val.value + props_list.append("%s=%s" % (prop, val)) + return '{ %s }' % ', '.join(props_list) + + +def get_abstract_view_type(view): 
+ annotation_types = [a.shortname for a in view.metadata.contains.keys()] + if "NamedEntity" in annotation_types: + return "NER" + elif all([anno_type in annotation_types for anno_type in ["Token", "TimeFrame", "Alignment"]]): + return "ASR" + + +def get_vtt_file(view, viz_id): + vtt_filename = cache.get_cache_root() / viz_id / f"{view.id.replace(':', '-')}.vtt" + if not vtt_filename.exists(): + with open(vtt_filename, 'w') as vtt_file: + vtt_file.write(write_vtt(view, viz_id)) + return str(vtt_filename) + + +def write_vtt(view, viz_id): + vtt = "WEBVTT\n\n" + token_idx = {a.id: a for a in view.annotations if a.at_type.shortname == "Token"} + timeframe_idx = {a.id: a for a in view.annotations if a.at_type.shortname == "TimeFrame"} + alignments = [a for a in view.annotations if a.at_type.shortname == "Alignment"] + vtt_start = None + texts = [] + for alignment in alignments: + start_end_text = build_alignment(alignment, token_idx, timeframe_idx) + if start_end_text is None: + continue + start, end, text = start_end_text + if not vtt_start: + vtt_start = format_time(start) + texts.append(text) + if len(texts) > 8: + vtt_end = format_time(end) + vtt += f"{vtt_start} --> {vtt_end}\n{' '.join(texts)}\n\n" + vtt_start = None + texts = [] + return vtt + + +def build_alignment(alignment, token_idx, timeframe_idx): + target = alignment.properties['target'] + source = alignment.properties['source'] + timeframe = timeframe_idx.get(source) + token = token_idx.get(target) + if timeframe and token: + start = timeframe.properties['start'] + end = timeframe.properties['end'] + text = token.properties['word'] + return start, end, text + + +def format_time(time_in_ms): + """ + Formats a time in seconds as a string in the format "hh:mm:ss.fff" + VTT specifically requires timestamps expressed in miliseconds and + must be be in one of these formats: mm:ss.ttt or hh:mm:ss.ttt + (https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API) + ISO format can have up to 6 below the decimal point, on the other hand + """ + hours = time_in_ms // (1000 * 60 * 60) + time_in_ms %= (1000 * 60 * 60) + minutes = time_in_ms // (1000 * 60) + time_in_ms %= (1000 * 60) + seconds = time_in_ms // 1000 + time_in_ms %= 1000 + return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{time_in_ms:03d}" \ No newline at end of file diff --git a/ocr.py b/ocr.py index 91d5c54..3862731 100644 --- a/ocr.py +++ b/ocr.py @@ -164,7 +164,6 @@ def get_ocr_frames(view, mmif): frames[i].update(annotation, mmif) else: frames[i] = frame - print(frames) return frames diff --git a/render.py b/render.py new file mode 100644 index 0000000..98fe6a4 --- /dev/null +++ b/render.py @@ -0,0 +1,153 @@ +import os +import pathlib +import shutil +import tempfile +import threading +import time +from io import StringIO +from collections import Counter +from flask import render_template, current_app + +from mmif import DocumentTypes +from mmif.serialize.annotation import Text +from mmif.vocabulary import AnnotationTypes +from lapps.discriminators import Uri +import displacy + +from helpers import * + +import cache + +""" +Methods to render MMIF documents and their annotations in various formats. +""" + +# -- Documents -- + +def render_documents(mmif, viz_id): + """ + Returns HTML Tab representation of all documents in the MMIF object. 
+ """ + tabs = [] + for document in mmif.documents: + doc_path = get_doc_path(document) + if document.at_type == DocumentTypes.TextDocument: + html_tab = render_text(doc_path) + elif document.at_type == DocumentTypes.ImageDocument: + html_tab = render_image(doc_path) + elif document.at_type == DocumentTypes.AudioDocument: + html_tab = render_audio(doc_path) + elif document.at_type == DocumentTypes.VideoDocument: + html_tab = render_video(doc_path, mmif, viz_id) + + tabs.append({"id": document.id, + "tab_name": document.at_type.shortname, + "html": html_tab}) + return tabs + +def render_text(text_path): + """Return the content of the text document, but with some HTML tags added.""" + if not os.path.isfile(text_path): + raise FileNotFoundError(f"File not found: {text_path}") + with open(text_path) as t_file: + content = t_file.read().replace("\n", "
\n") + return f"{content}\n" + +def render_image(img_path): + return "" + +def render_audio(audio_path): + return "" + +def render_video(vid_path, mmif, viz_id): + vid_path = url2posix(vid_path) + html = StringIO() + html.write('\n") + return html.getvalue() + +# -- Annotations -- + +def render_annotations(mmif, viz_id): + """ + Returns HTML Tab representation of all annotations in the MMIF object. + """ + tabs = [] + # These tabs should always be present + tabs.append({"id": "info", "tab_name": "Info", "html": render_info(mmif)}) + tabs.append({"id": "annotations", "tab_name": "Annotations", "html": render_annotation_table(mmif)}) + tabs.append({"id": "tree", "tab_name": "Tree", "html": render_jstree(mmif)}) + # These tabs are optional + for view in mmif.views: + abstract_view_type = get_abstract_view_type(view) + app_shortname = view.metadata.app.split("/")[-2] + if abstract_view_type == "NER": + tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_ner(mmif, view)}) + elif abstract_view_type == "ASR": + tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_asr_vtt(view, viz_id)}) + return tabs + +def render_info(mmif): + s = StringIO('Howdy') + s.write("
")
+    for document in mmif.documents:
+        at_type = document.at_type.shortname
+        location = document.location
+        s.write("%s  %s\n" % (at_type, location))
+    s.write('\n')
+    for view in mmif.views:
+        app = view.metadata.app
+        status = get_status(view)
+        s.write('%s  %s  %s  %d\n' % (view.id, app, status, len(view.annotations)))
+        if len(view.annotations) > 0:
+            s.write('\n')
+            types = Counter([a.at_type.shortname
+                             for a in view.annotations])
+            for attype, count in types.items():
+                s.write('    %4d %s\n' % (count, attype))
+        s.write('\n')
+    s.write("
") + return s.getvalue() + + +def render_annotation_table(mmif): + s = StringIO('Howdy') + for view in mmif.views: + status = get_status(view) + s.write('

%s %s %s %d annotations

\n' + % (view.id, view.metadata.app, status, len(view.annotations))) + s.write("
\n") + s.write("\n") + limit_len = lambda str: str[:500] + " . . . }" if len(str) > 500 else str + for annotation in view.annotations: + s.write(' \n') + s.write(' \n' % annotation.id) + s.write(' \n' % annotation.at_type.shortname) + s.write(' \n' % limit_len(get_properties(annotation))) + s.write(' \n') + s.write("
%s%s%s
\n") + s.write("
\n") + return s.getvalue() + +def render_jstree(mmif): + return render_template('interactive.html', mmif=mmif, aligned_views=[]) + +def render_asr_vtt(view, viz_id): + vtt_filename = get_vtt_file(view, viz_id) + with open(vtt_filename) as vtt_file: + vtt_content = vtt_file.read() + return f"
<pre>{vtt_content}</pre>
" + +def render_ner(mmif, view): + metadata = view.metadata.contains.get(Uri.NE) + ner_document = metadata.get('document') + return displacy.visualize_ner(mmif, view, ner_document, current_app.root_path) + +def render_ocr(): + pass \ No newline at end of file diff --git a/start_visualizer.sh b/start_visualizer.sh index 6802896..200531e 100755 --- a/start_visualizer.sh +++ b/start_visualizer.sh @@ -27,5 +27,5 @@ else fi # Start visualizer $container_engine build . -f Containerfile -t clams-mmif-visualizer -$container_engine run -d --name clams-mmif-visualizer --rm -p 5001:5000 -e PYTHONUNBUFFERED=1 -v $datadir:$mountdir -v $datadir:/app/static/$mountdir clams-mmif-visualizer +$container_engine run --name clams-mmif-visualizer --rm -p 5001:5000 -e PYTHONUNBUFFERED=1 -v $datadir:$mountdir -v $datadir:/app/static/$mountdir clams-mmif-visualizer echo "MMIF Visualizer is running in the background and can be accessed at http://localhost:5001/. To shut it down, run '$container_engine kill clams-mmif-visualizer'" \ No newline at end of file diff --git a/templates/player.html b/templates/player.html index 3f56ea8..af62b47 100644 --- a/templates/player.html +++ b/templates/player.html @@ -117,27 +117,27 @@

Visualizing MMIF

-
+

-

{{ docs[0][2] }}

- {{ docs[0][3] | safe }} + + {{ docs[0]['html'] | safe }}
{% for medium in docs[1:] %} -
+

-

{{ medium[2] }}

- {{ medium[3] | safe }} + + {{ medium['html'] | safe }}
{% endfor %}
@@ -148,8 +148,8 @@

Visualizing MMIF

@@ -157,9 +157,9 @@

Visualizing MMIF

{% for annotation in annotations %} -
+

- {{ annotation[1] | safe }} + {{ annotation['html'] | safe }}
{% endfor %}
diff --git a/utils.py b/utils.py index d6151ec..2c79dcb 100644 --- a/utils.py +++ b/utils.py @@ -135,8 +135,6 @@ def prep_annotations(mmif, viz_id): tabs = [] tabs.append(("Info", "
" + create_info(mmif) + "
")) app.logger.debug(f"Prepared INFO Tab: {tabs[-1][0]}") - # tabs.append(("MMIF", "
" + mmif.serialize(pretty=True) + "
")) - # app.logger.debug(f"Prepared RAW Tab: {tabs[-1][0]}") tabs.append(("Annotations", create_annotation_tables(mmif))) app.logger.debug(f"Prepared SUMMARY Tab: {tabs[-1][0]}") tabs.append(("Tree", render_interactive_mmif(mmif))) From 27b93adde9da28ea5e7d2d9679183bc8c6f9940c Mon Sep 17 00:00:00 2001 From: Hayden McCormick Date: Tue, 16 Apr 2024 00:38:38 -0400 Subject: [PATCH 02/11] Integrate thumbnails, fix document symlinks --- app.py | 7 ++++--- helpers.py | 13 +++++++++++-- ocr.py | 2 +- render.py | 37 ++++++++++++++++++++++++++++--------- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/app.py b/app.py index b4fae53..ddeeb92 100644 --- a/app.py +++ b/app.py @@ -3,13 +3,14 @@ import secrets import sys from threading import Thread +from shutil import rmtree from flask import Flask, request, render_template, flash, send_from_directory, redirect from mmif.serialize import Mmif import cache from cache import set_last_access, cleanup -from utils import render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization +from utils import render_ocr_page, documents_to_htmls, prep_annotations, prepare_ocr_visualization import traceback from render import render_documents, render_annotations @@ -40,7 +41,7 @@ def ocr(): def ocrpage(): data = request.json try: - return render_ocr(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"]) + return render_ocr_page(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"]) except Exception as e: return f'

Unexpected error of type {type(e)}: {e}' @@ -96,7 +97,7 @@ def display(viz_id): return html_file else: app.logger.debug(f"Visualization {viz_id} not found in cache.") - os.remove(path) + rmtree(path) flash("File not found -- please upload again (it may have been deleted to clear up cache space).") return redirect("/upload") diff --git a/helpers.py b/helpers.py index be0141a..cfde241 100644 --- a/helpers.py +++ b/helpers.py @@ -5,7 +5,7 @@ def url2posix(path): """For the visualizer we often want a POSIX path and not a URL so we strip off the protocol if there is one.""" - if path.startswith('file:///'): + if str(path).startswith('file:///'): path = path[7:] return path @@ -37,12 +37,21 @@ def get_properties(annotation): return '{ %s }' % ', '.join(props_list) -def get_abstract_view_type(view): +def get_abstract_view_type(view, mmif): annotation_types = [a.shortname for a in view.metadata.contains.keys()] if "NamedEntity" in annotation_types: return "NER" elif all([anno_type in annotation_types for anno_type in ["Token", "TimeFrame", "Alignment"]]): return "ASR" + # Define an OCR view as one that refers to a video and doesn't contain Sentences + # or Tokens + else: + for configuration in view.metadata.contains.values(): + if "document" in configuration \ + and mmif.get_document_by_id(configuration["document"]).at_type.shortname == "VideoDocument": + if not any([anno_type in annotation_types for anno_type in ["Sentence", "Token"]]): + return "OCR" + def get_vtt_file(view, viz_id): diff --git a/ocr.py b/ocr.py index 3862731..5501b45 100644 --- a/ocr.py +++ b/ocr.py @@ -186,7 +186,7 @@ def paginate(frames_list): return {i: page for (i, page) in enumerate(pages)} -def render_ocr(mmif_id, vid_path, view_id, page_number): +def render_ocr_page(mmif_id, vid_path, view_id, page_number): """ Iterate through frames and display the contents/alignments. """ diff --git a/render.py b/render.py index 98fe6a4..0da6cce 100644 --- a/render.py +++ b/render.py @@ -15,6 +15,7 @@ import displacy from helpers import * +from ocr import get_ocr_frames, paginate, find_duplicates, save_json, render_ocr_page import cache @@ -30,15 +31,21 @@ def render_documents(mmif, viz_id): """ tabs = [] for document in mmif.documents: - doc_path = get_doc_path(document) + # Add symbolic link to document to static folder, so it can be accessed + # by the browser. 
+ doc_path = document.location_path() + doc_symlink_path = pathlib.Path(current_app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}") + os.symlink(doc_path, doc_symlink_path) + doc_symlink_rel_path = '/' + doc_symlink_path.relative_to(current_app.static_folder).as_posix() + if document.at_type == DocumentTypes.TextDocument: - html_tab = render_text(doc_path) + html_tab = render_text(doc_symlink_rel_path) elif document.at_type == DocumentTypes.ImageDocument: - html_tab = render_image(doc_path) + html_tab = render_image(doc_symlink_rel_path) elif document.at_type == DocumentTypes.AudioDocument: - html_tab = render_audio(doc_path) + html_tab = render_audio(doc_symlink_rel_path) elif document.at_type == DocumentTypes.VideoDocument: - html_tab = render_video(doc_path, mmif, viz_id) + html_tab = render_video(doc_symlink_rel_path, mmif, viz_id) tabs.append({"id": document.id, "tab_name": document.at_type.shortname, @@ -65,7 +72,7 @@ def render_video(vid_path, mmif, viz_id): html.write('\n") return html.getvalue() From 3476f8dd95cc9d7055c180475cb9936df2955f60 Mon Sep 17 00:00:00 2001 From: Hayden McCormick Date: Thu, 6 Jun 2024 13:48:26 -0400 Subject: [PATCH 04/11] Fix various bugs with rendering old MMIF files, add audio/img rendering --- displacy/__init__.py | 4 +-- helpers.py | 15 ++++++-- render.py | 83 ++++++++++++++++++++++++++++---------------- start_visualizer.sh | 2 +- 4 files changed, 69 insertions(+), 35 deletions(-) diff --git a/displacy/__init__.py b/displacy/__init__.py index e1e0dff..563ed65 100644 --- a/displacy/__init__.py +++ b/displacy/__init__.py @@ -48,11 +48,11 @@ def read_text(textdoc, app_root): # container, see the comment in html_text() in ../app.py) if not os.path.isfile(location): if location.startswith('file:///'): - location = location[8:] + location = location[7:] else: # this should not happen anymore, but keeping it anyway location = location[1:] - location = os.path.join(app_root, 'static', location) + # location = os.path.join(app_root, 'static', location) with open(location) as fh: text = fh.read() else: diff --git a/helpers.py b/helpers.py index cfde241..4756ed6 100644 --- a/helpers.py +++ b/helpers.py @@ -64,6 +64,11 @@ def get_vtt_file(view, viz_id): def write_vtt(view, viz_id): vtt = "WEBVTT\n\n" + timeunit = "milliseconds" + for a in view.metadata.contains.values(): + if "timeUnit" in a: + timeunit = a["timeUnit"] + break token_idx = {a.id: a for a in view.annotations if a.at_type.shortname == "Token"} timeframe_idx = {a.id: a for a in view.annotations if a.at_type.shortname == "TimeFrame"} alignments = [a for a in view.annotations if a.at_type.shortname == "Alignment"] @@ -75,10 +80,10 @@ def write_vtt(view, viz_id): continue start, end, text = start_end_text if not vtt_start: - vtt_start = format_time(start) + vtt_start = format_time(start, timeunit) texts.append(text) if len(texts) > 8: - vtt_end = format_time(end) + vtt_end = format_time(end, timeunit) vtt += f"{vtt_start} --> {vtt_end}\n{' '.join(texts)}\n\n" vtt_start = None texts = [] @@ -97,7 +102,7 @@ def build_alignment(alignment, token_idx, timeframe_idx): return start, end, text -def format_time(time_in_ms): +def format_time(time, unit): """ Formats a time in seconds as a string in the format "hh:mm:ss.fff" VTT specifically requires timestamps expressed in miliseconds and @@ -105,6 +110,10 @@ def format_time(time_in_ms): (https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API) ISO format can have up to 6 below the decimal point, on the other 
hand """ + if unit == "seconds": + time_in_ms = int(time * 1000) + else: + time_in_ms = int(time) hours = time_in_ms // (1000 * 60 * 60) time_in_ms %= (1000 * 60 * 60) minutes = time_in_ms // (1000 * 60) diff --git a/render.py b/render.py index 4d45dd6..d950814 100644 --- a/render.py +++ b/render.py @@ -8,6 +8,7 @@ from mmif import DocumentTypes from lapps.discriminators import Uri import displacy +import traceback from helpers import * from ocr import get_ocr_frames, paginate, find_duplicates, save_json, render_ocr_page @@ -26,29 +27,36 @@ def render_documents(mmif, viz_id): """ tabs = [] for document in mmif.documents: - # Add symbolic link to document to static folder, so it can be accessed - # by the browser. - doc_path = document.location_path() - doc_symlink_path = pathlib.Path(current_app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}") - os.symlink(doc_path, doc_symlink_path) - doc_symlink_rel_path = '/' + doc_symlink_path.relative_to(current_app.static_folder).as_posix() - - if document.at_type == DocumentTypes.TextDocument: - html_tab = render_text(doc_symlink_rel_path) - elif document.at_type == DocumentTypes.ImageDocument: - html_tab = render_image(doc_symlink_rel_path) - elif document.at_type == DocumentTypes.AudioDocument: - html_tab = render_audio(doc_symlink_rel_path) - elif document.at_type == DocumentTypes.VideoDocument: - html_tab = render_video(doc_symlink_rel_path, mmif, viz_id) - - tabs.append({"id": document.id, - "tab_name": document.at_type.shortname, - "html": html_tab}) + try: + # Add symbolic link to document to static folder, so it can be accessed + # by the browser. + doc_path = document.location_path() + doc_symlink_path = pathlib.Path(current_app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}") + os.symlink(doc_path, doc_symlink_path) + doc_symlink_rel_path = '/' + doc_symlink_path.relative_to(current_app.static_folder).as_posix() + + if document.at_type == DocumentTypes.TextDocument: + html_tab = render_text(doc_path) + elif document.at_type == DocumentTypes.ImageDocument: + html_tab = render_image(doc_symlink_rel_path) + elif document.at_type == DocumentTypes.AudioDocument: + html_tab = render_audio(doc_symlink_rel_path) + elif document.at_type == DocumentTypes.VideoDocument: + html_tab = render_video(doc_symlink_rel_path, mmif, viz_id) + + tabs.append({"id": document.id, + "tab_name": document.at_type.shortname, + "html": html_tab}) + + except Exception: + tabs.append({"id": document.id, + "tab_name": document.at_type.shortname, + "html": f"Error rendering document:

{traceback.format_exc()}
"}) return tabs def render_text(text_path): """Return the content of the text document, but with some HTML tags added.""" + text_path = url2posix(text_path) if not os.path.isfile(text_path): raise FileNotFoundError(f"File not found: {text_path}") with open(text_path) as t_file: @@ -56,10 +64,18 @@ def render_text(text_path): return f"{content}\n" def render_image(img_path): - return "" + img_path = url2posix(img_path) + html = StringIO() + html.write(f'Image\n') + return html.getvalue() def render_audio(audio_path): - return "" + audio_path = url2posix(audio_path) + html = StringIO() + html.write('\n") + return html.getvalue() def render_video(vid_path, mmif, viz_id): vid_path = url2posix(vid_path) @@ -87,14 +103,23 @@ def render_annotations(mmif, viz_id): tabs.append({"id": "tree", "tab_name": "Tree", "html": render_jstree(mmif)}) # These tabs are optional for view in mmif.views: - abstract_view_type = get_abstract_view_type(view, mmif) - app_shortname = view.metadata.app.split("/")[-2] - if abstract_view_type == "NER": - tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_ner(mmif, view)}) - elif abstract_view_type == "ASR": - tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_asr_vtt(view, viz_id)}) - elif abstract_view_type == "OCR": - tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_ocr(mmif, view, viz_id)}) + try: + abstract_view_type = get_abstract_view_type(view, mmif) + # Workaround to deal with the fact that some apps have a version number in the URL + app_url = view.metadata.app if re.search(r"\/v\d+\.?\d?$", view.metadata.app) else view.metadata.app + "/v1" + app_shortname = app_url.split("/")[-2] + if abstract_view_type == "NER": + tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_ner(mmif, view)}) + elif abstract_view_type == "ASR": + tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_asr_vtt(view, viz_id)}) + elif abstract_view_type == "OCR": + tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_ocr(mmif, view, viz_id)}) + + except Exception as e: + tabs.append({"id": view.id, + "tab_name": view.id, + "html": f"Error rendering annotations:

{traceback.format_exc()}
"}) + return tabs def render_info(mmif): diff --git a/start_visualizer.sh b/start_visualizer.sh index 200531e..6802896 100755 --- a/start_visualizer.sh +++ b/start_visualizer.sh @@ -27,5 +27,5 @@ else fi # Start visualizer $container_engine build . -f Containerfile -t clams-mmif-visualizer -$container_engine run --name clams-mmif-visualizer --rm -p 5001:5000 -e PYTHONUNBUFFERED=1 -v $datadir:$mountdir -v $datadir:/app/static/$mountdir clams-mmif-visualizer +$container_engine run -d --name clams-mmif-visualizer --rm -p 5001:5000 -e PYTHONUNBUFFERED=1 -v $datadir:$mountdir -v $datadir:/app/static/$mountdir clams-mmif-visualizer echo "MMIF Visualizer is running in the background and can be accessed at http://localhost:5001/. To shut it down, run '$container_engine kill clams-mmif-visualizer'" \ No newline at end of file From b296bda9e1078011ab6fafff00d544acaeb1d934 Mon Sep 17 00:00:00 2001 From: Hayden McCormick Date: Thu, 6 Jun 2024 13:50:18 -0400 Subject: [PATCH 05/11] Fix example file --- examples/whisper-spacy.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/whisper-spacy.json b/examples/whisper-spacy.json index 967a3d4..9a164e6 100644 --- a/examples/whisper-spacy.json +++ b/examples/whisper-spacy.json @@ -8,7 +8,7 @@ "properties": { "mime": "video", "id": "d1", - "location": "file:///data/video/service-mbrs-ntscrm-01181182.mp4" + "location": "file:///data/service-mbrs-ntscrm-01181182.mp4" } }, { @@ -16,7 +16,7 @@ "properties": { "mime": "audio", "id": "d2", - "location": "file:///data/audio/service-mbrs-ntscrm-01181182.wav" + "location": "file:///data/service-mbrs-ntscrm-01181182.wav" } }, { @@ -24,7 +24,7 @@ "properties": { "mime": "text", "id": "d3", - "location": "file:///data/text/service-mbrs-ntscrm-01181182.txt" + "location": "file:///data/service-mbrs-ntscrm-01181182.txt" } } ], From f8fc8815bc0815ffcbf9ed95a2fedd34d798bb67 Mon Sep 17 00:00:00 2001 From: Hayden McCormick Date: Fri, 7 Jun 2024 09:32:02 -0400 Subject: [PATCH 06/11] Move all OCR rendering methods to render.py, improve function naming --- .gitignore | 4 +++- app.py | 5 ++--- ocr.py | 37 ------------------------------------- render.py | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 51 insertions(+), 44 deletions(-) diff --git a/.gitignore b/.gitignore index 0a9b11b..11c7e30 100644 --- a/.gitignore +++ b/.gitignore @@ -73,4 +73,6 @@ tags # VSCode .devcontainer -devcontainer.json \ No newline at end of file +devcontainer.json + +static/mmif-viz-cache \ No newline at end of file diff --git a/app.py b/app.py index ddeeb92..cc5fe6d 100644 --- a/app.py +++ b/app.py @@ -10,9 +10,8 @@ import cache from cache import set_last_access, cleanup -from utils import render_ocr_page, documents_to_htmls, prep_annotations, prepare_ocr_visualization import traceback -from render import render_documents, render_annotations +from render import render_documents, render_annotations, prepare_and_render_ocr, render_ocr_page # these two static folder-related params are important, do not remove app = Flask(__name__, static_folder='static', static_url_path='') @@ -31,7 +30,7 @@ def ocr(): mmif_str = open(cache.get_cache_root() / data["mmif_id"] / "file.mmif").read() mmif = Mmif(mmif_str) ocr_view = mmif.get_view_by_id(data["view_id"]) - return prepare_ocr_visualization(mmif, ocr_view, data["mmif_id"]) + return prepare_and_render_ocr(mmif, ocr_view, data["mmif_id"]) except Exception as e: app.logger.error(f"{e}\n{traceback.format_exc()}") return f'

Error: {e} Check the server log for more information.' diff --git a/ocr.py b/ocr.py index 5501b45..f229390 100644 --- a/ocr.py +++ b/ocr.py @@ -185,43 +185,6 @@ def paginate(frames_list): return {i: page for (i, page) in enumerate(pages)} - -def render_ocr_page(mmif_id, vid_path, view_id, page_number): - """ - Iterate through frames and display the contents/alignments. - """ - # Path for storing temporary images generated by cv2 - cv2_vid = cv2.VideoCapture(vid_path) - tn_data_fname = cache.get_cache_root() / mmif_id / f"{view_id}-pages.json" - thumbnail_pages = json.load(open(tn_data_fname)) - page = thumbnail_pages[str(page_number)] - prev_frame_cap = None - path = make_image_directory(mmif_id) - for frame_num, frame in page: - # If index is range instead of frame... - if frame.get("range"): - frame_num = (int(frame["range"][0]) + int(frame["range"][1])) / 2 - cv2_vid.set(1, frame_num) - _, frame_cap = cv2_vid.read() - if frame_cap is None: - raise FileNotFoundError(f"Video file {vid_path} not found!") - - # Double check histogram similarity of "repeat" frames -- if they're significantly different, un-mark as repeat - if prev_frame_cap is not None and frame["repeat"] and not is_duplicate_image(prev_frame_cap, frame_cap, - cv2_vid): - frame["repeat"] = False - with tempfile.NamedTemporaryFile(dir=str(path), suffix=".jpg", delete=False) as tf: - cv2.imwrite(tf.name, frame_cap) - # "id" is just the name of the temp image file - frame["id"] = pathlib.Path(tf.name).name - prev_frame_cap = frame_cap - - tn_page_html = render_template( - 'ocr.html', vid_path=vid_path, view_id=view_id, page=page, - n_pages=len(thumbnail_pages), page_number=str(page_number), mmif_id=mmif_id) - return tn_page_html - - def make_image_directory(mmif_id): # Make path for temp OCR image files or clear image files if it exists path = cache.get_cache_root() / mmif_id / "img" diff --git a/render.py b/render.py index d950814..4e4d740 100644 --- a/render.py +++ b/render.py @@ -11,7 +11,10 @@ import traceback from helpers import * -from ocr import get_ocr_frames, paginate, find_duplicates, save_json, render_ocr_page +from ocr import get_ocr_frames, paginate, find_duplicates, save_json, make_image_directory, is_duplicate_image +import cv2 +import json +import tempfile import cache @@ -113,7 +116,7 @@ def render_annotations(mmif, viz_id): elif abstract_view_type == "ASR": tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_asr_vtt(view, viz_id)}) elif abstract_view_type == "OCR": - tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_ocr(mmif, view, viz_id)}) + tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": prepare_and_render_ocr(mmif, view, viz_id)}) except Exception as e: tabs.append({"id": view.id, @@ -178,7 +181,11 @@ def render_ner(mmif, view): ner_document = metadata.get('document') return displacy.visualize_ner(mmif, view, ner_document, current_app.root_path) -def render_ocr(mmif, view, viz_id): +def prepare_and_render_ocr(mmif, view, viz_id): + """ + Prepares list of frames that will be passed back and forth between server + and client, and renders the first page of the OCR. 
+ """ vid_path = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[0].location_path() ocr_frames = get_ocr_frames(view, mmif) @@ -190,3 +197,39 @@ def render_ocr(mmif, view, viz_id): # Save page list as temp file save_json(frames_pages, view.id, viz_id) return render_ocr_page(viz_id, vid_path, view.id, 0) + +def render_ocr_page(mmif_id, vid_path, view_id, page_number): + """ + Renders a single OCR page by iterating through frames and displaying the + contents/alignments. + """ + # Path for storing temporary images generated by cv2 + cv2_vid = cv2.VideoCapture(vid_path) + tn_data_fname = cache.get_cache_root() / mmif_id / f"{view_id}-pages.json" + thumbnail_pages = json.load(open(tn_data_fname)) + page = thumbnail_pages[str(page_number)] + prev_frame_cap = None + path = make_image_directory(mmif_id) + for frame_num, frame in page: + # If index is range instead of frame... + if frame.get("range"): + frame_num = (int(frame["range"][0]) + int(frame["range"][1])) / 2 + cv2_vid.set(1, frame_num) + _, frame_cap = cv2_vid.read() + if frame_cap is None: + raise FileNotFoundError(f"Video file {vid_path} not found!") + + # Double check histogram similarity of "repeat" frames -- if they're significantly different, un-mark as repeat + if prev_frame_cap is not None and frame["repeat"] and not is_duplicate_image(prev_frame_cap, frame_cap, + cv2_vid): + frame["repeat"] = False + with tempfile.NamedTemporaryFile(dir=str(path), suffix=".jpg", delete=False) as tf: + cv2.imwrite(tf.name, frame_cap) + # "id" is just the name of the temp image file + frame["id"] = pathlib.Path(tf.name).name + prev_frame_cap = frame_cap + + tn_page_html = render_template( + 'ocr.html', vid_path=vid_path, view_id=view_id, page=page, + n_pages=len(thumbnail_pages), page_number=str(page_number), mmif_id=mmif_id) + return tn_page_html From 0e27a421d0dcbee896b0637424e4d44ba75d17f1 Mon Sep 17 00:00:00 2001 From: Hayden McCormick Date: Fri, 7 Jun 2024 09:41:42 -0400 Subject: [PATCH 07/11] Collapse OCR routes into a single method --- app.py | 18 ++++++++++++++---- templates/ocr.html | 2 +- templates/pre-ocr.html | 2 ++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/app.py b/app.py index cc5fe6d..867901b 100644 --- a/app.py +++ b/app.py @@ -25,6 +25,16 @@ def index(): @app.route('/ocr', methods=['POST']) def ocr(): + if "page_number" not in request.json: + return serve_first_ocr_page(request.json) + else: + return serve_ocr_page(request.json) + + +def serve_first_ocr_page(data): + """ + Prepares OCR (at load time, due to lazy loading) and serves the first page + """ try: data = dict(request.json) mmif_str = open(cache.get_cache_root() / data["mmif_id"] / "file.mmif").read() @@ -35,10 +45,10 @@ def ocr(): app.logger.error(f"{e}\n{traceback.format_exc()}") return f'

Error: {e} Check the server log for more information.' - -@app.route('/ocrpage', methods=['POST']) -def ocrpage(): - data = request.json +def serve_ocr_page(data): + """ + Serves subsequent OCR pages + """ try: return render_ocr_page(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"]) except Exception as e: diff --git a/templates/ocr.html b/templates/ocr.html index 7daea2b..8d75c5e 100644 --- a/templates/ocr.html +++ b/templates/ocr.html @@ -142,7 +142,7 @@

if (data["page_number"] >= 0 && data["page_number"] < parseInt("{{n_pages}}")) { $.ajax({ type:'POST', - url:'/ocrpage', + url:'/ocr', contentType: "application/json", data: JSON.stringify(data), success: function(res_html){ diff --git a/templates/pre-ocr.html b/templates/pre-ocr.html index eba08d3..c2bf537 100644 --- a/templates/pre-ocr.html +++ b/templates/pre-ocr.html @@ -1,3 +1,5 @@ + +
From adcb79133e776ad6f3c9a234ce41fcd6b5eabc2a Mon Sep 17 00:00:00 2001 From: Hayden McCormick Date: Fri, 7 Jun 2024 09:48:53 -0400 Subject: [PATCH 08/11] PEP-8/style fixes ahead of PR --- app.py | 62 ++++---- helpers.py | 123 ---------------- ocr.py | 72 +++++---- render.py | 79 ++++++---- utils.py | 417 +++++++++++------------------------------------------ 5 files changed, 214 insertions(+), 539 deletions(-) delete mode 100644 helpers.py diff --git a/app.py b/app.py index 867901b..91b5149 100644 --- a/app.py +++ b/app.py @@ -29,30 +29,6 @@ def ocr(): return serve_first_ocr_page(request.json) else: return serve_ocr_page(request.json) - - -def serve_first_ocr_page(data): - """ - Prepares OCR (at load time, due to lazy loading) and serves the first page - """ - try: - data = dict(request.json) - mmif_str = open(cache.get_cache_root() / data["mmif_id"] / "file.mmif").read() - mmif = Mmif(mmif_str) - ocr_view = mmif.get_view_by_id(data["view_id"]) - return prepare_and_render_ocr(mmif, ocr_view, data["mmif_id"]) - except Exception as e: - app.logger.error(f"{e}\n{traceback.format_exc()}") - return f'

Error: {e} Check the server log for more information.

' - -def serve_ocr_page(data): - """ - Serves subsequent OCR pages - """ - try: - return render_ocr_page(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"]) - except Exception as e: - return f'

Unexpected error of type {type(e)}: {e}' @app.route('/upload', methods=['GET', 'POST']) @@ -121,9 +97,36 @@ def render_mmif(mmif_str, viz_id): rendered_documents = render_documents(mmif, viz_id) rendered_annotations = render_annotations(mmif, viz_id) return render_template('player.html', - docs=rendered_documents, - viz_id=viz_id, - annotations=rendered_annotations) + docs=rendered_documents, + viz_id=viz_id, + annotations=rendered_annotations) + + +def serve_first_ocr_page(data): + """ + Prepares OCR (at load time, due to lazy loading) and serves the first page + """ + try: + data = dict(request.json) + mmif_str = open(cache.get_cache_root() / + data["mmif_id"] / "file.mmif").read() + mmif = Mmif(mmif_str) + ocr_view = mmif.get_view_by_id(data["view_id"]) + return prepare_and_render_ocr(mmif, ocr_view, data["mmif_id"]) + except Exception as e: + app.logger.error(f"{e}\n{traceback.format_exc()}") + return f'

Error: {e} Check the server log for more information.' + + +def serve_ocr_page(data): + """ + Serves subsequent OCR pages + """ + try: + return render_ocr_page(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"]) + except Exception as e: + return f'

Unexpected error of type {type(e)}: {e}' + def upload_file(in_mmif): # Save file locally @@ -159,7 +162,8 @@ def upload_file(in_mmif): if __name__ == '__main__': # Make path for temp files cache_path = cache.get_cache_root() - cache_symlink_path = os.path.join(app.static_folder, cache._CACHE_DIR_SUFFIX) + cache_symlink_path = os.path.join( + app.static_folder, cache._CACHE_DIR_SUFFIX) if os.path.islink(cache_symlink_path): os.unlink(cache_symlink_path) elif os.path.exists(cache_symlink_path): @@ -174,5 +178,5 @@ def upload_file(in_mmif): port = 5000 if len(sys.argv) > 2 and sys.argv[1] == '-p': port = int(sys.argv[2]) - + app.run(port=port, host='0.0.0.0', debug=True, use_reloader=True) diff --git a/helpers.py b/helpers.py deleted file mode 100644 index 4756ed6..0000000 --- a/helpers.py +++ /dev/null @@ -1,123 +0,0 @@ -from mmif.serialize.annotation import Text -from flask import current_app -import cache - -def url2posix(path): - """For the visualizer we often want a POSIX path and not a URL so we strip off - the protocol if there is one.""" - if str(path).startswith('file:///'): - path = path[7:] - return path - - -def get_doc_path(document): - doc_path = document.location_path() - return doc_path - # app.logger.debug(f"MMIF on AV asset: {doc_path}") - # doc_symlink_path = pathlib.Path(app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}") - # os.symlink(doc_path, doc_symlink_path) - # app.logger.debug(f"{doc_path} is symlinked to {doc_symlink_path}") - # doc_symlink_rel_path = '/' + doc_symlink_path.relative_to(app.static_folder).as_posix() - # app.logger.debug(f"and {doc_symlink_rel_path} will be used in HTML src attribute") - - -def get_status(view): - return 'ERROR' if 'message' in view.metadata.error else 'OKAY' - - -def get_properties(annotation): - props = annotation.properties._serialize() - props.pop('id') - props_list = [] - for prop in sorted(props): - val = props[prop] - if type(val) == Text: - val = val.value - props_list.append("%s=%s" % (prop, val)) - return '{ %s }' % ', '.join(props_list) - - -def get_abstract_view_type(view, mmif): - annotation_types = [a.shortname for a in view.metadata.contains.keys()] - if "NamedEntity" in annotation_types: - return "NER" - elif all([anno_type in annotation_types for anno_type in ["Token", "TimeFrame", "Alignment"]]): - return "ASR" - # Define an OCR view as one that refers to a video and doesn't contain Sentences - # or Tokens - else: - for configuration in view.metadata.contains.values(): - if "document" in configuration \ - and mmif.get_document_by_id(configuration["document"]).at_type.shortname == "VideoDocument": - if not any([anno_type in annotation_types for anno_type in ["Sentence", "Token"]]): - return "OCR" - - - -def get_vtt_file(view, viz_id): - vtt_filename = cache.get_cache_root() / viz_id / f"{view.id.replace(':', '-')}.vtt" - if not vtt_filename.exists(): - with open(vtt_filename, 'w') as vtt_file: - vtt_file.write(write_vtt(view, viz_id)) - return str(vtt_filename) - - -def write_vtt(view, viz_id): - vtt = "WEBVTT\n\n" - timeunit = "milliseconds" - for a in view.metadata.contains.values(): - if "timeUnit" in a: - timeunit = a["timeUnit"] - break - token_idx = {a.id: a for a in view.annotations if a.at_type.shortname == "Token"} - timeframe_idx = {a.id: a for a in view.annotations if a.at_type.shortname == "TimeFrame"} - alignments = [a for a in view.annotations if a.at_type.shortname == "Alignment"] - vtt_start = None - texts = [] - for alignment in alignments: - 
start_end_text = build_alignment(alignment, token_idx, timeframe_idx) - if start_end_text is None: - continue - start, end, text = start_end_text - if not vtt_start: - vtt_start = format_time(start, timeunit) - texts.append(text) - if len(texts) > 8: - vtt_end = format_time(end, timeunit) - vtt += f"{vtt_start} --> {vtt_end}\n{' '.join(texts)}\n\n" - vtt_start = None - texts = [] - return vtt - - -def build_alignment(alignment, token_idx, timeframe_idx): - target = alignment.properties['target'] - source = alignment.properties['source'] - timeframe = timeframe_idx.get(source) - token = token_idx.get(target) - if timeframe and token: - start = timeframe.properties['start'] - end = timeframe.properties['end'] - text = token.properties['word'] - return start, end, text - - -def format_time(time, unit): - """ - Formats a time in seconds as a string in the format "hh:mm:ss.fff" - VTT specifically requires timestamps expressed in miliseconds and - must be be in one of these formats: mm:ss.ttt or hh:mm:ss.ttt - (https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API) - ISO format can have up to 6 below the decimal point, on the other hand - """ - if unit == "seconds": - time_in_ms = int(time * 1000) - else: - time_in_ms = int(time) - hours = time_in_ms // (1000 * 60 * 60) - time_in_ms %= (1000 * 60 * 60) - minutes = time_in_ms // (1000 * 60) - time_in_ms %= (1000 * 60) - seconds = time_in_ms // 1000 - time_in_ms %= 1000 - return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{time_in_ms:03d}" \ No newline at end of file diff --git a/ocr.py b/ocr.py index f229390..dc21f02 100644 --- a/ocr.py +++ b/ocr.py @@ -1,13 +1,11 @@ import datetime -import pathlib import cv2 -import tempfile import json import re -import os, shutil +import os +import shutil -from flask import render_template from mmif.utils.video_document_helper import convert_timepoint, convert_timeframe import cache @@ -50,16 +48,18 @@ def update(self, anno, mmif): elif anno.at_type.shortname == "Paragraph": view = mmif.get_view_by_id(anno.parent) - text_anno = view.get_annotation_by_id(anno.properties.get("document")) + text_anno = view.get_annotation_by_id( + anno.properties.get("document")) self.add_text_document(text_anno) - def add_bounding_box(self, anno, mmif): if "timePoint" in anno.properties: - timepoint_anno = find_annotation(anno.properties["timePoint"], mmif) + timepoint_anno = find_annotation( + anno.properties["timePoint"], mmif) if timepoint_anno: - self.add_timepoint(timepoint_anno, mmif, skip_if_view_has_frames=False) + self.add_timepoint(timepoint_anno, mmif, + skip_if_view_has_frames=False) else: self.frame_num = convert_timepoint(mmif, anno, "frames") self.secs = convert_timepoint(mmif, anno, "seconds") @@ -82,9 +82,11 @@ def add_bounding_box(self, anno, mmif): def add_timeframe(self, anno, mmif): # If annotation has multiple targets, pick the first and last as start and end if "targets" in anno.properties: - start_id, end_id = anno.properties.get("targets")[0], anno.properties.get("targets")[-1] + start_id, end_id = anno.properties.get( + "targets")[0], anno.properties.get("targets")[-1] anno_parent = mmif.get_view_by_id(anno.parent) - start_anno, end_anno = anno_parent.get_annotation_by_id(start_id), anno_parent.get_annotation_by_id(end_id) + start_anno, end_anno = anno_parent.get_annotation_by_id( + start_id), anno_parent.get_annotation_by_id(end_id) start = convert_timepoint(mmif, start_anno, "frames") end = convert_timepoint(mmif, end_anno, "frames") start_secs = convert_timepoint(mmif, start_anno, "seconds") @@ 
-93,7 +95,8 @@ def add_timeframe(self, anno, mmif): start, end = convert_timeframe(mmif, anno, "frames") start_secs, end_secs = convert_timeframe(mmif, anno, "seconds") self.range = (start, end) - self.timestamp_range = (str(datetime.timedelta(seconds=start_secs)), str(datetime.timedelta(seconds=end_secs))) + self.timestamp_range = (str(datetime.timedelta(seconds=start_secs)), str( + datetime.timedelta(seconds=end_secs))) self.sec_range = (start_secs, end_secs) if anno.properties.get("frameType"): self.frametype = str(anno.properties.get("frameType")) @@ -101,24 +104,28 @@ def add_timeframe(self, anno, mmif): self.frametype = str(anno.properties.get("label")) def add_timepoint(self, anno, mmif, skip_if_view_has_frames=True): - parent = mmif.get_view_by_id(anno.parent) - other_annotations = [k for k in parent.metadata.contains.keys() if k != anno.id] - # If there are TimeFrames in the same view, they most likely represent - # condensed information about representative frames (e.g. SWT). In this - # case, only render the TimeFrames and ignore the TimePoints. - if any([anno.shortname == "TimeFrame" for anno in other_annotations]) and skip_if_view_has_frames: - return - self.frame_num = convert_timepoint(mmif, anno, "frames") - self.secs = convert_timepoint(mmif, anno, "seconds") - self.timestamp = str(datetime.timedelta(seconds=self.secs)) - if anno.properties.get("label"): - self.frametype = anno.properties.get("label") + parent = mmif.get_view_by_id(anno.parent) + other_annotations = [ + k for k in parent.metadata.contains.keys() if k != anno.id] + # If there are TimeFrames in the same view, they most likely represent + # condensed information about representative frames (e.g. SWT). In this + # case, only render the TimeFrames and ignore the TimePoints. 
+ if any([anno.shortname == "TimeFrame" for anno in other_annotations]) and skip_if_view_has_frames: + return + self.frame_num = convert_timepoint(mmif, anno, "frames") + self.secs = convert_timepoint(mmif, anno, "seconds") + self.timestamp = str(datetime.timedelta(seconds=self.secs)) + if anno.properties.get("label"): + self.frametype = anno.properties.get("label") def add_text_document(self, anno): - t = anno.properties.get("text_value") or anno.properties.get("text").value + t = anno.properties.get( + "text_value") or anno.properties.get("text").value if t: text_val = re.sub(r'([\\\/\|\"\'])', r'\1 ', t) - self.text = self.text + [text_val] if text_val not in self.text else self.text + self.text = self.text + \ + [text_val] if text_val not in self.text else self.text + def find_annotation(anno_id, mmif): if mmif.id_delimiter in anno_id: @@ -153,7 +160,7 @@ def get_ocr_frames(view, mmif): frames[i].update(target, mmif) else: frames[i] = frame - + else: for annotation in view.get_annotations(): frame = OCRFrame(annotation, mmif) @@ -185,6 +192,7 @@ def paginate(frames_list): return {i: page for (i, page) in enumerate(pages)} + def make_image_directory(mmif_id): # Make path for temp OCR image files or clear image files if it exists path = cache.get_cache_root() / mmif_id / "img" @@ -232,10 +240,14 @@ def is_duplicate_image(prev_frame, frame, cv2_vid): img2_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) # Calculate the histogram and normalize it - hist_img1 = cv2.calcHist([img1_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256]) - cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX); - hist_img2 = cv2.calcHist([img2_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256]) - cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX); + hist_img1 = cv2.calcHist([img1_hsv], [0, 1], None, [ + 180, 256], [0, 180, 0, 256]) + cv2.normalize(hist_img1, hist_img1, alpha=0, + beta=1, norm_type=cv2.NORM_MINMAX) + hist_img2 = cv2.calcHist([img2_hsv], [0, 1], None, [ + 180, 256], [0, 180, 0, 256]) + cv2.normalize(hist_img2, hist_img2, alpha=0, + beta=1, norm_type=cv2.NORM_MINMAX) # Find the metric value metric_val = cv2.compareHist(hist_img1, hist_img2, cv2.HISTCMP_CHISQR) diff --git a/render.py b/render.py index 4e4d740..2020550 100644 --- a/render.py +++ b/render.py @@ -10,7 +10,7 @@ import displacy import traceback -from helpers import * +from utils import get_status, get_properties, get_abstract_view_type, url2posix, get_vtt_file from ocr import get_ocr_frames, paginate, find_duplicates, save_json, make_image_directory, is_duplicate_image import cv2 import json @@ -24,6 +24,7 @@ # -- Documents -- + def render_documents(mmif, viz_id): """ Returns HTML Tab representation of all documents in the MMIF object. @@ -34,9 +35,12 @@ def render_documents(mmif, viz_id): # Add symbolic link to document to static folder, so it can be accessed # by the browser. 
doc_path = document.location_path() - doc_symlink_path = pathlib.Path(current_app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}") + doc_symlink_path = pathlib.Path( + current_app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}") os.symlink(doc_path, doc_symlink_path) - doc_symlink_rel_path = '/' + doc_symlink_path.relative_to(current_app.static_folder).as_posix() + doc_symlink_rel_path = '/' + \ + doc_symlink_path.relative_to( + current_app.static_folder).as_posix() if document.at_type == DocumentTypes.TextDocument: html_tab = render_text(doc_path) @@ -47,16 +51,17 @@ def render_documents(mmif, viz_id): elif document.at_type == DocumentTypes.VideoDocument: html_tab = render_video(doc_symlink_rel_path, mmif, viz_id) - tabs.append({"id": document.id, - "tab_name": document.at_type.shortname, - "html": html_tab}) - + tabs.append({"id": document.id, + "tab_name": document.at_type.shortname, + "html": html_tab}) + except Exception: - tabs.append({"id": document.id, - "tab_name": document.at_type.shortname, - "html": f"Error rendering document:

{traceback.format_exc()}
"}) + tabs.append({"id": document.id, + "tab_name": document.at_type.shortname, + "html": f"Error rendering document:

{traceback.format_exc()}
"}) return tabs + def render_text(text_path): """Return the content of the text document, but with some HTML tags added.""" text_path = url2posix(text_path) @@ -66,12 +71,15 @@ def render_text(text_path): content = t_file.read().replace("\n", "
\n") return f"{content}\n" + def render_image(img_path): img_path = url2posix(img_path) html = StringIO() - html.write(f'Image\n') + html.write( + f'Image\n') return html.getvalue() + def render_audio(audio_path): audio_path = url2posix(audio_path) html = StringIO() @@ -80,6 +88,7 @@ def render_audio(audio_path): html.write("\n") return html.getvalue() + def render_video(vid_path, mmif, viz_id): vid_path = url2posix(vid_path) html = StringIO() @@ -89,12 +98,14 @@ def render_video(vid_path, mmif, viz_id): if get_abstract_view_type(view, mmif) == "ASR": vtt_path = get_vtt_file(view, viz_id) rel_vtt_path = re.search("mmif-viz-cache/.*", vtt_path).group(0) - html.write(f' \n') + html.write( + f' \n') html.write("\n") return html.getvalue() # -- Annotations -- + def render_annotations(mmif, viz_id): """ Returns HTML Tab representation of all annotations in the MMIF object. @@ -102,29 +113,36 @@ def render_annotations(mmif, viz_id): tabs = [] # These tabs should always be present tabs.append({"id": "info", "tab_name": "Info", "html": render_info(mmif)}) - tabs.append({"id": "annotations", "tab_name": "Annotations", "html": render_annotation_table(mmif)}) - tabs.append({"id": "tree", "tab_name": "Tree", "html": render_jstree(mmif)}) + tabs.append({"id": "annotations", "tab_name": "Annotations", + "html": render_annotation_table(mmif)}) + tabs.append({"id": "tree", "tab_name": "Tree", + "html": render_jstree(mmif)}) # These tabs are optional for view in mmif.views: try: abstract_view_type = get_abstract_view_type(view, mmif) # Workaround to deal with the fact that some apps have a version number in the URL - app_url = view.metadata.app if re.search(r"\/v\d+\.?\d?$", view.metadata.app) else view.metadata.app + "/v1" + app_url = view.metadata.app if re.search( + r"\/v\d+\.?\d?$", view.metadata.app) else view.metadata.app + "/v1" app_shortname = app_url.split("/")[-2] if abstract_view_type == "NER": - tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_ner(mmif, view)}) + tabs.append( + {"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_ner(mmif, view)}) elif abstract_view_type == "ASR": - tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_asr_vtt(view, viz_id)}) + tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", + "html": render_asr_vtt(view, viz_id)}) elif abstract_view_type == "OCR": - tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": prepare_and_render_ocr(mmif, view, viz_id)}) - + tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", + "html": prepare_and_render_ocr(mmif, view, viz_id)}) + except Exception as e: - tabs.append({"id": view.id, - "tab_name": view.id, + tabs.append({"id": view.id, + "tab_name": view.id, "html": f"Error rendering annotations:

{traceback.format_exc()}
"}) - + return tabs + def render_info(mmif): s = StringIO('Howdy') s.write("
")
@@ -136,7 +154,8 @@ def render_info(mmif):
     for view in mmif.views:
         app = view.metadata.app
         status = get_status(view)
-        s.write('%s  %s  %s  %d\n' % (view.id, app, status, len(view.annotations)))
+        s.write('%s  %s  %s  %d\n' %
+                (view.id, app, status, len(view.annotations)))
         if len(view.annotations) > 0:
             s.write('\n')
             types = Counter([a.at_type.shortname
@@ -156,37 +175,44 @@ def render_annotation_table(mmif):
                 % (view.id, view.metadata.app, status, len(view.annotations)))
         s.write("
\n") s.write("\n") - limit_len = lambda str: str[:500] + " . . . }" if len(str) > 500 else str + def limit_len(str): return str[:500] + \ + " . . . }" if len(str) > 500 else str for annotation in view.annotations: s.write(' \n') s.write(' \n' % annotation.id) s.write(' \n' % annotation.at_type.shortname) - s.write(' \n' % limit_len(get_properties(annotation))) + s.write(' \n' % + limit_len(get_properties(annotation))) s.write(' \n') s.write("
%s%s%s%s
\n") s.write("
\n") return s.getvalue() + def render_jstree(mmif): return render_template('interactive.html', mmif=mmif, aligned_views=[]) + def render_asr_vtt(view, viz_id): vtt_filename = get_vtt_file(view, viz_id) with open(vtt_filename) as vtt_file: vtt_content = vtt_file.read() return f"
<pre>{vtt_content}</pre>
" + def render_ner(mmif, view): metadata = view.metadata.contains.get(Uri.NE) ner_document = metadata.get('document') return displacy.visualize_ner(mmif, view, ner_document, current_app.root_path) + def prepare_and_render_ocr(mmif, view, viz_id): """ Prepares list of frames that will be passed back and forth between server and client, and renders the first page of the OCR. """ - vid_path = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[0].location_path() + vid_path = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[ + 0].location_path() ocr_frames = get_ocr_frames(view, mmif) @@ -198,6 +224,7 @@ def prepare_and_render_ocr(mmif, view, viz_id): save_json(frames_pages, view.id, viz_id) return render_ocr_page(viz_id, vid_path, view.id, 0) + def render_ocr_page(mmif_id, vid_path, view_id, page_number): """ Renders a single OCR page by iterating through frames and displaying the diff --git a/utils.py b/utils.py index 2c79dcb..6e7a362 100644 --- a/utils.py +++ b/utils.py @@ -1,332 +1,16 @@ -from collections import Counter -from datetime import timedelta -from io import StringIO - -from flask import Flask -from lapps.discriminators import Uri -from mmif import DocumentTypes from mmif.serialize.annotation import Text -from mmif.vocabulary import AnnotationTypes - +from flask import current_app import cache -import displacy -from iiif_utils import generate_iiif_manifest -from ocr import * - -# Get Properties from MMIF file --- - -# these two static folder-related params are important, do not remove -app = Flask(__name__, static_folder='static', static_url_path='') -app.secret_key = 'your_secret_key_here' - - -def asr_alignments_to_vtt(alignment_view, viz_id): - vtt_filename = cache.get_cache_root() / viz_id / f"{alignment_view.id.replace(':', '-')}.vtt" - if vtt_filename.exists(): - return str(vtt_filename) - vtt_file = open(vtt_filename, 'w') - vtt_file.write("WEBVTT\n\n") - annotations = alignment_view.annotations - timeframe_at_type = [at_type for at_type in alignment_view.metadata.contains if at_type.shortname == "TimeFrame"][0] - timeunit = alignment_view.metadata.contains[timeframe_at_type]["timeUnit"] - # TODO: wanted to use "mmif.get_alignments(AnnotationTypes.TimeFrame, Uri.TOKEN)" - # but that gave errors so I gave up on it - token_idx = {a.id: a for a in annotations if a.at_type.shortname == "Token"} - timeframe_idx = {a.id: a for a in annotations if a.at_type.shortname == "TimeFrame"} - alignments = [a for a in annotations if a.at_type.shortname == "Alignment"] - vtt_start = None - texts = [] - for alignment in alignments: - start_end_text = build_alignment(alignment, token_idx, timeframe_idx) - if start_end_text is not None: - # VTT specifically requires timestamps expressed in miliseconds and - # must be be in one of these formats: mm:ss.ttt or hh:mm:ss.ttt - # (https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API) - # ISO format can have up to 6 below the decimal point, on the other hand - # Assuming here that start and end are in miliseconds - start, end, text = start_end_text - start_kwarg, end_kwarg = {timeunit: float(start)}, {timeunit: float(end)} - start, end = timedelta(**start_kwarg), timedelta(**end_kwarg) - s_mins, s_secs = divmod(start.seconds, 60) - e_mins, e_secs = divmod(end.seconds, 60) - if not vtt_start: - vtt_start = f'{s_mins:02d}:{s_secs:02d}.{((s_secs - int(s_secs)) * 1000):03d}' - texts.append(text) - if len(texts) > 8: - vtt_end = f'{e_mins:02d}:{e_secs:02d}.{((e_secs - int(e_secs)) * 1000):03d}' - vtt_file.write(f'{vtt_start} --> 
{vtt_end}\n{" ".join(texts)}\n\n') - vtt_start = None - texts = [] - return vtt_file.name - - -def build_alignment(alignment, token_idx, timeframe_idx): - target = alignment.properties['target'] - source = alignment.properties['source'] - timeframe = timeframe_idx.get(source) - token = token_idx.get(target) - if timeframe and token: - start = timeframe.properties['start'] - end = timeframe.properties['end'] - text = token.properties['word'] - return start, end, text - - -def documents_to_htmls(mmif, viz_id): - """ - Returns a list of tuples, one for each element in the documents list of - the MMIF object, following the order in that list. Each tuple has four - elements: document type, document identifier, document path and the HTML - visualization. - """ - htmlized = [] - for document in mmif.documents: - doc_path = document.location_path() - app.logger.debug(f"MMIF on AV asset: {doc_path}") - doc_symlink_path = pathlib.Path(app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}") - os.symlink(doc_path, doc_symlink_path) - app.logger.debug(f"{doc_path} is symlinked to {doc_symlink_path}") - doc_symlink_rel_path = '/' + doc_symlink_path.relative_to(app.static_folder).as_posix() - app.logger.debug(f"and {doc_symlink_rel_path} will be used in HTML src attribute") - if document.at_type == DocumentTypes.TextDocument: - html = html_text(doc_symlink_rel_path) - elif document.at_type == DocumentTypes.VideoDocument: - fa_views = get_alignment_views(mmif) - fa_view = fa_views[0] if fa_views else None - html = html_video(viz_id, doc_symlink_rel_path, fa_view) - elif document.at_type == DocumentTypes.AudioDocument: - html = html_audio(doc_symlink_rel_path) - elif document.at_type == DocumentTypes.ImageDocument: - boxes = get_boxes(mmif) - html = html_img(doc_symlink_rel_path, boxes) - htmlized.append((document.at_type.shortname, document.id, doc_path, html)) - manifest_filename = generate_iiif_manifest(mmif, viz_id) - man = os.path.basename(manifest_filename) - temp = render_template("uv_player.html", manifest=man, mmif_id=viz_id) - htmlized.append(('UV', "", "", temp)) - return htmlized - - -def get_boxes(mmif): - # TODO: this gives you the last view with BoundingBoxes, should - # perhaps use get_views_contain() instead, should also select just - # the bounding boxes and add information from alignments to text - # documents. - tbox_view = mmif.get_view_contains(str(AnnotationTypes.BoundingBox)) - tbox_annotations = tbox_view.annotations - # For the boxes we pull some information from the annotation: the - # identifier, boxType and the (x,y,w,h) coordinates used by the - # Javascript code that draws the rectangle. - boxes = [] - for a in tbox_annotations: - coordinates = a.properties["coordinates"] - x = coordinates[0][0] - y = coordinates[0][1] - w = coordinates[1][0] - x - h = coordinates[2][1] - y - box = [a.properties["id"], a.properties["boxType"], [x, y, w, h]] - boxes.append(box) - return boxes - - -def prep_annotations(mmif, viz_id): - """Prepare annotations from the views, and return a list of pairs of tabname - and tab content. The first tab is alway the full MMIF pretty print.""" - tabs = [] - tabs.append(("Info", "
<pre>" + create_info(mmif) + "</pre>
")) - app.logger.debug(f"Prepared INFO Tab: {tabs[-1][0]}") - tabs.append(("Annotations", create_annotation_tables(mmif))) - app.logger.debug(f"Prepared SUMMARY Tab: {tabs[-1][0]}") - tabs.append(("Tree", render_interactive_mmif(mmif))) - app.logger.debug(f"Prepared JSTREE Tab: {tabs[-1][0]}") - # TODO: since this uses the same tab-name this will only show the same - # stuff; it does a loop but for now we assume there is just one file with - # alignments (generated by Kaldi) - for fa_view in get_alignment_views(mmif): - vtt_file = asr_alignments_to_vtt(fa_view, viz_id) - tabs.append(("WebVTT", '
<pre>' + open(vtt_file).read() + '</pre>
')) - app.logger.debug(f"Prepared a VTT Tab: {tabs[-1][0]}") - ner_views = get_ner_views(mmif) - use_id = True if len(ner_views) > 1 else False - for ner_view in ner_views: - if not ner_view.annotations: - continue - visualization = create_ner_visualization(mmif, ner_view) - tabname = "Entities-%s" % ner_view.id if use_id else "Entities" - tabs.append((tabname, visualization)) - app.logger.debug(f"Prepared a displaCy Tab: {tabs[-1][0]}") - # TODO: somewhat hackish - ocr_views = get_ocr_views(mmif) - use_id = True if len(ocr_views) > 1 else False - for ocr_view in ocr_views: - if not ocr_view.annotations: - continue - tabname = "Thumbnails-%s" % ocr_view.id - visualization = render_template("pre-ocr.html", view_id=ocr_view.id, tabname=tabname, mmif_id=viz_id) - tabs.append((tabname, visualization)) - app.logger.debug(f"Prepared a Thumbnails Tab: {tabs[-1][0]}") - return tabs - - -def create_info(mmif): - s = StringIO('Howdy') - for document in mmif.documents: - at_type = document.at_type.shortname - location = document.location - s.write("%s %s\n" % (at_type, location)) - s.write('\n') - for view in mmif.views: - app = view.metadata.app - status = get_status(view) - s.write('%s %s %s %d\n' % (view.id, app, status, len(view.annotations))) - if len(view.annotations) > 0: - s.write('\n') - types = Counter([a.at_type.shortname - for a in view.annotations]) - for attype, count in types.items(): - s.write(' %4d %s\n' % (count, attype)) - s.write('\n') - return s.getvalue() - - -def create_annotation_tables(mmif): - s = StringIO('Howdy') - for view in mmif.views: - status = get_status(view) - s.write('

<p><b>%s  %s  %s  %d annotations</b></p>\n'
-                % (view.id, view.metadata.app, status, len(view.annotations)))
-        s.write("<blockquote>\n")
-        s.write("<table>\n")
-        limit_len = lambda str: str[:500] + " . . . }" if len(str) > 500 else str
-        for annotation in view.annotations:
-            s.write('  <tr>\n')
-            s.write('    <td>%s</td>\n' % annotation.id)
-            s.write('    <td>%s</td>\n' % annotation.at_type.shortname)
-            s.write('    <td>%s</td>\n' % limit_len(get_properties(annotation)))
-            s.write('  </tr>\n')
-        s.write("</table>\n")
-        s.write("</blockquote>
\n") - return s.getvalue() - - -def get_document_ids(view, annotation_type): - metadata = view.metadata.contains.get(annotation_type) - ids = set([metadata['document']]) if 'document' in metadata else set() - for annotation in view.annotations: - if annotation.at_type.shortname == str(annotation_type): - try: - ids.add(annotation.properties["document"]) - except KeyError: - pass - return list(ids) - - -def get_alignment_views(mmif): - """Return alignment views which have at least TextDocument, Token, TimeFrame and - Alignment annotations.""" - views = [] - needed_types = set(['TextDocument', 'Token', 'TimeFrame', 'Alignment']) - for view in mmif.views: - annotation_types = view.metadata.contains.keys() - annotation_types = [at.shortname for at in annotation_types] - if needed_types.issubset(annotation_types): - views.append(view) - return views - - -# Render documents as HTML ------------ - -def html_video(viz_id, vpath, vtt_srcview=None): - vpath = url2posix(vpath) - html = StringIO() - html.write('\n") - return html.getvalue() - - -def html_text(tpath): - """Return the content of the text document, but with some HTML tags added.""" - if not os.path.isfile(tpath): - raise FileNotFoundError(f"File not found: {tpath}") - with open(tpath) as t_file: - content = t_file.read().replace("\n", "
\n") - return f"{content}\n" - - -def html_img(ipath, boxes=None, id="imgCanvas"): - ipath = url2posix(ipath) - boxes = [] if boxes is None else boxes - return render_template('image.html', filename=ipath, boxes=boxes, id=id) - - -def html_audio(apath): - apath = url2posix(apath) - return f"" def url2posix(path): """For the visualizer we often want a POSIX path and not a URL so we strip off the protocol if there is one.""" - if path.startswith('file:///'): + if str(path).startswith('file:///'): path = path[7:] return path -# Interactive MMIF Tab ----------- - -def render_interactive_mmif(mmif): - return render_template('interactive.html', mmif=mmif, aligned_views=get_aligned_views(mmif)) - - -# Functions for checking if view can be rendered with alignment highlighting -def get_aligned_views(mmif): - """Return list of properly aligned views (for tree display)""" - aligned_views = [] - for view in mmif.views: - if any([at_type.shortname == "Alignment" for at_type in view.metadata.contains]): - if check_view_alignment(view.annotations) == True: - aligned_views.append(view.id) - return aligned_views - - -def check_view_alignment(annotations): - anno_stack = [] - for annotation in annotations: - if annotation.at_type.shortname == "Alignment": - anno_stack.insert(0, annotation.properties) - else: - anno_stack.append(annotation.id) - if len(anno_stack) == 3: - if type(anno_stack[0]) == str or not ( - anno_stack[0]["source"] in anno_stack and anno_stack[0]["target"] in anno_stack): - return False - anno_stack = [] - return True - - -# NER Tools ---------------------- - -def get_ner_views(mmif): - return [v for v in mmif.views if Uri.NE in v.metadata.contains] - - -def create_ner_visualization(mmif, view): - metadata = view.metadata.contains.get(Uri.NE) - try: - # all the view's named entities refer to the same text document (kaldi) - document_ids = get_document_ids(view, Uri.NE) - return displacy.visualize_ner(mmif, view, document_ids[0], app.root_path) - except KeyError as e: - # the view's entities refer to more than one text document (tessearct) - pass - - def get_status(view): return 'ERROR' if 'message' in view.metadata.error else 'OKAY' @@ -343,19 +27,90 @@ def get_properties(annotation): return '{ %s }' % ', '.join(props_list) -# OCR Tools ---------------------- +def get_abstract_view_type(view, mmif): + annotation_types = [a.shortname for a in view.metadata.contains.keys()] + if "NamedEntity" in annotation_types: + return "NER" + elif all([anno_type in annotation_types for anno_type in ["Token", "TimeFrame", "Alignment"]]): + return "ASR" + # Define an OCR view as one that refers to a video and doesn't contain Sentences + # or Tokens + else: + for configuration in view.metadata.contains.values(): + if "document" in configuration \ + and mmif.get_document_by_id(configuration["document"]).at_type.shortname == "VideoDocument": + if not any([anno_type in annotation_types for anno_type in ["Sentence", "Token"]]): + return "OCR" + + +def get_vtt_file(view, viz_id): + vtt_filename = cache.get_cache_root() / viz_id / \ + f"{view.id.replace(':', '-')}.vtt" + if not vtt_filename.exists(): + with open(vtt_filename, 'w') as vtt_file: + vtt_file.write(write_vtt(view, viz_id)) + return str(vtt_filename) + + +def write_vtt(view, viz_id): + vtt = "WEBVTT\n\n" + timeunit = "milliseconds" + for a in view.metadata.contains.values(): + if "timeUnit" in a: + timeunit = a["timeUnit"] + break + token_idx = { + a.id: a for a in view.annotations if a.at_type.shortname == "Token"} + timeframe_idx = { + a.id: a for 
a in view.annotations if a.at_type.shortname == "TimeFrame"} + alignments = [ + a for a in view.annotations if a.at_type.shortname == "Alignment"] + vtt_start = None + texts = [] + for alignment in alignments: + start_end_text = build_alignment(alignment, token_idx, timeframe_idx) + if start_end_text is None: + continue + start, end, text = start_end_text + if not vtt_start: + vtt_start = format_time(start, timeunit) + texts.append(text) + if len(texts) > 8: + vtt_end = format_time(end, timeunit) + vtt += f"{vtt_start} --> {vtt_end}\n{' '.join(texts)}\n\n" + vtt_start = None + texts = [] + return vtt -def prepare_ocr_visualization(mmif, view, mmif_id): - """ Visualize OCR by extracting image frames with BoundingBoxes from video""" - # frames, text_docs, alignments = {}, {}, {} - vid_path = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[0].location_path() - ocr_frames = get_ocr_frames(view, mmif) +def build_alignment(alignment, token_idx, timeframe_idx): + target = alignment.properties['target'] + source = alignment.properties['source'] + timeframe = timeframe_idx.get(source) + token = token_idx.get(target) + if timeframe and token: + start = timeframe.properties['start'] + end = timeframe.properties['end'] + text = token.properties['word'] + return start, end, text + - # Generate pages (necessary to reduce IO cost) and render - frames_list = [(k, vars(v)) for k, v in ocr_frames.items()] - frames_list = find_duplicates(frames_list) - frames_pages = paginate(frames_list) - # Save page list as temp file - save_json(frames_pages, view.id, mmif_id) - return render_ocr(mmif_id, vid_path, view.id, 0) +def format_time(time, unit): + """ + Formats a time in seconds as a string in the format "hh:mm:ss.fff" + VTT specifically requires timestamps expressed in miliseconds and + must be be in one of these formats: mm:ss.ttt or hh:mm:ss.ttt + (https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API) + ISO format can have up to 6 below the decimal point, on the other hand + """ + if unit == "seconds": + time_in_ms = int(time * 1000) + else: + time_in_ms = int(time) + hours = time_in_ms // (1000 * 60 * 60) + time_in_ms %= (1000 * 60 * 60) + minutes = time_in_ms // (1000 * 60) + time_in_ms %= (1000 * 60) + seconds = time_in_ms // 1000 + time_in_ms %= 1000 + return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{time_in_ms:03d}" From 773cff44754ad36bda4cf9ae0dbe11480865b8a4 Mon Sep 17 00:00:00 2001 From: Hayden McCormick Date: Sat, 8 Jun 2024 15:02:50 -0400 Subject: [PATCH 09/11] Migrate from function-based to class-based rendering --- app.py | 20 ++- ocr.py | 15 ++ render.py | 409 +++++++++++++++++++++++++++++++----------------------- 3 files changed, 262 insertions(+), 182 deletions(-) diff --git a/app.py b/app.py index 91b5149..75a19be 100644 --- a/app.py +++ b/app.py @@ -7,11 +7,12 @@ from flask import Flask, request, render_template, flash, send_from_directory, redirect from mmif.serialize import Mmif +from mmif.vocabulary import DocumentTypes import cache from cache import set_last_access, cleanup import traceback -from render import render_documents, render_annotations, prepare_and_render_ocr, render_ocr_page +from render import render_documents, render_annotations, prepare_ocr, render_ocr_page # these two static folder-related params are important, do not remove app = Flask(__name__, static_folder='static', static_url_path='') @@ -26,9 +27,11 @@ def index(): @app.route('/ocr', methods=['POST']) def ocr(): if "page_number" not in request.json: - return serve_first_ocr_page(request.json) - 
else: - return serve_ocr_page(request.json) + build_ocr_tab(request.json) + request.json["page_number"] = 0 + # return serve_first_ocr_page(request.json) + # else: + return serve_ocr_page(request.json) @app.route('/upload', methods=['GET', 'POST']) @@ -102,9 +105,9 @@ def render_mmif(mmif_str, viz_id): annotations=rendered_annotations) -def serve_first_ocr_page(data): +def build_ocr_tab(data): """ - Prepares OCR (at load time, due to lazy loading) and serves the first page + Prepares OCR (at load time, due to lazy loading) """ try: data = dict(request.json) @@ -112,7 +115,10 @@ def serve_first_ocr_page(data): data["mmif_id"] / "file.mmif").read() mmif = Mmif(mmif_str) ocr_view = mmif.get_view_by_id(data["view_id"]) - return prepare_and_render_ocr(mmif, ocr_view, data["mmif_id"]) + prepare_ocr(mmif, ocr_view, data["mmif_id"]) + request.json["vid_path"] = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[ + 0].location_path() + except Exception as e: app.logger.error(f"{e}\n{traceback.format_exc()}") return f'

Error: {e} Check the server log for more information.' diff --git a/ocr.py b/ocr.py index dc21f02..03a4012 100644 --- a/ocr.py +++ b/ocr.py @@ -127,6 +127,21 @@ def add_text_document(self, anno): [text_val] if text_val not in self.text else self.text +def prepare_ocr(mmif, view, viz_id): + """ + Prepares list of frames that will be passed back and forth between server + and client, and renders the first page of the OCR. + """ + ocr_frames = get_ocr_frames(view, mmif) + + # Generate pages (necessary to reduce IO cost) and render + frames_list = [(k, vars(v)) for k, v in ocr_frames.items()] + frames_list = find_duplicates(frames_list) + frames_pages = paginate(frames_list) + # Save page list as temp file + save_json(frames_pages, view.id, viz_id) + + def find_annotation(anno_id, mmif): if mmif.id_delimiter in anno_id: view_id, anno_id = anno_id.split(mmif.id_delimiter) diff --git a/render.py b/render.py index 2020550..13c769c 100644 --- a/render.py +++ b/render.py @@ -11,7 +11,7 @@ import traceback from utils import get_status, get_properties, get_abstract_view_type, url2posix, get_vtt_file -from ocr import get_ocr_frames, paginate, find_duplicates, save_json, make_image_directory, is_duplicate_image +from ocr import prepare_ocr, make_image_directory, is_duplicate_image import cv2 import json import tempfile @@ -22,7 +22,7 @@ Methods to render MMIF documents and their annotations in various formats. """ -# -- Documents -- +# -- Render methods -- def render_documents(mmif, viz_id): @@ -31,204 +31,263 @@ def render_documents(mmif, viz_id): """ tabs = [] for document in mmif.documents: - try: - # Add symbolic link to document to static folder, so it can be accessed - # by the browser. - doc_path = document.location_path() - doc_symlink_path = pathlib.Path( - current_app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}") - os.symlink(doc_path, doc_symlink_path) - doc_symlink_rel_path = '/' + \ - doc_symlink_path.relative_to( - current_app.static_folder).as_posix() + if document.at_type == DocumentTypes.TextDocument: + tabs.append(TextTab(document, viz_id)) + elif document.at_type == DocumentTypes.ImageDocument: + tabs.append(ImageTab(document, viz_id)) + elif document.at_type == DocumentTypes.AudioDocument: + tabs.append(AudioTab(document, viz_id)) + elif document.at_type == DocumentTypes.VideoDocument: + tabs.append(VideoTab(document, mmif, viz_id)) - if document.at_type == DocumentTypes.TextDocument: - html_tab = render_text(doc_path) - elif document.at_type == DocumentTypes.ImageDocument: - html_tab = render_image(doc_symlink_rel_path) - elif document.at_type == DocumentTypes.AudioDocument: - html_tab = render_audio(doc_symlink_rel_path) - elif document.at_type == DocumentTypes.VideoDocument: - html_tab = render_video(doc_symlink_rel_path, mmif, viz_id) - - tabs.append({"id": document.id, - "tab_name": document.at_type.shortname, - "html": html_tab}) - - except Exception: - tabs.append({"id": document.id, - "tab_name": document.at_type.shortname, - "html": f"Error rendering document:

<pre>{traceback.format_exc()}</pre>
"}) return tabs -def render_text(text_path): - """Return the content of the text document, but with some HTML tags added.""" - text_path = url2posix(text_path) - if not os.path.isfile(text_path): - raise FileNotFoundError(f"File not found: {text_path}") - with open(text_path) as t_file: - content = t_file.read().replace("\n", "
\n") - return f"{content}\n" - - -def render_image(img_path): - img_path = url2posix(img_path) - html = StringIO() - html.write( - f'Image\n') - return html.getvalue() - - -def render_audio(audio_path): - audio_path = url2posix(audio_path) - html = StringIO() - html.write('\n") - return html.getvalue() - - -def render_video(vid_path, mmif, viz_id): - vid_path = url2posix(vid_path) - html = StringIO() - html.write('\n") - return html.getvalue() - -# -- Annotations -- - - def render_annotations(mmif, viz_id): """ Returns HTML Tab representation of all annotations in the MMIF object. """ tabs = [] # These tabs should always be present - tabs.append({"id": "info", "tab_name": "Info", "html": render_info(mmif)}) - tabs.append({"id": "annotations", "tab_name": "Annotations", - "html": render_annotation_table(mmif)}) - tabs.append({"id": "tree", "tab_name": "Tree", - "html": render_jstree(mmif)}) + tabs.append(InfoTab(mmif)) + tabs.append(AnnotationTableTab(mmif)) + tabs.append(JSTreeTab(mmif)) # These tabs are optional for view in mmif.views: - try: - abstract_view_type = get_abstract_view_type(view, mmif) - # Workaround to deal with the fact that some apps have a version number in the URL - app_url = view.metadata.app if re.search( - r"\/v\d+\.?\d?$", view.metadata.app) else view.metadata.app + "/v1" - app_shortname = app_url.split("/")[-2] - if abstract_view_type == "NER": - tabs.append( - {"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": render_ner(mmif, view)}) - elif abstract_view_type == "ASR": - tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", - "html": render_asr_vtt(view, viz_id)}) - elif abstract_view_type == "OCR": - tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", - "html": prepare_and_render_ocr(mmif, view, viz_id)}) - - except Exception as e: - tabs.append({"id": view.id, - "tab_name": view.id, - "html": f"Error rendering annotations:

<pre>{traceback.format_exc()}</pre>
"}) + abstract_view_type = get_abstract_view_type(view, mmif) + if abstract_view_type == "NER": + tabs.append(NERTab(mmif, view)) + elif abstract_view_type == "ASR": + tabs.append(VTTTab(mmif, view, viz_id)) + elif abstract_view_type == "OCR": + tabs.append(OCRTab(mmif, view, viz_id)) return tabs -def render_info(mmif): - s = StringIO('Howdy') - s.write("
<pre>")
-    for document in mmif.documents:
-        at_type = document.at_type.shortname
-        location = document.location
-        s.write("%s  %s\n" % (at_type, location))
-    s.write('\n')
-    for view in mmif.views:
-        app = view.metadata.app
-        status = get_status(view)
-        s.write('%s  %s  %s  %d\n' %
-                (view.id, app, status, len(view.annotations)))
-        if len(view.annotations) > 0:
-            s.write('\n')
-            types = Counter([a.at_type.shortname
-                             for a in view.annotations])
-            for attype, count in types.items():
-                s.write('    %4d %s\n' % (count, attype))
-        s.write('\n')
-    s.write("</pre>
") - return s.getvalue() +# -- Base Tab Class -- +class DocumentTab(): + def __init__(self, document, viz_id): + self.id = document.id + self.tab_name = document.at_type.shortname + self.viz_id = viz_id -def render_annotation_table(mmif): - s = StringIO('Howdy') - for view in mmif.views: - status = get_status(view) - s.write('

<p><b>%s  %s  %s  %d annotations</b></p>\n'
-                % (view.id, view.metadata.app, status, len(view.annotations)))
-        s.write("<blockquote>\n")
-        s.write("<table>\n")
-        def limit_len(str): return str[:500] + \
-            " . . . }" if len(str) > 500 else str
-        for annotation in view.annotations:
-            s.write('  <tr>\n')
-            s.write('    <td>%s</td>\n' % annotation.id)
-            s.write('    <td>%s</td>\n' % annotation.at_type.shortname)
-            s.write('    <td>%s</td>\n' %
-                    limit_len(get_properties(annotation)))
-            s.write('  </tr>\n')
-        s.write("</table>\n")
-        s.write("</blockquote>
\n") - return s.getvalue() - - -def render_jstree(mmif): - return render_template('interactive.html', mmif=mmif, aligned_views=[]) - - -def render_asr_vtt(view, viz_id): - vtt_filename = get_vtt_file(view, viz_id) - with open(vtt_filename) as vtt_file: - vtt_content = vtt_file.read() - return f"
<pre>{vtt_content}</pre>
" - - -def render_ner(mmif, view): - metadata = view.metadata.contains.get(Uri.NE) - ner_document = metadata.get('document') - return displacy.visualize_ner(mmif, view, ner_document, current_app.root_path) - - -def prepare_and_render_ocr(mmif, view, viz_id): - """ - Prepares list of frames that will be passed back and forth between server - and client, and renders the first page of the OCR. - """ - vid_path = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[ - 0].location_path() + try: + # Add symbolic link to document to static folder, so it can be accessed + # by the browser. + self.doc_path = document.location_path() + self.doc_symlink_path = pathlib.Path( + current_app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{self.doc_path.split('.')[-1]}") + os.symlink(self.doc_path, self.doc_symlink_path) + self.doc_symlink_rel_path = '/' + \ + self.doc_symlink_path.relative_to( + current_app.static_folder).as_posix() + + self.html = self.render() - ocr_frames = get_ocr_frames(view, mmif) + except Exception as e: + self.html = f"Error rendering document:

<pre>{traceback.format_exc()}</pre>
" + + def __str__(self): + return f"Tab: {self.tab_name} ({self.id})" + + +class AnnotationTab(): + def __init__(self, mmif, view=None): + self.mmif = mmif + # Some AnnotationTab sub-classes don't refer to a specific view, and so + # they specify their own ids and tab names. For ones that do refer to + # a specific view, we set the ids/tab names based on view properties. + if view: + self.view = view + # Workaround to deal with the fact that some apps have a version number + # in the URL + app_url = view.metadata.app if re.search( + r"\/v\d+\.?\d?$", view.metadata.app) else view.metadata.app + "/v1" + app_shortname = app_url.split("/")[-2] - # Generate pages (necessary to reduce IO cost) and render - frames_list = [(k, vars(v)) for k, v in ocr_frames.items()] - frames_list = find_duplicates(frames_list) - frames_pages = paginate(frames_list) - # Save page list as temp file - save_json(frames_pages, view.id, viz_id) - return render_ocr_page(viz_id, vid_path, view.id, 0) + self.id = view.id + self.tab_name = f"{app_shortname}-{view.id}" + try: + self.html = self.render() + except Exception as e: + self.html = f"Error rendering view:

<pre>{traceback.format_exc()}</pre>
" + + +# -- Document Classes -- + +class TextTab(DocumentTab): + def __init__(self, document, viz_id): + super().__init__(document, viz_id) + + def render(self): + with open(self.doc_path) as t_file: + content = t_file.read().replace("\n", "
\n") + return f"{content}\n" + + +class ImageTab(DocumentTab): + def __init__(self, document, viz_id): + super().__init__(document, viz_id) + + def render(self): + img_path = url2posix(self.doc_path) + html = StringIO() + html.write( + f'Image\n') + return html.getvalue() + + +class AudioTab(DocumentTab): + def __init__(self, document, viz_id): + super().__init__(document, viz_id) + + def render(self): + audio_path = url2posix(self.doc_symlink_rel_path) + html = StringIO() + html.write('\n") + return html.getvalue() + + +class VideoTab(DocumentTab): + def __init__(self, document, mmif, viz_id): + # VideoTab needs access to the MMIF object to get the VTT file + self.mmif = mmif + super().__init__(document, viz_id) + + def render(self): + vid_path = url2posix(self.doc_symlink_rel_path) + html = StringIO() + html.write('\n") + return html.getvalue() + + +# -- Annotation Classes -- + +class InfoTab(AnnotationTab): + def __init__(self, mmif): + self.id = "info" + self.tab_name = "Info" + super().__init__(mmif) + + def render(self): + mmif = self.mmif + s = StringIO('Howdy') + s.write("
<pre>")
+        for document in mmif.documents:
+            at_type = document.at_type.shortname
+            location = document.location
+            s.write("%s  %s\n" % (at_type, location))
+        s.write('\n')
+        for view in mmif.views:
+            app = view.metadata.app
+            status = get_status(view)
+            s.write('%s  %s  %s  %d\n' %
+                    (view.id, app, status, len(view.annotations)))
+            if len(view.annotations) > 0:
+                s.write('\n')
+                types = Counter([a.at_type.shortname
+                                for a in view.annotations])
+                for attype, count in types.items():
+                    s.write('    %4d %s\n' % (count, attype))
+            s.write('\n')
+        s.write("</pre>
") + return s.getvalue() + + +class AnnotationTableTab(AnnotationTab): + def __init__(self, mmif): + self.id = "annotations" + self.tab_name = "Annotations" + super().__init__(mmif) + + def render(self): + mmif = self.mmif + s = StringIO('Howdy') + for view in mmif.views: + status = get_status(view) + s.write('

<p><b>%s  %s  %s  %d annotations</b></p>\n'
+                % (view.id, view.metadata.app, status, len(view.annotations)))
+            s.write("<blockquote>\n")
+            s.write("<table>\n")
+            def limit_len(str): return str[:500] + \
+                " . . . }" if len(str) > 500 else str
+            for annotation in view.annotations:
+                s.write('  <tr>\n')
+                s.write('    <td>%s</td>\n' % annotation.id)
+                s.write('    <td>%s</td>\n' % annotation.at_type.shortname)
+                s.write('    <td>%s</td>\n' %
+                        limit_len(get_properties(annotation)))
+                s.write('  </tr>\n')
+            s.write("</table>\n")
+            s.write("</blockquote>
\n") + return s.getvalue() + + +class JSTreeTab(AnnotationTab): + def __init__(self, mmif): + self.id = "tree" + self.tab_name = "Tree" + super().__init__(mmif) + + def render(self): + mmif = self.mmif + return render_template('interactive.html', mmif=mmif, aligned_views=[]) + + +class NERTab(AnnotationTab): + def __init__(self, mmif, view): + super().__init__(mmif, view) + + def render(self): + metadata = self.view.metadata.contains.get(Uri.NE) + ner_document = metadata.get('document') + return displacy.visualize_ner(self.mmif, self.view, ner_document, current_app.root_path) + + +class VTTTab(AnnotationTab): + def __init__(self, mmif, view, viz_id): + self.viz_id = viz_id + super().__init__(mmif, view) + + def render(self): + vtt_filename = get_vtt_file(self.view, self.viz_id) + with open(vtt_filename) as vtt_file: + vtt_content = vtt_file.read() + return f"
<pre>{vtt_content}</pre>
" + + +class OCRTab(AnnotationTab): + def __init__(self, mmif, view, viz_id): + self.viz_id = viz_id + self.vid_path = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[ + 0].location_path() + + super().__init__(mmif, view) + + def render(self): + return render_template("pre-ocr.html", view_id=self.view.id, tabname=self.tab_name, mmif_id=self.viz_id) + # prepare_ocr(self.mmif, self.view, self.viz_id) + # return render_ocr_page(self.viz_id, self.vid_path, self.view.id, 0) def render_ocr_page(mmif_id, vid_path, view_id, page_number): """ Renders a single OCR page by iterating through frames and displaying the - contents/alignments. + contents/alignments. Note: this needs to be a separate function (not a method + in OCRTab) because it is called by the server when the page is changed. """ # Path for storing temporary images generated by cv2 cv2_vid = cv2.VideoCapture(vid_path) From 1b7d9384a2fe1c19d3798e3fde90efa17c4b64f7 Mon Sep 17 00:00:00 2001 From: Hayden McCormick Date: Tue, 18 Jun 2024 16:52:58 -0400 Subject: [PATCH 10/11] Add better support for different types of OCR views --- ocr.py | 27 ++------------------------- render.py | 2 +- templates/ocr.html | 2 +- templates/tab-placeholder.html | 33 +++++++++++++++++++++++++++++++++ utils.py | 21 +++++++++++++-------- 5 files changed, 50 insertions(+), 35 deletions(-) create mode 100644 templates/tab-placeholder.html diff --git a/ocr.py b/ocr.py index dc21f02..6d8743d 100644 --- a/ocr.py +++ b/ocr.py @@ -193,9 +193,9 @@ def paginate(frames_list): return {i: page for (i, page) in enumerate(pages)} -def make_image_directory(mmif_id): +def make_image_directory(mmif_id, view_id): # Make path for temp OCR image files or clear image files if it exists - path = cache.get_cache_root() / mmif_id / "img" + path = cache.get_cache_root() / mmif_id / "img" / view_id if os.path.exists(path): shutil.rmtree(path) os.makedirs(path) @@ -267,29 +267,6 @@ def round_boxes(boxes): return rounded_boxes -def get_ocr_views(mmif): - """Returns all CV views, which contain timeframes or bounding boxes""" - views = [] - required_types = ["TimeFrame", "BoundingBox", "TimePoint"] - for view in mmif.views: - for anno_type, anno in view.metadata.contains.items(): - # Annotation belongs to a CV view if it is a TimeFrame/BB and it refers to a VideoDocument - # if anno.get("document") is None: - # continue - # if anno_type.shortname in required_types and mmif.get_document_by_id( - # anno["document"]).at_type.shortname == "VideoDocument": - # views.append(view) - # continue - if anno_type.shortname in required_types: - views.append(view) - break - # TODO: Couldn't find a simple way to show if an alignment view is a CV/Frames-type view - elif "parseq" in view.metadata.app: - views.append(view) - break - return views - - def save_json(data, view_id, mmif_id): path = cache.get_cache_root() / mmif_id / f"{view_id}-pages.json" with open(path, 'w') as f: diff --git a/render.py b/render.py index 2020550..45d6e69 100644 --- a/render.py +++ b/render.py @@ -236,7 +236,7 @@ def render_ocr_page(mmif_id, vid_path, view_id, page_number): thumbnail_pages = json.load(open(tn_data_fname)) page = thumbnail_pages[str(page_number)] prev_frame_cap = None - path = make_image_directory(mmif_id) + path = make_image_directory(mmif_id, view_id) for frame_num, frame in page: # If index is range instead of frame... if frame.get("range"): diff --git a/templates/ocr.html b/templates/ocr.html index 8d75c5e..9bf370f 100644 --- a/templates/ocr.html +++ b/templates/ocr.html @@ -1,7 +1,7 @@
{% for frame_num, frame in page %} - {% set filename = "/mmif-viz-cache/" + mmif_id + "/img/" + frame["id"] %} + {% set filename = "/mmif-viz-cache/" + mmif_id + "/img/" + view_id + "/" + frame["id"] %} {% set id = frame["id"] %} {% set boxes = frame["boxes"] %} {% set secs = frame["secs"] %} diff --git a/templates/tab-placeholder.html b/templates/tab-placeholder.html new file mode 100644 index 0000000..c2bf537 --- /dev/null +++ b/templates/tab-placeholder.html @@ -0,0 +1,33 @@ + + +
+
+
+ + \ No newline at end of file diff --git a/utils.py b/utils.py index 6e7a362..fd42011 100644 --- a/utils.py +++ b/utils.py @@ -33,16 +33,21 @@ def get_abstract_view_type(view, mmif): return "NER" elif all([anno_type in annotation_types for anno_type in ["Token", "TimeFrame", "Alignment"]]): return "ASR" + ocr_apps = ["swt-detection", "doctr-wrapper", "pyscenedetect-wrapper", "easyocr-wrapper", + "slatedetection", "fewshotclassifier", "barsdetection", "east-textdetection", + "parseqocr-wrapper", "tesseractocr-wrapper", "chyron-detection", "paddleocr-wrapper"] + if any([app in view.metadata.app for app in ocr_apps]): + return "OCR" # Define an OCR view as one that refers to a video and doesn't contain Sentences # or Tokens - else: - for configuration in view.metadata.contains.values(): - if "document" in configuration \ - and mmif.get_document_by_id(configuration["document"]).at_type.shortname == "VideoDocument": - if not any([anno_type in annotation_types for anno_type in ["Sentence", "Token"]]): - return "OCR" - - + # else: + # for configuration in view.metadata.contains.values(): + # if "document" in configuration \ + # and mmif.get_document_by_id(configuration["document"]).at_type.shortname == "VideoDocument": + # if not any([anno_type in annotation_types for anno_type in ["Sentence", "Token"]]): + # return "OCR" + + def get_vtt_file(view, viz_id): vtt_filename = cache.get_cache_root() / viz_id / \ f"{view.id.replace(':', '-')}.vtt" From 6e216614ca4da707739b05979ee46f194888c1ae Mon Sep 17 00:00:00 2001 From: Hayden McCormick Date: Tue, 25 Jun 2024 15:08:05 -0400 Subject: [PATCH 11/11] Resolve further merge bugs --- app.py | 9 +-------- ocr.py | 7 ++++--- utils.py | 2 +- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/app.py b/app.py index 1e95b35..a92f92c 100644 --- a/app.py +++ b/app.py @@ -148,13 +148,7 @@ def upload_file(in_mmif): with open(path / 'file.mmif', 'w') as in_mmif_file: app.logger.debug(f"Writing original MMIF to {path / 'file.mmif'}") in_mmif_file.write(in_mmif_str) - mmif = Mmif(in_mmif_str) - htmlized_docs = utils.documents_to_htmls(mmif, viz_id) - app.logger.debug(f"Prepared document: {[d[0] for d in htmlized_docs]}") - annotations = utils.prep_annotations(mmif, viz_id) - app.logger.debug(f"Prepared Annotations: {[annotation[0] for annotation in annotations]}") - html_page = render_template('player.html', - docs=htmlized_docs, viz_id=viz_id, annotations=annotations) + html_page = render_mmif(in_mmif_str, viz_id) with open(os.path.join(path, "index.html"), "w") as f: f.write(html_page) except FileExistsError: @@ -164,7 +158,6 @@ def upload_file(in_mmif): t = Thread(target=cleanup) t.daemon = True t.run() - agent = request.headers.get('User-Agent') if 'curl' in agent.lower(): return f"Visualization ID is {viz_id}\nYou can access the visualized file at {request.url_root}display/{viz_id}\n" diff --git a/ocr.py b/ocr.py index 6b2599c..dc1bbc6 100644 --- a/ocr.py +++ b/ocr.py @@ -5,6 +5,8 @@ import re import os import shutil +from mmif.vocabulary.annotation_types import AnnotationTypes +from mmif.vocabulary.document_types import DocumentTypes from mmif.utils.video_document_helper import convert_timepoint, convert_timeframe @@ -54,9 +56,8 @@ def update(self, anno, mmif): def add_bounding_box(self, anno, mmif): if "timePoint" in anno.properties: - timepoint_anno = find_annotation( - anno.properties["timePoint"], mmif) - + timepoint_anno = mmif[anno.get("timePoint")] + if timepoint_anno: self.add_timepoint(timepoint_anno, mmif, skip_if_view_has_frames=False) 
diff --git a/utils.py b/utils.py index fd42011..5f44679 100644 --- a/utils.py +++ b/utils.py @@ -11,7 +11,7 @@ def url2posix(path): return path -def get_status(view): +def get_status(view): return 'ERROR' if 'message' in view.metadata.error else 'OKAY'