Skip to content

Commit 24e9e61

Browse files
Mostly implemented refactor with new code structure, minus OCR functionality
1 parent 2038f94 commit 24e9e61

File tree

7 files changed

+284
-25
lines changed

7 files changed

+284
-25
lines changed

app.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,18 @@
44
import sys
55
from threading import Thread
66

7-
from flask import request, render_template, flash, send_from_directory, redirect
7+
from flask import Flask, request, render_template, flash, send_from_directory, redirect
88
from mmif.serialize import Mmif
99

1010
import cache
1111
from cache import set_last_access, cleanup
12-
from utils import app, render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization
12+
from utils import render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization
1313
import traceback
14+
from render import render_documents, render_annotations
15+
16+
# these two static folder-related params are important, do not remove
17+
app = Flask(__name__, static_folder='static', static_url_path='')
18+
app.secret_key = 'your_secret_key_here'
1419

1520

1621
@app.route('/')
@@ -103,13 +108,12 @@ def send_js(path):
103108

104109
def render_mmif(mmif_str, viz_id):
105110
mmif = Mmif(mmif_str)
106-
htmlized_docs = documents_to_htmls(mmif, viz_id)
107-
app.logger.debug(f"Prepared document: {[d[0] for d in htmlized_docs]}")
108-
annotations = prep_annotations(mmif, viz_id)
109-
app.logger.debug(f"Prepared Annotations: {[annotation[0] for annotation in annotations]}")
111+
rendered_documents = render_documents(mmif, viz_id)
112+
rendered_annotations = render_annotations(mmif, viz_id)
110113
return render_template('player.html',
111-
docs=htmlized_docs, viz_id=viz_id, annotations=annotations)
112-
114+
docs=rendered_documents,
115+
viz_id=viz_id,
116+
annotations=rendered_annotations)
113117

114118
def upload_file(in_mmif):
115119
# Save file locally

helpers.py

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
from mmif.serialize.annotation import Text
2+
from flask import current_app
3+
import cache
4+
5+
def url2posix(path):
    """Turn a ``file:///`` URL into a plain POSIX path.

    When `path` begins with ``file:///`` the first seven characters
    (``file://``) are dropped, which leaves the leading slash in place.
    Any other value is returned unchanged.
    """
    return path[7:] if path.startswith('file:///') else path
11+
12+
13+
def get_doc_path(document):
    """Return whatever ``document.location_path()`` yields for `document`."""
    return document.location_path()
    # NOTE: the symlinking logic below is disabled and kept for reference only.
    # app.logger.debug(f"MMIF on AV asset: {doc_path}")
    # doc_symlink_path = pathlib.Path(app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}")
    # os.symlink(doc_path, doc_symlink_path)
    # app.logger.debug(f"{doc_path} is symlinked to {doc_symlink_path}")
    # doc_symlink_rel_path = '/' + doc_symlink_path.relative_to(app.static_folder).as_posix()
    # app.logger.debug(f"and {doc_symlink_rel_path} will be used in HTML src attribute")
22+
23+
24+
def get_status(view):
    """Return 'ERROR' if the view's error metadata contains 'message', else 'OKAY'."""
    if 'message' in view.metadata.error:
        return 'ERROR'
    return 'OKAY'
26+
27+
28+
def get_properties(annotation):
    """Format an annotation's properties as ``{ key=value, ... }``.

    The properties are taken from ``annotation.properties._serialize()``,
    the ``id`` entry is left out, and the remaining keys are listed in
    sorted order. Values that are ``Text`` instances are replaced by their
    ``value`` attribute.

    Returns:
        A single string; ``'{  }'`` when nothing but the id is present.
    """
    props = annotation.properties._serialize()
    # Tolerate a missing 'id' instead of raising KeyError.
    props.pop('id', None)
    formatted = []
    for name in sorted(props):
        value = props[name]
        # isinstance also covers subclasses of Text.
        if isinstance(value, Text):
            value = value.value
        formatted.append(f"{name}={value}")
    return '{ %s }' % ', '.join(formatted)
38+
39+
40+
def get_abstract_view_type(view):
    """Classify a view by the annotation types its metadata declares.

    Returns "NER" when a "NamedEntity" type is declared; otherwise "ASR"
    when "Token", "TimeFrame" and "Alignment" are all declared; otherwise
    None.
    """
    declared = [at_type.shortname for at_type in view.metadata.contains.keys()]
    if "NamedEntity" in declared:
        return "NER"
    for required in ("Token", "TimeFrame", "Alignment"):
        if required not in declared:
            return None
    return "ASR"
46+
47+
48+
def get_vtt_file(view, viz_id):
    """Return the path (as a string) of the VTT file for `view`.

    The file lives under ``cache.get_cache_root() / viz_id`` and is named
    after the view id with ':' replaced by '-'. It is written with the
    output of ``write_vtt`` only when it does not exist yet.
    """
    file_stem = view.id.replace(':', '-')
    vtt_path = cache.get_cache_root() / viz_id / f"{file_stem}.vtt"
    if vtt_path.exists():
        # Already generated on an earlier call; reuse it.
        return str(vtt_path)
    with open(vtt_path, 'w') as out:
        out.write(write_vtt(view, viz_id))
    return str(vtt_path)
54+
55+
56+
def write_vtt(view, viz_id):
    """Build WebVTT text from the Token/TimeFrame/Alignment annotations of `view`.

    Alignments are resolved through ``build_alignment``; unresolved ones are
    skipped. Words are grouped into cues of nine. Each cue runs from the
    start of its first word to the end of its last word.

    Unlike the earlier version, words left over after the last full group
    are emitted as a final, shorter cue instead of being dropped.

    Args:
        view: object exposing ``annotations``; each annotation needs ``id``
            and ``at_type.shortname``.
        viz_id: not used here; kept so the signature stays unchanged.

    Returns:
        The VTT document as a string, always starting with "WEBVTT".
    """
    words_per_cue = 9  # the original flushed once more than 8 words were collected
    token_idx = {a.id: a for a in view.annotations if a.at_type.shortname == "Token"}
    timeframe_idx = {a.id: a for a in view.annotations if a.at_type.shortname == "TimeFrame"}
    alignments = [a for a in view.annotations if a.at_type.shortname == "Alignment"]

    chunks = ["WEBVTT\n\n"]
    cue_start = None
    last_end = None
    words = []
    for alignment in alignments:
        start_end_text = build_alignment(alignment, token_idx, timeframe_idx)
        if start_end_text is None:
            continue
        start, end, text = start_end_text
        if cue_start is None:
            cue_start = format_time(start)
        last_end = end
        words.append(text)
        if len(words) >= words_per_cue:
            chunks.append(f"{cue_start} --> {format_time(last_end)}\n{' '.join(words)}\n\n")
            cue_start = None
            words = []
    if words:
        # Flush the trailing partial group so the end of the transcript is kept.
        chunks.append(f"{cue_start} --> {format_time(last_end)}\n{' '.join(words)}\n\n")
    return "".join(chunks)
77+
78+
79+
def build_alignment(alignment, token_idx, timeframe_idx):
    """Resolve an alignment into a ``(start, end, word)`` triple.

    The alignment's 'source' is looked up in `timeframe_idx` and its
    'target' in `token_idx`. If either lookup yields nothing (or a falsy
    object), None is returned.
    """
    target_id = alignment.properties['target']
    source_id = alignment.properties['source']
    timeframe = timeframe_idx.get(source_id)
    token = token_idx.get(target_id)
    if not (timeframe and token):
        return None
    span = timeframe.properties
    return span['start'], span['end'], token.properties['word']
89+
90+
91+
def format_time(time_in_ms):
    """Format a time given in milliseconds as "hh:mm:ss.fff".

    WebVTT timestamps carry exactly three fractional digits and must be in
    one of these formats: mm:ss.ttt or hh:mm:ss.ttt
    (https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API).
    ISO time formatting, by contrast, can have up to six digits after the
    decimal point, so it is not used here.

    Args:
        time_in_ms: time in milliseconds; non-integer values are truncated
            to whole milliseconds.

    Returns:
        The timestamp string, e.g. "01:02:03.004".
    """
    # The 'd' format codes below reject floats, so coerce once up front.
    remaining = int(time_in_ms)
    hours, remaining = divmod(remaining, 1000 * 60 * 60)
    minutes, remaining = divmod(remaining, 1000 * 60)
    seconds, millis = divmod(remaining, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"

ocr.py

-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@ def get_ocr_frames(view, mmif):
164164
frames[i].update(annotation, mmif)
165165
else:
166166
frames[i] = frame
167-
print(frames)
168167
return frames
169168

170169

render.py

+153
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import os
2+
import pathlib
3+
import shutil
4+
import tempfile
5+
import threading
6+
import time
7+
from io import StringIO
8+
from collections import Counter
9+
from flask import render_template, current_app
10+
11+
from mmif import DocumentTypes
12+
from mmif.serialize.annotation import Text
13+
from mmif.vocabulary import AnnotationTypes
14+
from lapps.discriminators import Uri
15+
import displacy
16+
17+
from helpers import *
18+
19+
import cache
20+
21+
"""
22+
Methods to render MMIF documents and their annotations in various formats.
23+
"""
24+
25+
# -- Documents --
26+
27+
def render_documents(mmif, viz_id):
    """
    Returns HTML Tab representation of all documents in the MMIF object.

    Each tab is a dict with the keys "id" (the document id), "tab_name"
    (the short name of the document type) and "html" (the rendered content).
    Documents whose type is not text, image, audio or video still get a tab,
    with empty content.
    """
    tabs = []
    for document in mmif.documents:
        doc_path = get_doc_path(document)
        if document.at_type == DocumentTypes.TextDocument:
            html_tab = render_text(doc_path)
        elif document.at_type == DocumentTypes.ImageDocument:
            html_tab = render_image(doc_path)
        elif document.at_type == DocumentTypes.AudioDocument:
            html_tab = render_audio(doc_path)
        elif document.at_type == DocumentTypes.VideoDocument:
            html_tab = render_video(doc_path, mmif, viz_id)
        else:
            # Without this branch html_tab would be unbound for the first
            # document, or silently reuse the previous document's HTML.
            html_tab = ""
        tabs.append({"id": document.id,
                     "tab_name": document.at_type.shortname,
                     "html": html_tab})
    return tabs
47+
48+
def render_text(text_path):
    """Return the content of the text document, but with some HTML tags added.

    The file content is HTML-escaped first, because the result is inserted
    into the page as markup. Every newline then becomes ``<br/>`` followed
    by a newline, and one trailing newline is appended.

    Raises:
        FileNotFoundError: if `text_path` is not an existing file.
    """
    from html import escape

    if not os.path.isfile(text_path):
        raise FileNotFoundError(f"File not found: {text_path}")
    with open(text_path, encoding="utf-8") as t_file:
        # Escape before adding tags so the <br/> markers themselves survive.
        content = escape(t_file.read()).replace("\n", "<br/>\n")
    return f"{content}\n"
55+
56+
def render_image(img_path):
    """Return the HTML for an image document; currently always an empty string."""
    # `img_path` is accepted but not used yet.
    return ''
58+
59+
def render_audio(audio_path):
    """Return the HTML for an audio document; currently always an empty string."""
    # `audio_path` is accepted but not used yet.
    return ''
61+
62+
def render_video(vid_path, mmif, viz_id):
    """Return a ``<video>`` element for the video, with caption tracks.

    The source URL is `vid_path` passed through ``url2posix``. For every
    view that ``get_abstract_view_type`` classifies as "ASR", a caption
    ``<track>`` pointing at that view's VTT file is added.
    """
    video_src = url2posix(vid_path)
    pieces = [
        '<video id="vid" controls crossorigin="anonymous" >\n',
        f' <source src="{video_src}">\n',
    ]
    for view in mmif.views:
        if get_abstract_view_type(view) != "ASR":
            continue
        vtt_path = get_vtt_file(view, viz_id)
        # NOTE(review): this strips a prefix as long as "/tmp/" plus the static
        # folder path - presumably how the cache root is laid out; confirm
        # against the cache module.
        prefix_length = len("/tmp/") + len(current_app.static_folder)
        rel_vtt_path = vtt_path[prefix_length:]
        pieces.append(f' <track kind="captions" srclang="en" src="/{rel_vtt_path}" label="transcript" default/>\n')
    pieces.append("</video>\n")
    return "".join(pieces)
74+
75+
# -- Annotations --
76+
77+
def render_annotations(mmif, viz_id):
    """
    Returns HTML Tab representation of all annotations in the MMIF object.

    The Info, Annotations and Tree tabs always come first. After them, one
    tab is added per view that is classified as "NER" or "ASR"; other views
    contribute no tab.
    """
    # These tabs should always be present
    tabs = [
        {"id": "info", "tab_name": "Info", "html": render_info(mmif)},
        {"id": "annotations", "tab_name": "Annotations", "html": render_annotation_table(mmif)},
        {"id": "tree", "tab_name": "Tree", "html": render_jstree(mmif)},
    ]
    # These tabs are optional
    for view in mmif.views:
        abstract_view_type = get_abstract_view_type(view)
        # Second-to-last segment of the app identifier, split on "/".
        app_shortname = view.metadata.app.split("/")[-2]
        if abstract_view_type == "NER":
            view_html = render_ner(mmif, view)
        elif abstract_view_type == "ASR":
            view_html = render_asr_vtt(view, viz_id)
        else:
            continue
        tabs.append({"id": view.id, "tab_name": f"{app_shortname}-{view.id}", "html": view_html})
    return tabs
95+
96+
def render_info(mmif):
    """Return a ``<pre>`` block summarising the documents and views.

    One line per document (type short name and location) comes first, then
    one line per view (id, app, status, annotation count). For a view with
    annotations, a count per annotation type follows its line.
    """
    out = ["<pre>"]
    for document in mmif.documents:
        out.append("%s %s\n" % (document.at_type.shortname, document.location))
    out.append('\n')
    for view in mmif.views:
        app = view.metadata.app
        status = get_status(view)
        out.append('%s %s %s %d\n' % (view.id, app, status, len(view.annotations)))
        if len(view.annotations) > 0:
            out.append('\n')
            type_counts = Counter(a.at_type.shortname for a in view.annotations)
            for attype, count in type_counts.items():
                out.append(' %4d %s\n' % (count, attype))
        out.append('\n')
    out.append("</pre>")
    return "".join(out)
117+
118+
119+
def render_annotation_table(mmif):
    """Return HTML with one annotation table per view.

    Each view gets a header paragraph (id, app, status, annotation count)
    followed by a table with one row per annotation: id, type short name and
    formatted properties. Property strings longer than 500 characters are
    cut off and suffixed with " . . . }".

    Returns an empty string when the MMIF object has no views. The earlier
    version returned the buffer's leftover initial value "Howdy" in that case.
    """
    def limit_len(text):
        # Keep very long property strings from blowing up the table.
        return text[:500] + " . . . }" if len(text) > 500 else text

    s = StringIO()
    for view in mmif.views:
        status = get_status(view)
        s.write('<p><b>%s %s</b> %s %d annotations</p>\n'
                % (view.id, view.metadata.app, status, len(view.annotations)))
        s.write("<blockquote>\n")
        s.write("<table cellspacing=0 cellpadding=5 border=1>\n")
        for annotation in view.annotations:
            s.write(' <tr>\n')
            s.write(' <td>%s</td>\n' % annotation.id)
            s.write(' <td>%s</td>\n' % annotation.at_type.shortname)
            s.write(' <td>%s</td>\n' % limit_len(get_properties(annotation)))
            s.write(' </tr>\n')
        s.write("</table>\n")
        s.write("</blockquote>\n")
    return s.getvalue()
137+
138+
def render_jstree(mmif):
    """Render the 'interactive.html' template for `mmif` with no aligned views."""
    template_context = {"mmif": mmif, "aligned_views": []}
    return render_template('interactive.html', **template_context)
140+
141+
def render_asr_vtt(view, viz_id):
    """Return the view's VTT file content wrapped in a ``<pre>`` element.

    The file path comes from ``get_vtt_file``; the content is inserted as-is.
    """
    with open(get_vtt_file(view, viz_id)) as vtt_file:
        transcript = vtt_file.read()
    return "<pre>" + transcript + "</pre>"
146+
147+
def render_ner(mmif, view):
    """Return the displacy visualization of the named entities in `view`.

    The document is read from the 'document' entry of the view's ``Uri.NE``
    contains-metadata and passed to ``displacy.visualize_ner`` together with
    the Flask application's root path.
    """
    ne_metadata = view.metadata.contains.get(Uri.NE)
    return displacy.visualize_ner(
        mmif,
        view,
        ne_metadata.get('document'),
        current_app.root_path,
    )
151+
152+
def render_ocr():
    """Placeholder for OCR rendering; does nothing and returns None."""
    return None

start_visualizer.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,5 @@ else
2727
fi
2828
# Start visualizer
2929
$container_engine build . -f Containerfile -t clams-mmif-visualizer
30-
$container_engine run -d --name clams-mmif-visualizer --rm -p 5001:5000 -e PYTHONUNBUFFERED=1 -v $datadir:$mountdir -v $datadir:/app/static/$mountdir clams-mmif-visualizer
30+
$container_engine run --name clams-mmif-visualizer --rm -p 5001:5000 -e PYTHONUNBUFFERED=1 -v $datadir:$mountdir -v $datadir:/app/static/$mountdir clams-mmif-visualizer
3131
echo "MMIF Visualizer is running in the background and can be accessed at http://localhost:5001/. To shut it down, run '$container_engine kill clams-mmif-visualizer'"

templates/player.html

+13-13
Original file line numberDiff line numberDiff line change
@@ -117,27 +117,27 @@ <h1 class="title">Visualizing MMIF</h1>
117117
<ul class="nav nav-tabs">
118118
<!-- printing the first one out of the loop so it can be made the active link -->
119119
<li class="nav-item">
120-
<a class="nav-link active" data-toggle="tab" href="#{{ docs[0][0] }}">{{ docs[0][0] }}</a>
120+
<a class="nav-link active" data-toggle="tab" href="#{{ docs[0]['tab_name'] }}">{{ docs[0]['tab_name'] }}</a>
121121
</li>
122122
{% for medium in docs[1:] %}
123-
<li class="nav-item {{medium[0]}}">
124-
<a class="nav-link" data-toggle="tab" href="#{{ medium[0] }}">{{ medium[0] }}</a>
123+
<li class="nav-item {{medium['tab_name']}}">
124+
<a class="nav-link" data-toggle="tab" href="#{{ medium['tab_name'] }}">{{ medium['tab_name'] }}</a>
125125
</li>
126126
{% endfor %}
127127
</ul>
128128

129129
<!-- contents of the documents -->
130130
<div class="tab-content">
131-
<div id="{{ docs[0][0] }}" class="tab-pane fade show active">
131+
<div id="{{ docs[0]['tab_name'] }}" class="tab-pane fade show active">
132132
<br/>
133-
<p>{{ docs[0][2] }}</p>
134-
{{ docs[0][3] | safe }}
133+
<!-- <p>{{ docs[0][2] }}</p> -->
134+
{{ docs[0]['html'] | safe }}
135135
</div>
136136
{% for medium in docs[1:] %}
137-
<div id="{{ medium[0] }}" class="tab-pane fade">
137+
<div id="{{ medium['tab_name'] }}" class="tab-pane fade">
138138
<br/>
139-
<p>{{ medium[2] }}</p>
140-
{{ medium[3] | safe }}
139+
<!-- <p>{{ medium[2] }}</p> -->
140+
{{ medium['html'] | safe }}
141141
</div>
142142
{% endfor %}
143143
</div>
@@ -148,18 +148,18 @@ <h1 class="title">Visualizing MMIF</h1>
148148
<!-- navigation tabs for the visualizations (WebVTT, Entities, etcetera) -->
149149
<ul class="nav nav-tabs">
150150
{% for annotation in annotations %}
151-
<li class="nav-item {{ annotation[0] }}">
152-
<a class="nav-link" data-toggle="tab" href="#{{ annotation[0] }}">{{ annotation[0] }}</a>
151+
<li class="nav-item {{ annotation['tab_name'] }}">
152+
<a class="nav-link" data-toggle="tab" href="#{{ annotation['tab_name'] }}">{{ annotation['tab_name'] }}</a>
153153
</li>
154154
{% endfor %}
155155
</ul>
156156

157157
<!-- visualization content -->
158158
<div class="tab-content">
159159
{% for annotation in annotations %}
160-
<div id="{{ annotation[0] }}" class="tab-pane fade">
160+
<div id="{{ annotation['tab_name'] }}" class="tab-pane fade">
161161
<br/>
162-
{{ annotation[1] | safe }}
162+
{{ annotation['html'] | safe }}
163163
</div>
164164
{% endfor %}
165165
</div>

utils.py

-2
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,6 @@ def prep_annotations(mmif, viz_id):
135135
tabs = []
136136
tabs.append(("Info", "<pre>" + create_info(mmif) + "</pre>"))
137137
app.logger.debug(f"Prepared INFO Tab: {tabs[-1][0]}")
138-
# tabs.append(("MMIF", "<pre>" + mmif.serialize(pretty=True) + "</pre>"))
139-
# app.logger.debug(f"Prepared RAW Tab: {tabs[-1][0]}")
140138
tabs.append(("Annotations", create_annotation_tables(mmif)))
141139
app.logger.debug(f"Prepared SUMMARY Tab: {tabs[-1][0]}")
142140
tabs.append(("Tree", render_interactive_mmif(mmif)))

0 commit comments

Comments
 (0)