Skip to content

Commit 664b895

Browse files
Merge refactor
2 parents 1be0ee9 + 6778625 commit 664b895

File tree

11 files changed

+590
-521
lines changed

11 files changed

+590
-521
lines changed

Diff for: .gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,6 @@ tags
7373

7474
# VSCode
7575
.devcontainer
76-
devcontainer.json
76+
devcontainer.json
77+
78+
static/mmif-viz-cache

Diff for: app.py

+58-33
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,20 @@
33
import secrets
44
import sys
55
from threading import Thread
6+
from shutil import rmtree
67

7-
from flask import request, render_template, flash, send_from_directory, redirect
8+
from flask import Flask, request, render_template, flash, send_from_directory, redirect
89
from mmif.serialize import Mmif
10+
from mmif.vocabulary import DocumentTypes
911

1012
import cache
1113
from cache import set_last_access, cleanup
12-
from utils import app, render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization
1314
import traceback
14-
import utils
15-
from utils import app
15+
from render import render_documents, render_annotations, prepare_ocr, render_ocr_page
16+
17+
# these two static folder-related params are important, do not remove
18+
app = Flask(__name__, static_folder='static', static_url_path='')
19+
app.secret_key = 'your_secret_key_here'
1620

1721

1822
@app.route('/')
@@ -22,24 +26,12 @@ def index():
2226

2327
@app.route('/ocr', methods=['POST'])
2428
def ocr():
25-
try:
26-
data = dict(request.json)
27-
mmif_str = open(cache.get_cache_root() / data["mmif_id"] / "file.mmif").read()
28-
mmif = Mmif(mmif_str)
29-
ocr_view = mmif.get_view_by_id(data["view_id"])
30-
return utils.prepare_ocr_visualization(mmif, ocr_view, data["mmif_id"])
31-
except Exception as e:
32-
app.logger.error(f"{e}\n{traceback.format_exc()}")
33-
return f'<p class="error">Error: {e} Check the server log for more information.</h1>'
34-
35-
36-
@app.route('/ocrpage', methods=['POST'])
37-
def ocrpage():
38-
data = request.json
39-
try:
40-
return utils.render_ocr(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"])
41-
except Exception as e:
42-
return f'<p class="error">Unexpected error of type {type(e)}: {e}</h1>'
29+
if "page_number" not in request.json:
30+
build_ocr_tab(request.json)
31+
request.json["page_number"] = 0
32+
# return serve_first_ocr_page(request.json)
33+
# else:
34+
return serve_ocr_page(request.json)
4335

4436

4537
@app.route('/upload', methods=['GET', 'POST'])
@@ -93,7 +85,7 @@ def display(viz_id):
9385
return html_file
9486
else:
9587
app.logger.debug(f"Visualization {viz_id} not found in cache.")
96-
os.remove(path)
88+
rmtree(path)
9789
flash("File not found -- please upload again (it may have been deleted to clear up cache space).")
9890
return redirect("/upload")
9991

@@ -103,6 +95,45 @@ def send_js(path):
10395
return send_from_directory("uv", path)
10496

10597

98+
def render_mmif(mmif_str, viz_id):
    """
    Build the player page HTML for a serialized MMIF.

    Parses ``mmif_str`` into an ``Mmif`` object, renders its documents and
    its annotations for the visualization ``viz_id``, and returns the result
    of rendering the ``player.html`` template with those pieces.
    """
    parsed = Mmif(mmif_str)
    # Dict values are evaluated top to bottom: documents first, then annotations.
    template_context = {
        "docs": render_documents(parsed, viz_id),
        "viz_id": viz_id,
        "annotations": render_annotations(parsed, viz_id),
    }
    return render_template('player.html', **template_context)
106+
107+
108+
def build_ocr_tab(data):
    """
    Prepares OCR (at load time, due to lazy loading)

    Reads the cached MMIF file for ``data["mmif_id"]``, runs ``prepare_ocr``
    on the view ``data["view_id"]``, and stores the location path of the
    first video document under ``data["vid_path"]``.

    :param data: mutable mapping with "mmif_id" and "view_id" keys. The
                 ``/ocr`` route passes ``request.json``, so the added
                 "vid_path" entry is visible to the code that runs next.
    :return: ``None`` on success, or an HTML error snippet if anything failed
    """
    from html import escape

    try:
        mmif_id = data["mmif_id"]
        mmif_path = cache.get_cache_root() / mmif_id / "file.mmif"
        # Context manager so the file handle is closed even if parsing fails.
        with open(mmif_path) as mmif_file:
            mmif = Mmif(mmif_file.read())
        ocr_view = mmif.get_view_by_id(data["view_id"])
        prepare_ocr(mmif, ocr_view, mmif_id)
        video_docs = mmif.get_documents_by_type(DocumentTypes.VideoDocument)
        # An MMIF without any video document raises IndexError here, which is
        # reported through the generic handler below.
        data["vid_path"] = video_docs[0].location_path()
    except Exception as e:
        app.logger.error(f"{e}\n{traceback.format_exc()}")
        # The message may echo request-supplied values (e.g. the mmif_id in a
        # missing-file path), so escape it before embedding it in HTML.
        return f'<p class="error">Error: {escape(str(e), quote=False)} Check the server log for more information.</p>'
    return None
125+
126+
127+
def serve_ocr_page(data):
    """
    Serves subsequent OCR pages

    :param data: mapping with "mmif_id", "vid_path", "view_id" and
                 "page_number" keys
    :return: whatever ``render_ocr_page`` returns, or an HTML error snippet
             if the lookup of a key or the rendering raised
    """
    from html import escape

    try:
        return render_ocr_page(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"])
    except Exception as e:
        # str(type(e)) looks like "<class 'KeyError'>"; unescaped, a browser
        # would treat it as an unknown tag and hide it.
        error_type = escape(str(type(e)), quote=False)
        error_text = escape(str(e), quote=False)
        return f'<p class="error">Unexpected error of type {error_type}: {error_text}</p>'
135+
136+
106137
def upload_file(in_mmif):
107138
# Save file locally
108139
in_mmif_bytes = in_mmif if isinstance(in_mmif, bytes) else in_mmif.read()
@@ -117,13 +148,7 @@ def upload_file(in_mmif):
117148
with open(path / 'file.mmif', 'w') as in_mmif_file:
118149
app.logger.debug(f"Writing original MMIF to {path / 'file.mmif'}")
119150
in_mmif_file.write(in_mmif_str)
120-
mmif = Mmif(in_mmif_str)
121-
htmlized_docs = utils.documents_to_htmls(mmif, viz_id)
122-
app.logger.debug(f"Prepared document: {[d[0] for d in htmlized_docs]}")
123-
annotations = utils.prep_annotations(mmif, viz_id)
124-
app.logger.debug(f"Prepared Annotations: {[annotation[0] for annotation in annotations]}")
125-
html_page = render_template('player.html',
126-
docs=htmlized_docs, viz_id=viz_id, annotations=annotations)
151+
html_page = render_mmif(in_mmif_str, viz_id)
127152
with open(os.path.join(path, "index.html"), "w") as f:
128153
f.write(html_page)
129154
except FileExistsError:
@@ -133,7 +158,6 @@ def upload_file(in_mmif):
133158
t = Thread(target=cleanup)
134159
t.daemon = True
135160
t.run()
136-
137161
agent = request.headers.get('User-Agent')
138162
if 'curl' in agent.lower():
139163
return f"Visualization ID is {viz_id}\nYou can access the visualized file at {request.url_root}display/{viz_id}\n"
@@ -143,7 +167,8 @@ def upload_file(in_mmif):
143167
if __name__ == '__main__':
144168
# Make path for temp files
145169
cache_path = cache.get_cache_root()
146-
cache_symlink_path = os.path.join(app.static_folder, cache._CACHE_DIR_SUFFIX)
170+
cache_symlink_path = os.path.join(
171+
app.static_folder, cache._CACHE_DIR_SUFFIX)
147172
if os.path.islink(cache_symlink_path):
148173
os.unlink(cache_symlink_path)
149174
elif os.path.exists(cache_symlink_path):
@@ -158,5 +183,5 @@ def upload_file(in_mmif):
158183
port = 5000
159184
if len(sys.argv) > 2 and sys.argv[1] == '-p':
160185
port = int(sys.argv[2])
161-
186+
162187
app.run(port=port, host='0.0.0.0', debug=True, use_reloader=True)

Diff for: displacy/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ def read_text(textdoc, app_root):
4848
# container, see the comment in html_text() in ../app.py)
4949
if not os.path.isfile(location):
5050
if location.startswith('file:///'):
51-
location = location[8:]
51+
location = location[7:]
5252
else:
5353
# this should not happen anymore, but keeping it anyway
5454
location = location[1:]
55-
location = os.path.join(app_root, 'static', location)
55+
# location = os.path.join(app_root, 'static', location)
5656
with open(location) as fh:
5757
text = fh.read()
5858
else:

Diff for: examples/whisper-spacy.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,23 @@
88
"properties": {
99
"mime": "video",
1010
"id": "d1",
11-
"location": "file:///data/video/service-mbrs-ntscrm-01181182.mp4"
11+
"location": "file:///data/service-mbrs-ntscrm-01181182.mp4"
1212
}
1313
},
1414
{
1515
"@type": "http://mmif.clams.ai/vocabulary/AudioDocument/v1",
1616
"properties": {
1717
"mime": "audio",
1818
"id": "d2",
19-
"location": "file:///data/audio/service-mbrs-ntscrm-01181182.wav"
19+
"location": "file:///data/service-mbrs-ntscrm-01181182.wav"
2020
}
2121
},
2222
{
2323
"@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1",
2424
"properties": {
2525
"mime": "text",
2626
"id": "d3",
27-
"location": "file:///data/text/service-mbrs-ntscrm-01181182.txt"
27+
"location": "file:///data/service-mbrs-ntscrm-01181182.txt"
2828
}
2929
}
3030
],

0 commit comments

Comments
 (0)