Skip to content

Commit cef51fa

Browse files
authored
Merge pull request #34 from clamsproject/33-fix-uv
fixes, and then turn-off UV
2 parents 10eb0e2 + 91cec33 commit cef51fa

File tree

5 files changed

+89
-106
lines changed

5 files changed

+89
-106
lines changed

app.py

+11-14
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99

1010
import cache
1111
from cache import set_last_access, cleanup
12-
from utils import app, render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization
12+
import utils
13+
from utils import app
1314

1415

1516
@app.route('/')
@@ -24,7 +25,7 @@ def ocr():
2425
mmif_str = open(cache.get_cache_root() / data["mmif_id"] / "file.mmif").read()
2526
mmif = Mmif(mmif_str)
2627
ocr_view = mmif.get_view_by_id(data["view_id"])
27-
return prepare_ocr_visualization(mmif, ocr_view, data["mmif_id"])
28+
return utils.prepare_ocr_visualization(mmif, ocr_view, data["mmif_id"])
2829
except Exception as e:
2930
return f'<p class="error">{e}</h1>'
3031

@@ -33,7 +34,7 @@ def ocr():
3334
def ocrpage():
    """Render a single page of OCR results for the request's MMIF view.

    Reads ``mmif_id``, ``vid_path``, ``view_id`` and ``page_number`` from the
    JSON body of the current request and passes them to ``utils.render_ocr``.

    Returns:
        The value returned by ``utils.render_ocr``; if anything raises while
        reading the payload or rendering, an HTML ``<p class="error">``
        fragment describing the exception instead.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from html import escape

    data = request.json
    try:
        return utils.render_ocr(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"])
    except Exception as e:
        # Request boundary: report the failure to the client rather than
        # raising. Both pieces are escaped because ``str(type(e))`` looks like
        # "<class 'KeyError'>" (which a browser would parse as a tag) and the
        # exception message may echo request data.
        error_type = escape(str(type(e)))
        error_text = escape(str(e))
        return f'<p class="error">Unexpected error of type {error_type}: {error_text}</p>'
3940

@@ -99,16 +100,6 @@ def send_js(path):
99100
return send_from_directory("uv", path)
100101

101102

102-
def render_mmif(mmif_str, viz_id):
103-
mmif = Mmif(mmif_str)
104-
htmlized_docs = documents_to_htmls(mmif, viz_id)
105-
app.logger.debug(f"Prepared document: {[d[0] for d in htmlized_docs]}")
106-
annotations = prep_annotations(mmif, viz_id)
107-
app.logger.debug(f"Prepared Annotations: {[annotation[0] for annotation in annotations]}")
108-
return render_template('player.html',
109-
docs=htmlized_docs, viz_id=viz_id, annotations=annotations)
110-
111-
112103
def upload_file(in_mmif):
113104
# Save file locally
114105
in_mmif_bytes = in_mmif if isinstance(in_mmif, bytes) else in_mmif.read()
@@ -123,7 +114,13 @@ def upload_file(in_mmif):
123114
with open(path / 'file.mmif', 'w') as in_mmif_file:
124115
app.logger.debug(f"Writing original MMIF to {path / 'file.mmif'}")
125116
in_mmif_file.write(in_mmif_str)
126-
html_page = render_mmif(in_mmif_str, viz_id)
117+
mmif = Mmif(in_mmif_str)
118+
htmlized_docs = utils.documents_to_htmls(mmif, viz_id)
119+
app.logger.debug(f"Prepared document: {[d[0] for d in htmlized_docs]}")
120+
annotations = utils.prep_annotations(mmif, viz_id)
121+
app.logger.debug(f"Prepared Annotations: {[annotation[0] for annotation in annotations]}")
122+
html_page = render_template('player.html',
123+
docs=htmlized_docs, viz_id=viz_id, annotations=annotations)
127124
with open(os.path.join(path, "index.html"), "w") as f:
128125
f.write(html_page)
129126
except FileExistsError:

cache.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import os
23
import pathlib
34
import shutil
@@ -16,7 +17,7 @@ def get_cache_root():
1617
return pathlib.Path(_CACHE_DIR_ROOT.name)
1718

1819

19-
def invalidate_cache(viz_ids):
20+
def invalidate_cache(viz_ids=[]):
2021
if not viz_ids:
2122
shutil.rmtree(get_cache_root())
2223
os.makedirs(get_cache_root())
@@ -55,11 +56,11 @@ def scan_tmp_directory():
5556

5657
def cleanup():
    """Trim the visualization cache until it fits the size budget.

    While holding the module-level ``lock``, repeatedly asks
    ``scan_tmp_directory()`` for the current cache size and the oldest
    directory, and removes that directory as long as the size exceeds the
    budget.
    """
    # Max tmp size is 500MB
    max_size = 500000000
    with lock:
        logging.info("Checking visualization cache...")
        while True:
            # Re-scan after every deletion so the size reflects what is left.
            folder_size, oldest_dir = scan_tmp_directory()
            if not folder_size > max_size:
                break
            logging.info(f"Maximum cache size reached. Deleting {os.path.basename(oldest_dir)}.")
            shutil.rmtree(oldest_dir)

iiif_utils.py

+30-73
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import datetime
22
import json
3-
import os
43
import tempfile
54
from typing import Dict
65

76
import mmif
87
from flask import url_for
98
from mmif import AnnotationTypes, DocumentTypes, Mmif
9+
from mmif.utils import video_document_helper as vdh
1010

1111
import cache
12+
import utils
1213

1314

1415
def generate_iiif_manifest(in_mmif: mmif.Mmif, viz_id):
@@ -27,18 +28,20 @@ def generate_iiif_manifest(in_mmif: mmif.Mmif, viz_id):
2728
],
2829
"structures": []
2930
}
30-
add_canvas_from_documents(in_mmif, iiif_json)
31+
add_canvas_from_documents(viz_id, in_mmif, iiif_json)
3132
add_structure_from_timeframe(in_mmif, iiif_json)
3233
return save_manifest(iiif_json, viz_id)
3334

3435

35-
def add_canvas_from_documents(in_mmif, iiif_json):
36+
def add_canvas_from_documents(viz_id, in_mmif, iiif_json):
3637
video_documents = in_mmif.get_documents_by_type(DocumentTypes.VideoDocument)
3738
audio_documents = in_mmif.get_documents_by_type(DocumentTypes.AudioDocument)
3839
image_documents = in_mmif.get_documents_by_type(DocumentTypes.ImageDocument)
3940
all_documents = video_documents + audio_documents + image_documents
4041
document_canvas_dict = {}
4142
for _id, document in enumerate(all_documents, start=1):
43+
canvas_media_path = url_for(
44+
'static', filename=f"{cache._CACHE_DIR_SUFFIX}/{viz_id}/{utils.get_src_media_symlink_basename(document)}")
4245
document_canvas_dict[document.id] = _id
4346
canvas = {
4447
"id": f"http://0.0.0.0:5000/mmif_example_manifest.json/canvas/{_id}",
@@ -62,7 +65,7 @@ def add_canvas_from_documents(in_mmif, iiif_json):
6265
"choiceHint": "user",
6366
"items": [
6467
{
65-
"id": build_document_url(document),
68+
"id": canvas_media_path,
6669
"type": get_iiif_type(document),
6770
"label": "",
6871
"format": get_iiif_format(document)
@@ -76,34 +79,37 @@ def add_canvas_from_documents(in_mmif, iiif_json):
7679
}
7780
],
7881
}
79-
# if not os.path.isfile(f"static{document.location_path()}"):
80-
# shutil.copyfile(
81-
# f"{document.location_path()}",
82-
# f"static{os.path.basename(document.location_path())}"
83-
# )
8482
iiif_json["sequences"][0]["canvases"].append(canvas)
8583
break # todo currently only supports single document, needs more work to align canvas values
8684

8785

88-
def build_document_url(document):
89-
"""
90-
This trims off all of the path to the document except the filename then prepends data/video/. This is so
91-
mmif's from running locally can still be found if the viewe
92-
r is run in docker, assuming the volume mount or
93-
symlink is correctly set.
94-
"""
95-
location = document.location
96-
if location.startswith("file://"):
97-
location = document.location[7:]
98-
file_path = os.path.join("data", "video", os.path.basename(location))
99-
return url_for('static', filename=file_path)
100-
101-
10286
def add_structure_from_timeframe(in_mmif: Mmif, iiif_json: Dict):
    """Append one IIIF range per TimeFrame-bearing view to ``iiif_json``.

    For every view returned by ``in_mmif.get_views_contain(TimeFrame)`` a
    top-level range labelled ``"View: <view id>"`` is built. Each TimeFrame
    annotation in that view becomes a nested range whose single member points
    at canvas 1 with a ``#t=<start>,<end>`` fragment in seconds, as computed
    by ``vdh.convert_timeframe``.

    Args:
        in_mmif: MMIF object whose views are scanned.
        iiif_json: Manifest dictionary; its ``"structures"`` list is extended
            in place.
    """
    # get all views with timeframe annotations from mmif obj
    tf_views = in_mmif.get_views_contain(AnnotationTypes.TimeFrame)
    for range_id, view in enumerate(tf_views, start=1):
        view_range = {
            "id": f"http://0.0.0.0:5000/mmif_example_manifest.json/range/{range_id}",
            "type": "Range",
            "label": f"View: {view.id}",
            "members": []
        }
        timeframes = view.get_annotations(AnnotationTypes.TimeFrame)
        for struct_id, ann in enumerate(timeframes, start=1):
            label = ann.get_property('label')
            s, e = vdh.convert_timeframe(in_mmif, ann, "seconds")

            structure = {
                # Nested ranges get their own id; reusing the parent's id
                # would leave several resources sharing one identifier.
                "id": f"http://0.0.0.0:5000/mmif_example_manifest.json/range/{range_id}/{struct_id}",
                "type": "Range",
                # str() keeps non-string label values from raising on
                # .capitalize(); string labels render exactly as before.
                "label": f"{str(label).capitalize()}",
                "members": [
                    {
                        "id": f"http://0.0.0.0:5000/mmif_example_manifest.json/canvas/{1}#t={s},{e}",
                        # need to align id here to support more than one document
                        "type": "Canvas"
                    }
                ]
            }
            view_range["members"].append(structure)
        iiif_json["structures"].append(view_range)
108114

109115

@@ -115,55 +121,6 @@ def save_manifest(iiif_json: Dict, viz_id) -> str:
115121
return manifest.name
116122

117123

118-
def tf_view_to_iiif_range(range_id, view):
119-
view_range = {
120-
"id": f"http://0.0.0.0:5000/mmif_example_manifest.json/range/{range_id}",
121-
"type": "Range",
122-
"label": f"View: {view.id}",
123-
"members": []
124-
}
125-
# for annotation in view.annotations:
126-
# # TODO: TimeUnits generated by Kaldi have no "timeUnit" or "unit" property.
127-
# The mmif documentation does specify a "unit" property, but the Kaldi
128-
# ASR doesn't seem to include that in annotations.
129-
130-
# if annotation.at_type == AnnotationTypes.TimeFrame:
131-
# if 'unit' in annotation.properties:
132-
# annotation_unit = annotation.properties['unit']
133-
# elif 'unit' in view.metadata.parameters:
134-
# annotation_unit = view.metadata.parameters['unit']
135-
# else:
136-
# raise Exception("Error finding timeframe unit.")
137-
# frame_type = annotation.properties["frameType"]
138-
# if annotation_unit == "frame":
139-
# start_fn = int(annotation.properties["start"])
140-
# end_fn = int(annotation.properties["end"])
141-
# frame_rate = 29.97
142-
# start_sec = int(start_fn // frame_rate)
143-
# end_sec = int(end_fn // frame_rate)
144-
# elif annotation_unit == "milliseconds":
145-
# start_milli = int(annotation.properties["start"])
146-
# end_milli = int(annotation.properties["end"])
147-
# start_sec = int(start_milli // 1000)
148-
# end_sec = int(end_milli // 1000)
149-
# else:
150-
# continue
151-
# structure = {
152-
# "id": f"http://0.0.0.0:5000/mmif_example_manifest.json/range/{range_id}",
153-
# "type": "Range",
154-
# "label": f"{frame_type.capitalize()}",
155-
# "members": [
156-
# {
157-
# "id": f"http://0.0.0.0:5000/mmif_example_manifest.json/canvas/{1}#t={start_sec},{end_sec}",
158-
# # need to align id here to support more than one document
159-
# "type": "Canvas"
160-
# }
161-
# ]
162-
# }
163-
# view_range["members"].append(structure)
164-
return view_range
165-
166-
167124
def get_iiif_format(document):
168125
if document.is_type(DocumentTypes.VideoDocument):
169126
return 'video/mp4'

templates/uv_player.html

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
$(".nav-item.UV").click(function() {
2121
if (!uvLoaded) {
2222
const data = {
23-
manifest: "/tmp/{{mmif_id}}/{{manifest}}",
23+
manifest: "/mmif-viz-cache/{{mmif_id}}/{{manifest}}",
2424
embedded: true
2525
};
2626

utils.py

+43-15
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,14 @@
22
from datetime import timedelta
33
from io import StringIO
44

5-
from flask import Flask
5+
from flask import Flask, url_for
66
from lapps.discriminators import Uri
77
from mmif import DocumentTypes
8-
from mmif.serialize.annotation import Text
8+
from mmif.serialize.annotation import Text, Document
99
from mmif.vocabulary import AnnotationTypes
1010

11-
import cache
1211
import displacy
13-
from iiif_utils import generate_iiif_manifest
12+
import iiif_utils
1413
from ocr import *
1514

1615
# Get Properties from MMIF file ---
@@ -72,6 +71,31 @@ def build_alignment(alignment, token_idx, timeframe_idx):
7271
return start, end, text
7372

7473

74+
def get_src_media_symlink_basename(doc: "Document"):
    """Return the file name used for a document's symlink: ``<doc.id><ext>``.

    The extension is the final suffix of ``doc.location_path()`` including
    its leading dot (``.mp4``, or ``.gz`` for ``x.tar.gz``). When the file
    name itself has no extension the result is just ``doc.id``, so a dot in a
    parent directory name can no longer leak path separators into the
    returned name.

    Args:
        doc: Object exposing ``id`` and ``location_path()``.

    Returns:
        The symlink base name as a string.
    """
    doc_path = doc.location_path()
    # PurePath.suffix only inspects the last path component, unlike
    # ``split('.')[-1]`` which looks at the whole path string.
    extension = pathlib.PurePath(doc_path).suffix
    return f"{doc.id}{extension}"
77+
78+
79+
def get_symlink_relurl(viz_id, symlink_fname):
    """Return where a visualization's symlink lives under the static folder.

    Despite the name, the result is a ``pathlib.Path`` built by joining
    ``app.static_folder``, ``cache._CACHE_DIR_SUFFIX``, ``viz_id`` and
    ``symlink_fname`` -- a filesystem location, not a URL string.
    """
    static_root = pathlib.Path(app.static_folder)
    # Cache-relative location: <cache suffix>/<viz_id>/<symlink_fname>
    relative_location = pathlib.Path(cache._CACHE_DIR_SUFFIX, viz_id, symlink_fname)
    return static_root / relative_location
83+
84+
85+
def symlink_to_static(viz_id, original_path, symlink_fname) -> str:
    """Symlink a file into the static cache folder and return its static URL.

    The link is created at
    ``<app.static_folder>/<cache._CACHE_DIR_SUFFIX>/<viz_id>/<symlink_fname>``
    and points at ``original_path``.

    Args:
        viz_id: Visualization id; used as the sub-directory name.
        original_path: Path the symlink should point to.
        symlink_fname: File name of the symlink itself.

    Returns:
        The URL produced by ``url_for('static', ...)`` for the symlink. It is
        returned even when creating the link failed; the failure is only
        logged.
    """
    static_folder = pathlib.Path(app.static_folder)
    symlink_path = pathlib.Path(cache._CACHE_DIR_SUFFIX) / viz_id / symlink_fname
    app.logger.debug(f"Symlinking {original_path} to {symlink_path}")
    try:
        os.symlink(original_path, static_folder / symlink_path)
    except Exception as e:
        # Deliberately best-effort: a failed link (e.g. it already exists)
        # must not abort page rendering, so log and carry on.
        app.logger.error(f"SOME ERROR when symlinking: {str(e)}")
    else:
        # Only claim success when os.symlink actually succeeded.
        app.logger.debug(f"{original_path} is symlinked to {symlink_path}")
    # url_for expects a string with forward slashes, not a Path object.
    symlink_rel_path = url_for('static', filename=symlink_path.as_posix())
    app.logger.debug(f"and exposable as {symlink_rel_path}")
    return symlink_rel_path
97+
98+
7599
def documents_to_htmls(mmif, viz_id):
76100
"""
77101
Returns a list of tuples, one for each element in the documents list of
@@ -83,27 +107,31 @@ def documents_to_htmls(mmif, viz_id):
83107
for document in mmif.documents:
84108
doc_path = document.location_path()
85109
app.logger.debug(f"MMIF on AV asset: {doc_path}")
86-
doc_symlink_path = pathlib.Path(app.static_folder) / cache._CACHE_DIR_SUFFIX / viz_id / (f"{document.id}.{doc_path.split('.')[-1]}")
87-
os.symlink(doc_path, doc_symlink_path)
88-
app.logger.debug(f"{doc_path} is symlinked to {doc_symlink_path}")
89-
doc_symlink_rel_path = '/' + doc_symlink_path.relative_to(app.static_folder).as_posix()
90-
app.logger.debug(f"and {doc_symlink_rel_path} will be used in HTML src attribute")
110+
linked = symlink_to_static(viz_id, doc_path, get_src_media_symlink_basename(document))
91111
if document.at_type == DocumentTypes.TextDocument:
92-
html = html_text(doc_symlink_rel_path)
112+
html = html_text(linked)
93113
elif document.at_type == DocumentTypes.VideoDocument:
94114
fa_views = get_alignment_views(mmif)
95115
fa_view = fa_views[0] if fa_views else None
96-
html = html_video(viz_id, doc_symlink_rel_path, fa_view)
116+
html = html_video(viz_id, linked, fa_view)
97117
elif document.at_type == DocumentTypes.AudioDocument:
98-
html = html_audio(doc_symlink_rel_path)
118+
html = html_audio(linked)
99119
elif document.at_type == DocumentTypes.ImageDocument:
100120
boxes = get_boxes(mmif)
101-
html = html_img(doc_symlink_rel_path, boxes)
121+
html = html_img(linked, boxes)
102122
htmlized.append((document.at_type.shortname, document.id, doc_path, html))
103-
manifest_filename = generate_iiif_manifest(mmif, viz_id)
123+
manifest_filename = iiif_utils.generate_iiif_manifest(mmif, viz_id)
124+
app.logger.debug(f"Generated IIIF manifest: {manifest_filename}")
104125
man = os.path.basename(manifest_filename)
126+
app.logger.debug(f"Manifest filename: {man}")
127+
symlink_to_static(viz_id, manifest_filename, man)
128+
app.logger.debug(f"Symlinked IIIF manifest: {None}")
105129
temp = render_template("uv_player.html", manifest=man, mmif_id=viz_id)
106-
htmlized.append(('UV', "", "", temp))
130+
# TODO (krim @ 2024-03-12): The IIIF (UV) tab is no longer added to the HTML page since
131+
# 1. the current IIIF manifest conversion is based on an old version of the manifest API and is quite brittle
132+
# 2. the conversion code at the moment can only convert TimeFrame annotation to "jump-able" IIIF canvases,
133+
# but the case is already covered by `Thumbnails` tab (look for usage of `pre-ocr.html` template)
134+
# htmlized.append(('UV', "", "", temp))
107135
return htmlized
108136

109137

0 commit comments

Comments
 (0)