Skip to content

Commit 3fa2b5a

Browse files
Merge branch 'main' into code-refactor
2 parents cd6bf12 + 8000d53 commit 3fa2b5a

File tree

6 files changed

+67
-112
lines changed

6 files changed

+67
-112
lines changed

app.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,13 @@ def upload_file(in_mmif):
148148
with open(path / 'file.mmif', 'w') as in_mmif_file:
149149
app.logger.debug(f"Writing original MMIF to {path / 'file.mmif'}")
150150
in_mmif_file.write(in_mmif_str)
151-
html_page = render_mmif(in_mmif_str, viz_id)
151+
mmif = Mmif(in_mmif_str)
152+
htmlized_docs = utils.documents_to_htmls(mmif, viz_id)
153+
app.logger.debug(f"Prepared document: {[d[0] for d in htmlized_docs]}")
154+
annotations = utils.prep_annotations(mmif, viz_id)
155+
app.logger.debug(f"Prepared Annotations: {[annotation[0] for annotation in annotations]}")
156+
html_page = render_template('player.html',
157+
docs=htmlized_docs, viz_id=viz_id, annotations=annotations)
152158
with open(os.path.join(path, "index.html"), "w") as f:
153159
f.write(html_page)
154160
except FileExistsError:

cache.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import os
23
import pathlib
34
import shutil
@@ -16,7 +17,7 @@ def get_cache_root():
1617
return pathlib.Path(_CACHE_DIR_ROOT.name)
1718

1819

19-
def invalidate_cache(viz_ids):
20+
def invalidate_cache(viz_ids=[]):
2021
if not viz_ids:
2122
shutil.rmtree(get_cache_root())
2223
os.makedirs(get_cache_root())
@@ -55,11 +56,11 @@ def scan_tmp_directory():
5556

5657
def cleanup():
5758
with lock:
58-
print("Checking visualization cache...")
59+
logging.info("Checking visualization cache...")
5960
# Max tmp size is 500MB
6061
max_size = 500000000
6162
folder_size, oldest_dir = scan_tmp_directory()
6263
while folder_size > max_size:
63-
print(f"Maximum cache size reached. Deleting {os.path.basename(oldest_dir)}.")
64+
logging.info(f"Maximum cache size reached. Deleting {os.path.basename(oldest_dir)}.")
6465
shutil.rmtree(oldest_dir)
6566
folder_size, oldest_dir = scan_tmp_directory()

displacy/__init__.py

+3-9
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,9 @@ def mmif_to_dict(mmif: Mmif):
8383

8484

8585
def entity(view: View, annotation: Annotation):
86-
if "targets" in annotation.properties:
87-
start = min([view.annotations[target].properties["start"] for target in annotation.properties["targets"]])
88-
end = max([view.annotations[target].properties["end"] for target in annotation.properties["targets"]])
89-
else:
90-
start = annotation.properties['start']
91-
end = annotation.properties['end']
92-
return {'start': start,
93-
'end': end,
94-
'label': annotation.properties['category']}
86+
return {'start': annotation.get('start'),
87+
'end': annotation.get('end'),
88+
'label': annotation.get('category')}
9589

9690

9791
def dict_to_html(d):

iiif_utils.py

+30-73
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import datetime
22
import json
3-
import os
43
import tempfile
54
from typing import Dict
65

76
import mmif
87
from flask import url_for
98
from mmif import AnnotationTypes, DocumentTypes, Mmif
9+
from mmif.utils import video_document_helper as vdh
1010

1111
import cache
12+
import utils
1213

1314

1415
def generate_iiif_manifest(in_mmif: mmif.Mmif, viz_id):
@@ -27,18 +28,20 @@ def generate_iiif_manifest(in_mmif: mmif.Mmif, viz_id):
2728
],
2829
"structures": []
2930
}
30-
add_canvas_from_documents(in_mmif, iiif_json)
31+
add_canvas_from_documents(viz_id, in_mmif, iiif_json)
3132
add_structure_from_timeframe(in_mmif, iiif_json)
3233
return save_manifest(iiif_json, viz_id)
3334

3435

35-
def add_canvas_from_documents(in_mmif, iiif_json):
36+
def add_canvas_from_documents(viz_id, in_mmif, iiif_json):
3637
video_documents = in_mmif.get_documents_by_type(DocumentTypes.VideoDocument)
3738
audio_documents = in_mmif.get_documents_by_type(DocumentTypes.AudioDocument)
3839
image_documents = in_mmif.get_documents_by_type(DocumentTypes.ImageDocument)
3940
all_documents = video_documents + audio_documents + image_documents
4041
document_canvas_dict = {}
4142
for _id, document in enumerate(all_documents, start=1):
43+
canvas_media_path = url_for(
44+
'static', filename=f"{cache._CACHE_DIR_SUFFIX}/{viz_id}/{utils.get_src_media_symlink_basename(document)}")
4245
document_canvas_dict[document.id] = _id
4346
canvas = {
4447
"id": f"http://0.0.0.0:5000/mmif_example_manifest.json/canvas/{_id}",
@@ -62,7 +65,7 @@ def add_canvas_from_documents(in_mmif, iiif_json):
6265
"choiceHint": "user",
6366
"items": [
6467
{
65-
"id": build_document_url(document),
68+
"id": canvas_media_path,
6669
"type": get_iiif_type(document),
6770
"label": "",
6871
"format": get_iiif_format(document)
@@ -76,34 +79,37 @@ def add_canvas_from_documents(in_mmif, iiif_json):
7679
}
7780
],
7881
}
79-
# if not os.path.isfile(f"static{document.location_path()}"):
80-
# shutil.copyfile(
81-
# f"{document.location_path()}",
82-
# f"static{os.path.basename(document.location_path())}"
83-
# )
8482
iiif_json["sequences"][0]["canvases"].append(canvas)
8583
break # todo currently only supports single document, needs more work to align canvas values
8684

8785

88-
def build_document_url(document):
89-
"""
90-
This trims off all of the path to the document except the filename then prepends data/video/. This is so
91-
MMIFs from running locally can still be found if the viewer
92-
is run in docker, assuming the volume mount or
93-
symlink is correctly set.
94-
"""
95-
location = document.location
96-
if location.startswith("file://"):
97-
location = document.location[7:]
98-
file_path = os.path.join("data", "video", os.path.basename(location))
99-
return url_for('static', filename=file_path)
100-
101-
10286
def add_structure_from_timeframe(in_mmif: Mmif, iiif_json: Dict):
10387
# # get all views with timeframe annotations from mmif obj
10488
tf_views = in_mmif.get_views_contain(AnnotationTypes.TimeFrame)
10589
for range_id, view in enumerate(tf_views, start=1):
106-
view_range = tf_view_to_iiif_range(range_id, view)
90+
view_range = {
91+
"id": f"http://0.0.0.0:5000/mmif_example_manifest.json/range/{range_id}",
92+
"type": "Range",
93+
"label": f"View: {view.id}",
94+
"members": []
95+
}
96+
for ann in view.get_annotations(AnnotationTypes.TimeFrame):
97+
label = ann.get_property('label')
98+
s, e = vdh.convert_timeframe(in_mmif, ann, "seconds")
99+
100+
structure = {
101+
"id": f"http://0.0.0.0:5000/mmif_example_manifest.json/range/{range_id}",
102+
"type": "Range",
103+
"label": f"{label.capitalize()}",
104+
"members": [
105+
{
106+
"id": f"http://0.0.0.0:5000/mmif_example_manifest.json/canvas/{1}#t={s},{e}",
107+
# need to align id here to support more than one document
108+
"type": "Canvas"
109+
}
110+
]
111+
}
112+
view_range["members"].append(structure)
107113
iiif_json["structures"].append(view_range)
108114

109115

@@ -115,55 +121,6 @@ def save_manifest(iiif_json: Dict, viz_id) -> str:
115121
return manifest.name
116122

117123

118-
def tf_view_to_iiif_range(range_id, view):
119-
view_range = {
120-
"id": f"http://0.0.0.0:5000/mmif_example_manifest.json/range/{range_id}",
121-
"type": "Range",
122-
"label": f"View: {view.id}",
123-
"members": []
124-
}
125-
# for annotation in view.annotations:
126-
# # TODO: TimeUnits generated by Kaldi have no "timeUnit" or "unit" property.
127-
# The mmif documentation does specify a "unit" property, but the Kaldi
128-
# ASR doesn't seem to include that in annotations.
129-
130-
# if annotation.at_type == AnnotationTypes.TimeFrame:
131-
# if 'unit' in annotation.properties:
132-
# annotation_unit = annotation.properties['unit']
133-
# elif 'unit' in view.metadata.parameters:
134-
# annotation_unit = view.metadata.parameters['unit']
135-
# else:
136-
# raise Exception("Error finding timeframe unit.")
137-
# frame_type = annotation.properties["frameType"]
138-
# if annotation_unit == "frame":
139-
# start_fn = int(annotation.properties["start"])
140-
# end_fn = int(annotation.properties["end"])
141-
# frame_rate = 29.97
142-
# start_sec = int(start_fn // frame_rate)
143-
# end_sec = int(end_fn // frame_rate)
144-
# elif annotation_unit == "milliseconds":
145-
# start_milli = int(annotation.properties["start"])
146-
# end_milli = int(annotation.properties["end"])
147-
# start_sec = int(start_milli // 1000)
148-
# end_sec = int(end_milli // 1000)
149-
# else:
150-
# continue
151-
# structure = {
152-
# "id": f"http://0.0.0.0:5000/mmif_example_manifest.json/range/{range_id}",
153-
# "type": "Range",
154-
# "label": f"{frame_type.capitalize()}",
155-
# "members": [
156-
# {
157-
# "id": f"http://0.0.0.0:5000/mmif_example_manifest.json/canvas/{1}#t={start_sec},{end_sec}",
158-
# # need to align id here to support more than one document
159-
# "type": "Canvas"
160-
# }
161-
# ]
162-
# }
163-
# view_range["members"].append(structure)
164-
return view_range
165-
166-
167124
def get_iiif_format(document):
168125
if document.is_type(DocumentTypes.VideoDocument):
169126
return 'video/mp4'

ocr.py

+22-25
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,16 @@ def __init__(self, anno, mmif):
3434

3535
def update(self, anno, mmif):
3636

37-
if anno.at_type.shortname == "BoundingBox":
37+
if anno.at_type == AnnotationTypes.BoundingBox:
3838
self.add_bounding_box(anno, mmif)
3939

40-
elif anno.at_type.shortname == "TimeFrame":
40+
elif anno.at_type == AnnotationTypes.TimeFrame:
4141
self.add_timeframe(anno, mmif)
4242

43-
elif anno.at_type.shortname == "TimePoint":
43+
elif anno.at_type == AnnotationTypes.TimePoint:
4444
self.add_timepoint(anno, mmif)
4545

46-
elif anno.at_type.shortname == "TextDocument":
46+
elif anno.at_type == DocumentTypes.TextDocument:
4747
self.add_text_document(anno)
4848

4949
elif anno.at_type.shortname == "Paragraph":
@@ -61,19 +61,26 @@ def add_bounding_box(self, anno, mmif):
6161
self.add_timepoint(timepoint_anno, mmif,
6262
skip_if_view_has_frames=False)
6363
else:
64-
self.frame_num = convert_timepoint(mmif, anno, "frames")
65-
self.secs = convert_timepoint(mmif, anno, "seconds")
66-
box_id = anno.properties["id"]
67-
boxType = anno.properties["boxType"]
68-
coordinates = anno.properties["coordinates"]
64+
for alignment_anns in mmif.get_alignments(AnnotationTypes.BoundingBox, AnnotationTypes.TimePoint).values():
65+
for alignment_ann in alignment_anns:
66+
if alignment_ann.get('source') == anno.id:
67+
timepoint_anno = mmif[alignment_ann.get('target')]
68+
break
69+
elif alignment_ann.get('target') == anno.id:
70+
timepoint_anno = mmif[alignment_ann.get('source')]
71+
break
72+
if timepoint_anno:
73+
self.add_timepoint(timepoint_anno, mmif, skip_if_view_has_frames=False)
74+
75+
box_id = anno.get("id")
76+
boxType = anno.get("boxType")
77+
coordinates = anno.get("coordinates")
6978
x = coordinates[0][0]
7079
y = coordinates[0][1]
7180
w = coordinates[1][0] - x
7281
h = coordinates[1][1] - y
7382
box = [box_id, boxType, [x, y, w, h]]
74-
# TODO: This is a hack to ignore percentage-based Doctr bounding boxes
75-
if "doctr" not in mmif.get_view_by_id(anno.parent).metadata["app"]:
76-
self.boxes.append(box)
83+
self.boxes.append(box)
7784
self.anno_ids.append(box_id)
7885
self.timestamp = str(datetime.timedelta(seconds=self.secs))
7986
if anno.properties.get("boxType") and anno.properties.get("boxType") not in self.boxtypes:
@@ -142,26 +149,16 @@ def prepare_ocr(mmif, view, viz_id):
142149
save_json(frames_pages, view.id, viz_id)
143150

144151

145-
def find_annotation(anno_id, mmif):
146-
if mmif.id_delimiter in anno_id:
147-
view_id, anno_id = anno_id.split(mmif.id_delimiter)
148-
view = mmif.get_view_by_id(view_id)
149-
for view in mmif.views:
150-
try:
151-
return view.get_annotation_by_id(anno_id)
152-
except KeyError:
153-
continue
154-
155152

156153
def get_ocr_frames(view, mmif):
157154
frames = {}
158155
full_alignment_type = [
159-
at_type for at_type in view.metadata.contains if at_type.shortname == "Alignment"]
156+
at_type for at_type in view.metadata.contains if at_type == AnnotationTypes.Alignment]
160157
# If view contains alignments
161158
if full_alignment_type:
162159
for alignment in view.get_annotations(full_alignment_type[0]):
163-
source = find_annotation(alignment.properties["source"], mmif)
164-
target = find_annotation(alignment.properties["target"], mmif)
160+
source = mmif[alignment.get("source")]
161+
target = mmif[alignment.get("target")]
165162

166163
# Account for alignment in either direction
167164
frame = OCRFrame(source, mmif)

templates/uv_player.html

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
$(".nav-item.UV").click(function() {
2121
if (!uvLoaded) {
2222
const data = {
23-
manifest: "/tmp/{{mmif_id}}/{{manifest}}",
23+
manifest: "/mmif-viz-cache/{{mmif_id}}/{{manifest}}",
2424
embedded: true
2525
};
2626

0 commit comments

Comments
 (0)