Skip to content

Commit ed43dfa

Browse files
authored
Merge pull request #35 from clamsproject/fix-swt
updates for new SWT and docTR format
2 parents cef51fa + 9019c15 commit ed43dfa

File tree

4 files changed

+116
-61
lines changed

4 files changed

+116
-61
lines changed

app.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99

1010
import cache
1111
from cache import set_last_access, cleanup
12+
from utils import app, render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization
13+
import traceback
1214
import utils
1315
from utils import app
1416

@@ -27,7 +29,8 @@ def ocr():
2729
ocr_view = mmif.get_view_by_id(data["view_id"])
2830
return utils.prepare_ocr_visualization(mmif, ocr_view, data["mmif_id"])
2931
except Exception as e:
30-
return f'<p class="error">{e}</h1>'
32+
app.logger.error(f"{e}\n{traceback.format_exc()}")
33+
return f'<p class="error">Error: {e} Check the server log for more information.</h1>'
3134

3235

3336
@app.route('/ocrpage', methods=['POST'])

displacy/__init__.py

+3 -9
Original file line number | Diff line number | Diff line change
@@ -83,15 +83,9 @@ def mmif_to_dict(mmif: Mmif):
8383

8484

8585
def entity(view: View, annotation: Annotation):
86-
if "targets" in annotation.properties:
87-
start = min([view.annotations[target].properties["start"] for target in annotation.properties["targets"]])
88-
end = max([view.annotations[target].properties["end"] for target in annotation.properties["targets"]])
89-
else:
90-
start = annotation.properties['start']
91-
end = annotation.properties['end']
92-
return {'start': start,
93-
'end': end,
94-
'label': annotation.properties['category']}
86+
return {'start': annotation.get('start'),
87+
'end': annotation.get('end'),
88+
'label': annotation.get('category')}
9589

9690

9791
def dict_to_html(d):

ocr.py

+96 -38
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import os, shutil
99

1010
from flask import render_template
11+
from mmif import AnnotationTypes, DocumentTypes, Mmif
1112
from mmif.utils.video_document_helper import convert_timepoint, convert_timeframe
1213

1314
import cache
@@ -35,27 +36,48 @@ def __init__(self, anno, mmif):
3536
self.update(anno, mmif)
3637

3738
def update(self, anno, mmif):
38-
if anno.at_type.shortname == "BoundingBox":
39+
40+
if anno.at_type == AnnotationTypes.BoundingBox:
3941
self.add_bounding_box(anno, mmif)
4042

41-
elif anno.at_type.shortname == "TimeFrame":
43+
elif anno.at_type == AnnotationTypes.TimeFrame:
4244
self.add_timeframe(anno, mmif)
4345

44-
elif anno.at_type.shortname == "TextDocument":
45-
t = anno.properties.get("text_value") or anno.properties.get("text").value
46-
if t:
47-
self.text.append(re.sub(r'([\\\/\|\"\'])', r'\1 ', t))
48-
49-
def add_bounding_box(self, anno, mmif):
50-
self.frame_num = convert_timepoint(mmif, anno, "frames")
51-
self.secs = convert_timepoint(mmif, anno, "seconds")
52-
box_id = anno.properties["id"]
53-
boxType = anno.properties["boxType"]
54-
coordinates = anno.properties["coordinates"]
46+
elif anno.at_type == AnnotationTypes.TimePoint:
47+
self.add_timepoint(anno, mmif)
48+
49+
elif anno.at_type == DocumentTypes.TextDocument:
50+
self.add_text_document(anno)
51+
52+
elif anno.at_type.shortname == "Paragraph":
53+
view = mmif.get_view_by_id(anno.parent)
54+
text_anno = mmif[anno.properties.get("document")]
55+
self.add_text_document(text_anno)
56+
57+
def add_bounding_box(self, anno, mmif: Mmif):
58+
timepoint_anno = None
59+
if "timePoint" in anno.properties:
60+
timepoint_anno = mmif[anno.get("timePoint")]
61+
62+
else:
63+
for alignment_anns in mmif.get_alignments(AnnotationTypes.BoundingBox, AnnotationTypes.TimePoint).values():
64+
for alignment_ann in alignment_anns:
65+
if alignment_ann.get('source') == anno.id:
66+
timepoint_anno = mmif[alignment_ann.get('target')]
67+
break
68+
elif alignment_ann.get('target') == anno.id:
69+
timepoint_anno = mmif[alignment_ann.get('source')]
70+
break
71+
if timepoint_anno:
72+
self.add_timepoint(timepoint_anno, mmif, skip_if_view_has_frames=False)
73+
74+
box_id = anno.get("id")
75+
boxType = anno.get("boxType")
76+
coordinates = anno.get("coordinates")
5577
x = coordinates[0][0]
5678
y = coordinates[0][1]
57-
w = coordinates[3][0] - x
58-
h = coordinates[3][1] - y
79+
w = coordinates[1][0] - x
80+
h = coordinates[1][1] - y
5981
box = [box_id, boxType, [x, y, w, h]]
6082
self.boxes.append(box)
6183
self.anno_ids.append(box_id)
@@ -64,40 +86,70 @@ def add_bounding_box(self, anno, mmif):
6486
self.boxtypes.append(anno.properties.get("boxType"))
6587

6688
def add_timeframe(self, anno, mmif):
67-
start, end = convert_timeframe(mmif, anno, "frames")
68-
start_secs, end_secs = convert_timeframe(mmif, anno, "seconds")
89+
# If annotation has multiple targets, pick the first and last as start and end
90+
if "targets" in anno.properties:
91+
start_id, end_id = anno.properties.get("targets")[0], anno.properties.get("targets")[-1]
92+
anno_parent = mmif.get_view_by_id(anno.parent)
93+
start_anno, end_anno = mmif[start_id], mmif[end_id]
94+
start = convert_timepoint(mmif, start_anno, "frames")
95+
end = convert_timepoint(mmif, end_anno, "frames")
96+
start_secs = convert_timepoint(mmif, start_anno, "seconds")
97+
end_secs = convert_timepoint(mmif, end_anno, "seconds")
98+
else:
99+
start, end = convert_timeframe(mmif, anno, "frames")
100+
start_secs, end_secs = convert_timeframe(mmif, anno, "seconds")
69101
self.range = (start, end)
70102
self.timestamp_range = (str(datetime.timedelta(seconds=start_secs)), str(datetime.timedelta(seconds=end_secs)))
71103
self.sec_range = (start_secs, end_secs)
72104
if anno.properties.get("frameType"):
73-
self.frametype = anno.properties.get("frameType")
74-
75-
76-
def find_annotation(anno_id, view, mmif):
77-
if mmif.id_delimiter in anno_id:
78-
view_id, anno_id = anno_id.split(mmif.id_delimiter)
79-
view = mmif.get_view_by_id(view_id)
80-
return view.get_annotation_by_id(anno_id)
81-
82-
83-
def get_ocr_frames(view, mmif, fps):
105+
self.frametype = str(anno.properties.get("frameType"))
106+
elif anno.properties.get("label"):
107+
self.frametype = str(anno.properties.get("label"))
108+
109+
def add_timepoint(self, anno, mmif, skip_if_view_has_frames=True):
110+
parent = mmif.get_view_by_id(anno.parent)
111+
other_annotations = [k for k in parent.metadata.contains.keys() if k != anno.id]
112+
# If there are TimeFrames in the same view, they most likely represent
113+
# condensed information about representative frames (e.g. SWT). In this
114+
# case, only render the TimeFrames and ignore the TimePoints.
115+
if any([anno == AnnotationTypes.TimeFrame for anno in other_annotations]) and skip_if_view_has_frames:
116+
return
117+
self.frame_num = convert_timepoint(mmif, anno, "frames")
118+
self.secs = convert_timepoint(mmif, anno, "seconds")
119+
self.timestamp = str(datetime.timedelta(seconds=self.secs))
120+
if anno.properties.get("label"):
121+
self.frametype = anno.properties.get("label")
122+
123+
def add_text_document(self, anno):
124+
t = anno.properties.get("text_value") or anno.properties.get("text").value
125+
if t:
126+
text_val = re.sub(r'([\\\/\|\"\'])', r'\1 ', t)
127+
self.text = self.text + [text_val] if text_val not in self.text else self.text
128+
129+
130+
def get_ocr_frames(view, mmif):
84131
frames = {}
85132
full_alignment_type = [
86-
at_type for at_type in view.metadata.contains if at_type.shortname == "Alignment"]
133+
at_type for at_type in view.metadata.contains if at_type == AnnotationTypes.Alignment]
87134
# If view contains alignments
88135
if full_alignment_type:
89136
for alignment in view.get_annotations(full_alignment_type[0]):
90-
source = find_annotation(alignment.properties["source"], view, mmif)
91-
target = find_annotation(alignment.properties["target"], view, mmif)
137+
source = mmif[alignment.get("source")]
138+
target = mmif[alignment.get("target")]
92139

140+
# Account for alignment in either direction
93141
frame = OCRFrame(source, mmif)
142+
frame.update(target, mmif)
143+
94144
i = frame.frame_num if frame.frame_num is not None else frame.range
145+
if i is None:
146+
continue
95147
if i in frames.keys():
96148
frames[i].update(source, mmif)
97149
frames[i].update(target, mmif)
98150
else:
99-
frame.update(target, mmif)
100151
frames[i] = frame
152+
101153
else:
102154
for annotation in view.get_annotations():
103155
frame = OCRFrame(annotation, mmif)
@@ -108,6 +160,7 @@ def get_ocr_frames(view, mmif, fps):
108160
frames[i].update(annotation, mmif)
109161
else:
110162
frames[i] = frame
163+
print(frames)
111164
return frames
112165

113166

@@ -175,7 +228,7 @@ def make_image_directory(mmif_id):
175228
return path
176229

177230

178-
def find_duplicates(frames_list, cv2_vid):
231+
def find_duplicates(frames_list):
179232
"""Find duplicate frames"""
180233
prev_frame = None
181234
for frame_num, frame in frames_list:
@@ -239,18 +292,23 @@ def round_boxes(boxes):
239292
def get_ocr_views(mmif):
240293
"""Returns all CV views, which contain timeframes or bounding boxes"""
241294
views = []
242-
required_types = ["TimeFrame", "BoundingBox"]
295+
required_types = ["TimeFrame", "BoundingBox", "TimePoint"]
243296
for view in mmif.views:
244297
for anno_type, anno in view.metadata.contains.items():
245298
# Annotation belongs to a CV view if it is a TimeFrame/BB and it refers to a VideoDocument
246-
if anno_type.shortname in required_types and mmif.get_document_by_id(
247-
anno["document"]).at_type.shortname == "VideoDocument":
299+
# if anno.get("document") is None:
300+
# continue
301+
# if anno_type.shortname in required_types and mmif.get_document_by_id(
302+
# anno["document"]).at_type.shortname == "VideoDocument":
303+
# views.append(view)
304+
# continue
305+
if anno_type.shortname in required_types:
248306
views.append(view)
249-
continue
307+
break
250308
# TODO: Couldn't find a simple way to show if an alignment view is a CV/Frames-type view
251309
elif "parseq" in view.metadata.app:
252310
views.append(view)
253-
continue
311+
break
254312
return views
255313

256314

utils.py

+13 -13
Original file line number | Diff line number | Diff line change
@@ -60,15 +60,17 @@ def asr_alignments_to_vtt(alignment_view, viz_id):
6060

6161

6262
def build_alignment(alignment, token_idx, timeframe_idx):
63-
target = alignment.properties['target']
64-
source = alignment.properties['source']
63+
target = alignment.get('target')
64+
source = alignment.get('source')
6565
timeframe = timeframe_idx.get(source)
6666
token = token_idx.get(target)
6767
if timeframe and token:
68-
start = timeframe.properties['start']
69-
end = timeframe.properties['end']
70-
text = token.properties['word']
71-
return start, end, text
68+
start = timeframe.get('start')
69+
end = timeframe.get('end')
70+
for text_key in ['text', 'word']:
71+
if text_key in token:
72+
text = token.get(text_key)
73+
return start, end, text
7274

7375

7476
def get_src_media_symlink_basename(doc: Document):
@@ -147,12 +149,12 @@ def get_boxes(mmif):
147149
# Javascript code that draws the rectangle.
148150
boxes = []
149151
for a in tbox_annotations:
150-
coordinates = a.properties["coordinates"]
152+
coordinates = a.get("coordinates")
151153
x = coordinates[0][0]
152154
y = coordinates[0][1]
153155
w = coordinates[1][0] - x
154156
h = coordinates[2][1] - y
155-
box = [a.properties["id"], a.properties["boxType"], [x, y, w, h]]
157+
box = [a.get("id"), a.get("boxType"), [x, y, w, h]]
156158
boxes.append(box)
157159
return boxes
158160

@@ -245,7 +247,7 @@ def get_document_ids(view, annotation_type):
245247
for annotation in view.annotations:
246248
if annotation.at_type.shortname == str(annotation_type):
247249
try:
248-
ids.add(annotation.properties["document"])
250+
ids.add(annotation.get("document"))
249251
except KeyError:
250252
pass
251253
return list(ids)
@@ -379,14 +381,12 @@ def prepare_ocr_visualization(mmif, view, mmif_id):
379381
""" Visualize OCR by extracting image frames with BoundingBoxes from video"""
380382
# frames, text_docs, alignments = {}, {}, {}
381383
vid_path = mmif.get_documents_by_type(DocumentTypes.VideoDocument)[0].location_path()
382-
cv2_vid = cv2.VideoCapture(vid_path)
383-
fps = cv2_vid.get(cv2.CAP_PROP_FPS)
384384

385-
ocr_frames = get_ocr_frames(view, mmif, fps)
385+
ocr_frames = get_ocr_frames(view, mmif)
386386

387387
# Generate pages (necessary to reduce IO cost) and render
388388
frames_list = [(k, vars(v)) for k, v in ocr_frames.items()]
389-
frames_list = find_duplicates(frames_list, cv2_vid)
389+
frames_list = find_duplicates(frames_list)
390390
frames_pages = paginate(frames_list)
391391
# Save page list as temp file
392392
save_json(frames_pages, view.id, mmif_id)

0 commit comments

Comments
 (0)