Skip to content

Commit adcb791

Browse files
PEP-8/style fixes ahead of PR
1 parent 0e27a42 commit adcb791

File tree

5 files changed

+214
-539
lines changed

5 files changed

+214
-539
lines changed

app.py

+33 -29
Original file line number | Diff line number | Diff line change
@@ -29,30 +29,6 @@ def ocr():
2929
return serve_first_ocr_page(request.json)
3030
else:
3131
return serve_ocr_page(request.json)
32-
33-
34-
def serve_first_ocr_page(data):
35-
"""
36-
Prepares OCR (at load time, due to lazy loading) and serves the first page
37-
"""
38-
try:
39-
data = dict(request.json)
40-
mmif_str = open(cache.get_cache_root() / data["mmif_id"] / "file.mmif").read()
41-
mmif = Mmif(mmif_str)
42-
ocr_view = mmif.get_view_by_id(data["view_id"])
43-
return prepare_and_render_ocr(mmif, ocr_view, data["mmif_id"])
44-
except Exception as e:
45-
app.logger.error(f"{e}\n{traceback.format_exc()}")
46-
return f'<p class="error">Error: {e} Check the server log for more information.</h1>'
47-
48-
def serve_ocr_page(data):
49-
"""
50-
Serves subsequent OCR pages
51-
"""
52-
try:
53-
return render_ocr_page(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"])
54-
except Exception as e:
55-
return f'<p class="error">Unexpected error of type {type(e)}: {e}</h1>'
5632

5733

5834
@app.route('/upload', methods=['GET', 'POST'])
@@ -121,9 +97,36 @@ def render_mmif(mmif_str, viz_id):
12197
rendered_documents = render_documents(mmif, viz_id)
12298
rendered_annotations = render_annotations(mmif, viz_id)
12399
return render_template('player.html',
124-
docs=rendered_documents,
125-
viz_id=viz_id,
126-
annotations=rendered_annotations)
100+
docs=rendered_documents,
101+
viz_id=viz_id,
102+
annotations=rendered_annotations)
103+
104+
105+
def serve_first_ocr_page(data):
106+
"""
107+
Prepares OCR (at load time, due to lazy loading) and serves the first page
108+
"""
109+
try:
110+
data = dict(request.json)
111+
mmif_str = open(cache.get_cache_root() /
112+
data["mmif_id"] / "file.mmif").read()
113+
mmif = Mmif(mmif_str)
114+
ocr_view = mmif.get_view_by_id(data["view_id"])
115+
return prepare_and_render_ocr(mmif, ocr_view, data["mmif_id"])
116+
except Exception as e:
117+
app.logger.error(f"{e}\n{traceback.format_exc()}")
118+
return f'<p class="error">Error: {e} Check the server log for more information.</h1>'
119+
120+
121+
def serve_ocr_page(data):
122+
"""
123+
Serves subsequent OCR pages
124+
"""
125+
try:
126+
return render_ocr_page(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"])
127+
except Exception as e:
128+
return f'<p class="error">Unexpected error of type {type(e)}: {e}</h1>'
129+
127130

128131
def upload_file(in_mmif):
129132
# Save file locally
@@ -159,7 +162,8 @@ def upload_file(in_mmif):
159162
if __name__ == '__main__':
160163
# Make path for temp files
161164
cache_path = cache.get_cache_root()
162-
cache_symlink_path = os.path.join(app.static_folder, cache._CACHE_DIR_SUFFIX)
165+
cache_symlink_path = os.path.join(
166+
app.static_folder, cache._CACHE_DIR_SUFFIX)
163167
if os.path.islink(cache_symlink_path):
164168
os.unlink(cache_symlink_path)
165169
elif os.path.exists(cache_symlink_path):
@@ -174,5 +178,5 @@ def upload_file(in_mmif):
174178
port = 5000
175179
if len(sys.argv) > 2 and sys.argv[1] == '-p':
176180
port = int(sys.argv[2])
177-
181+
178182
app.run(port=port, host='0.0.0.0', debug=True, use_reloader=True)

helpers.py

-123
This file was deleted.

ocr.py

+42 -30
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,11 @@
11
import datetime
2-
import pathlib
32

43
import cv2
5-
import tempfile
64
import json
75
import re
8-
import os, shutil
6+
import os
7+
import shutil
98

10-
from flask import render_template
119
from mmif.utils.video_document_helper import convert_timepoint, convert_timeframe
1210

1311
import cache
@@ -50,16 +48,18 @@ def update(self, anno, mmif):
5048

5149
elif anno.at_type.shortname == "Paragraph":
5250
view = mmif.get_view_by_id(anno.parent)
53-
text_anno = view.get_annotation_by_id(anno.properties.get("document"))
51+
text_anno = view.get_annotation_by_id(
52+
anno.properties.get("document"))
5453
self.add_text_document(text_anno)
5554

56-
5755
def add_bounding_box(self, anno, mmif):
5856
if "timePoint" in anno.properties:
59-
timepoint_anno = find_annotation(anno.properties["timePoint"], mmif)
57+
timepoint_anno = find_annotation(
58+
anno.properties["timePoint"], mmif)
6059

6160
if timepoint_anno:
62-
self.add_timepoint(timepoint_anno, mmif, skip_if_view_has_frames=False)
61+
self.add_timepoint(timepoint_anno, mmif,
62+
skip_if_view_has_frames=False)
6363
else:
6464
self.frame_num = convert_timepoint(mmif, anno, "frames")
6565
self.secs = convert_timepoint(mmif, anno, "seconds")
@@ -82,9 +82,11 @@ def add_bounding_box(self, anno, mmif):
8282
def add_timeframe(self, anno, mmif):
8383
# If annotation has multiple targets, pick the first and last as start and end
8484
if "targets" in anno.properties:
85-
start_id, end_id = anno.properties.get("targets")[0], anno.properties.get("targets")[-1]
85+
start_id, end_id = anno.properties.get(
86+
"targets")[0], anno.properties.get("targets")[-1]
8687
anno_parent = mmif.get_view_by_id(anno.parent)
87-
start_anno, end_anno = anno_parent.get_annotation_by_id(start_id), anno_parent.get_annotation_by_id(end_id)
88+
start_anno, end_anno = anno_parent.get_annotation_by_id(
89+
start_id), anno_parent.get_annotation_by_id(end_id)
8890
start = convert_timepoint(mmif, start_anno, "frames")
8991
end = convert_timepoint(mmif, end_anno, "frames")
9092
start_secs = convert_timepoint(mmif, start_anno, "seconds")
@@ -93,32 +95,37 @@ def add_timeframe(self, anno, mmif):
9395
start, end = convert_timeframe(mmif, anno, "frames")
9496
start_secs, end_secs = convert_timeframe(mmif, anno, "seconds")
9597
self.range = (start, end)
96-
self.timestamp_range = (str(datetime.timedelta(seconds=start_secs)), str(datetime.timedelta(seconds=end_secs)))
98+
self.timestamp_range = (str(datetime.timedelta(seconds=start_secs)), str(
99+
datetime.timedelta(seconds=end_secs)))
97100
self.sec_range = (start_secs, end_secs)
98101
if anno.properties.get("frameType"):
99102
self.frametype = str(anno.properties.get("frameType"))
100103
elif anno.properties.get("label"):
101104
self.frametype = str(anno.properties.get("label"))
102105

103106
def add_timepoint(self, anno, mmif, skip_if_view_has_frames=True):
104-
parent = mmif.get_view_by_id(anno.parent)
105-
other_annotations = [k for k in parent.metadata.contains.keys() if k != anno.id]
106-
# If there are TimeFrames in the same view, they most likely represent
107-
# condensed information about representative frames (e.g. SWT). In this
108-
# case, only render the TimeFrames and ignore the TimePoints.
109-
if any([anno.shortname == "TimeFrame" for anno in other_annotations]) and skip_if_view_has_frames:
110-
return
111-
self.frame_num = convert_timepoint(mmif, anno, "frames")
112-
self.secs = convert_timepoint(mmif, anno, "seconds")
113-
self.timestamp = str(datetime.timedelta(seconds=self.secs))
114-
if anno.properties.get("label"):
115-
self.frametype = anno.properties.get("label")
107+
parent = mmif.get_view_by_id(anno.parent)
108+
other_annotations = [
109+
k for k in parent.metadata.contains.keys() if k != anno.id]
110+
# If there are TimeFrames in the same view, they most likely represent
111+
# condensed information about representative frames (e.g. SWT). In this
112+
# case, only render the TimeFrames and ignore the TimePoints.
113+
if any([anno.shortname == "TimeFrame" for anno in other_annotations]) and skip_if_view_has_frames:
114+
return
115+
self.frame_num = convert_timepoint(mmif, anno, "frames")
116+
self.secs = convert_timepoint(mmif, anno, "seconds")
117+
self.timestamp = str(datetime.timedelta(seconds=self.secs))
118+
if anno.properties.get("label"):
119+
self.frametype = anno.properties.get("label")
116120

117121
def add_text_document(self, anno):
118-
t = anno.properties.get("text_value") or anno.properties.get("text").value
122+
t = anno.properties.get(
123+
"text_value") or anno.properties.get("text").value
119124
if t:
120125
text_val = re.sub(r'([\\\/\|\"\'])', r'\1 ', t)
121-
self.text = self.text + [text_val] if text_val not in self.text else self.text
126+
self.text = self.text + \
127+
[text_val] if text_val not in self.text else self.text
128+
122129

123130
def find_annotation(anno_id, mmif):
124131
if mmif.id_delimiter in anno_id:
@@ -153,7 +160,7 @@ def get_ocr_frames(view, mmif):
153160
frames[i].update(target, mmif)
154161
else:
155162
frames[i] = frame
156-
163+
157164
else:
158165
for annotation in view.get_annotations():
159166
frame = OCRFrame(annotation, mmif)
@@ -185,6 +192,7 @@ def paginate(frames_list):
185192

186193
return {i: page for (i, page) in enumerate(pages)}
187194

195+
188196
def make_image_directory(mmif_id):
189197
# Make path for temp OCR image files or clear image files if it exists
190198
path = cache.get_cache_root() / mmif_id / "img"
@@ -232,10 +240,14 @@ def is_duplicate_image(prev_frame, frame, cv2_vid):
232240
img2_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
233241

234242
# Calculate the histogram and normalize it
235-
hist_img1 = cv2.calcHist([img1_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
236-
cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX);
237-
hist_img2 = cv2.calcHist([img2_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
238-
cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX);
243+
hist_img1 = cv2.calcHist([img1_hsv], [0, 1], None, [
244+
180, 256], [0, 180, 0, 256])
245+
cv2.normalize(hist_img1, hist_img1, alpha=0,
246+
beta=1, norm_type=cv2.NORM_MINMAX)
247+
hist_img2 = cv2.calcHist([img2_hsv], [0, 1], None, [
248+
180, 256], [0, 180, 0, 256])
249+
cv2.normalize(hist_img2, hist_img2, alpha=0,
250+
beta=1, norm_type=cv2.NORM_MINMAX)
239251

240252
# Find the metric value
241253
metric_val = cv2.compareHist(hist_img1, hist_img2, cv2.HISTCMP_CHISQR)

0 commit comments

Comments
 (0)