Skip to content

Commit c1b4c9a

Browse files
committed
Merge branch 'develop'
2 parents 9b3ae27 + b222a7c commit c1b4c9a

File tree

7 files changed

+185
-59
lines changed

7 files changed

+185
-59
lines changed

app.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,39 @@
22
import pathlib
33
import sys
44
import secrets
5+
import json
6+
import html
57

6-
from flask import request, render_template, flash, redirect, send_from_directory
8+
from flask import request, render_template, flash, redirect, send_from_directory, session
79
from werkzeug.utils import secure_filename
810
from mmif.serialize import Mmif
911

10-
from utils import app, render_ocr, get_media, prep_annotations
12+
from utils import app, render_ocr, get_media, prep_annotations, prepare_ocr_visualization
1113

1214
@app.route('/')
1315
def index():
1416
return render_template('index.html')
1517

18+
19+
@app.route('/ocr', methods=['POST'])
def ocr():
    """Render the OCR visualization for one view of the session's MMIF file.

    Expects a JSON request body containing a "view_id" key. The MMIF file
    path is read from ``session["mmif_file"]``.

    Returns:
        Whatever ``prepare_ocr_visualization`` returns on success, otherwise
        an HTML ``<p class="error">`` fragment describing the failure.
    """
    try:
        data = dict(request.json)
        # Context manager so the file handle is closed even if parsing fails.
        with open(session["mmif_file"]) as fh:
            mmif = Mmif(fh.read())
        ocr_view = mmif.get_view_by_id(data["view_id"])
        return prepare_ocr_visualization(mmif, ocr_view)
    except Exception as e:
        # The message may echo request-supplied values (e.g. a bad view id),
        # so escape it before embedding it in markup.
        return f'<p class="error">{html.escape(str(e))}</p>'
29+
30+
1631
@app.route('/ocrpage', methods=['POST'])
def ocrpage():
    """Render a single page of OCR frames.

    Expects a JSON request body with "vid_path", "view_id" and
    "page_number" keys, which are forwarded to ``render_ocr``.

    Returns:
        The output of ``render_ocr`` on success, otherwise an HTML
        ``<p class="error">`` fragment naming the exception type and message.
    """
    data = request.json
    try:
        return render_ocr(data['vid_path'], data["view_id"], data["page_number"])
    except Exception as e:
        # str(type(e)) looks like "<class 'KeyError'>"; unescaped, a browser
        # would parse that as a tag and hide it, so escape both pieces.
        error_type = html.escape(str(type(e)))
        error_text = html.escape(str(e))
        return f'<p class="error">Unexpected error of type {error_type}: {error_text}</p>'
2638

2739
@app.route('/upload', methods=['GET', 'POST'])
2840
def upload():
@@ -47,6 +59,7 @@ def upload():
4759
filename = secure_filename(file.filename)
4860
file.save(os.path.join('temp', filename))
4961
with open("temp/" + filename) as fh:
62+
session["mmif_file"] = fh.name
5063
mmif_str = fh.read()
5164
return render_mmif(mmif_str)
5265
return render_template('upload.html')

ocr.py

+88-41
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,14 @@
88
import html
99

1010
from flask import render_template, session
11-
# from utils import app
11+
from mmif.utils.video_document_helper import convert_timepoint, convert_timeframe
1212

1313

1414
class OCRFrame():
1515
"""Class representing an (aligned or otherwise) set of OCR annotations for a single frame"""
1616

17-
def __init__(self, anno, fps):
17+
def __init__(self, anno, mmif):
1818
self.text = []
19-
self.fps = fps
2019
self.boxes = []
2120
self.anno_ids = []
2221
self.timestamp = None
@@ -26,24 +25,26 @@ def __init__(self, anno, fps):
2625
self.range = None
2726
self.timestamp_range = None
2827
self.sec_range = None
28+
self.frametype = None
29+
self.boxtypes = []
2930

30-
self.update(anno)
31+
self.update(anno, mmif)
3132

32-
def update(self, anno, mmif):
    """Fold one more annotation into this frame.

    Dispatches on ``anno.at_type.shortname``:

    * "BoundingBox"  -> ``self.add_bounding_box(anno, mmif)``
    * "TimeFrame"    -> ``self.add_timeframe(anno, mmif)``
    * "TextDocument" -> the text is appended to ``self.text`` with a space
      inserted after every backslash, slash, pipe and quote character.

    Any other annotation type is ignored.
    """
    shortname = anno.at_type.shortname
    if shortname == "BoundingBox":
        self.add_bounding_box(anno, mmif)

    elif shortname == "TimeFrame":
        self.add_timeframe(anno, mmif)

    elif shortname == "TextDocument":
        t = anno.properties.get("text_value")
        if not t:
            # Fall back to the "text" property; it may be absent, in which
            # case there is simply nothing to record (instead of raising
            # AttributeError on None).
            text_prop = anno.properties.get("text")
            t = getattr(text_prop, "value", None)
        if t:
            self.text.append(re.sub(r'([\\\/\|\"\'])', r'\1 ', t))
4344

44-
def add_bounding_box(self, anno):
45-
self.frame_num = anno.properties.get(
46-
"frame") or anno.properties.get("timePoint")
45+
def add_bounding_box(self, anno, mmif):
46+
self.frame_num = convert_timepoint(mmif, anno, "frames")
47+
self.secs = convert_timepoint(mmif, anno, "seconds")
4748
box_id = anno.properties["id"]
4849
boxType = anno.properties["boxType"]
4950
coordinates = anno.properties["coordinates"]
@@ -54,18 +55,19 @@ def add_bounding_box(self, anno):
5455
box = [box_id, boxType, [x, y, w, h]]
5556
self.boxes.append(box)
5657
self.anno_ids.append(box_id)
57-
if self.fps:
58-
secs = int(self.frame_num/self.fps)
59-
self.timestamp = str(datetime.timedelta(seconds=secs))
60-
self.secs = secs
58+
self.timestamp = str(datetime.timedelta(seconds=self.secs))
59+
if anno.properties.get("boxType") and anno.properties.get("boxType") not in self.boxtypes:
60+
self.boxtypes.append(anno.properties.get("boxType"))
6161

62-
def add_timeframe(self, anno, mmif):
    """Record the span of a TimeFrame annotation on this frame.

    Sets ``self.range`` (frames), ``self.sec_range`` (seconds) and
    ``self.timestamp_range`` (the seconds rendered through
    ``datetime.timedelta``).  If the annotation carries a truthy
    "frameType" property it is stored in ``self.frametype``.
    """
    first_frame, last_frame = convert_timeframe(mmif, anno, "frames")
    first_sec, last_sec = convert_timeframe(mmif, anno, "seconds")

    self.range = (first_frame, last_frame)
    self.timestamp_range = tuple(
        str(datetime.timedelta(seconds=moment)) for moment in (first_sec, last_sec)
    )
    self.sec_range = (first_sec, last_sec)

    frame_type = anno.properties.get("frameType")
    if frame_type:
        self.frametype = frame_type
6971

7072

7173
def find_annotation(anno_id, view, mmif):
@@ -84,22 +86,23 @@ def get_ocr_frames(view, mmif, fps):
8486
for alignment in view.get_annotations(full_alignment_type[0]):
8587
source = find_annotation(alignment.properties["source"], view, mmif)
8688
target = find_annotation(alignment.properties["target"], view, mmif)
87-
frame = OCRFrame(source, fps)
89+
90+
frame = OCRFrame(source, mmif)
8891
i = frame.frame_num if frame.frame_num is not None else frame.range
8992
if i in frames.keys():
90-
frames[i].update(source)
91-
frames[i].update(target)
93+
frames[i].update(source, mmif)
94+
frames[i].update(target, mmif)
9295
else:
93-
frame.update(target)
96+
frame.update(target, mmif)
9497
frames[i] = frame
9598
else:
9699
for annotation in view.get_annotations():
97-
frame = OCRFrame(annotation, fps)
100+
frame = OCRFrame(annotation, mmif)
98101
i = frame.frame_num if frame.frame_num is not None else frame.range
99102
if i is None:
100103
continue
101104
if i in frames.keys():
102-
frames[i].update(annotation)
105+
frames[i].update(annotation, mmif)
103106
else:
104107
frames[i] = frame
105108
return frames
@@ -128,17 +131,26 @@ def render_ocr(vid_path, view_id, page_number):
128131
f = open(session[f"{view_id}-page-file"])
129132
frames_pages = json.load(f)
130133
page = frames_pages[str(page_number)]
134+
prev_frame_cap = None
131135
for frame_num, frame in page:
132136
# If index is range instead of frame...
133137
if frame.get("range"):
134138
frame_num = (int(frame["range"][0]) + int(frame["range"][1])) / 2
135139
cv2_vid.set(1, frame_num)
136140
_, frame_cap = cv2_vid.read()
141+
if frame_cap is None:
142+
raise FileNotFoundError(f"Video file {vid_path} not found!")
143+
144+
# Double check histogram similarity of "repeat" frames -- if they're significantly different, un-mark as repeat
145+
if prev_frame_cap is not None and frame["repeat"] and not is_duplicate_image(prev_frame_cap, frame_cap, cv2_vid):
146+
frame["repeat"] = False
147+
137148
with tempfile.NamedTemporaryFile(
138149
prefix=str(pathlib.Path(__file__).parent /'static'/'tmp'), suffix=".jpg", delete=False) as tf:
139150
cv2.imwrite(tf.name, frame_cap)
140151
# "id" is just the name of the temp image file
141152
frame["id"] = pathlib.Path(tf.name).name
153+
prev_frame_cap = frame_cap
142154

143155
return render_template('ocr.html',
144156
vid_path=vid_path,
@@ -148,48 +160,83 @@ def render_ocr(vid_path, view_id, page_number):
148160
page_number=str(page_number))
149161

150162

151-
def find_duplicates(frames_list, cv2_vid):
    """Mark frames that look like repeats of the frame before them.

    Args:
        frames_list: iterable of ``(frame_num, frame)`` pairs, where
            ``frame`` is a dict.  Entries whose ``frame_num`` is not an int
            (i.e. timeframe annotations) are skipped and never compared.
        cv2_vid: accepted for interface compatibility; not used here.

    Returns:
        The same ``frames_list`` object; duplicate frames have
        ``frame["repeat"]`` set to True in place.
    """
    prev_frame = None
    for frame_num, frame in frames_list:
        # Frame is a timeframe annotation: its key is not a frame number.
        if not isinstance(frame_num, int):
            continue
        if is_duplicate_ocr_frame(prev_frame, frame):
            frame["repeat"] = True
        prev_frame = frame
    return frames_list
159174

160175

161-
def is_duplicate_ocr_frame(prev_frame, frame):
    """Heuristically decide whether ``frame`` repeats ``prev_frame``.

    Args:
        prev_frame: dict for the previous frame, or a falsy value when there
            is none.
        frame: dict for the current frame.

    Returns:
        False when there is no previous frame, when the "boxtypes" differ, or
        when the number of boxes differs by more than 3.  Otherwise True when
        the frames share a rounded bounding box and are less than 10 seconds
        apart ("secs"), or when their "text" entries overlap; else False.
    """
    if not prev_frame:
        return False
    if prev_frame.get("boxtypes") != frame.get("boxtypes"):
        return False

    # Treat a missing/None entry as empty rather than failing in len()/set().
    prev_boxes = prev_frame.get("boxes") or []
    boxes = frame.get("boxes") or []
    if abs(len(prev_boxes) - len(boxes)) > 3:
        return False

    # Check bounding box distances.  The time gap does not depend on the box,
    # so it is evaluated once, and only if some box actually matches.
    rounded_prev = round_boxes(prev_boxes)
    shares_box = any(box in rounded_prev for box in round_boxes(boxes))
    if shares_box and frame["secs"] - prev_frame["secs"] < 10:
        return True

    # Check overlap in text.
    prev_text = set(prev_frame.get("text") or [])
    text = set(frame.get("text") or [])
    return bool(prev_text & text)
169193

194+
def _hs_histogram(image):
    """Return the min-max normalized 2-D histogram over HSV channels 0 and 1."""
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
    cv2.normalize(hist, hist, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
    return hist


def is_duplicate_image(prev_frame, frame, cv2_vid, threshold=50):
    """Compare two captured frames by histogram similarity.

    Args:
        prev_frame: previously captured image, passed to ``cv2.cvtColor``
            with ``COLOR_BGR2HSV``.
        frame: current captured image, same format.
        cv2_vid: accepted for interface compatibility; not used here.
        threshold: the images count as duplicates when the chi-square
            histogram distance is strictly below this value (default 50,
            the value previously hard-coded).

    Returns:
        True when the ``cv2.HISTCMP_CHISQR`` distance between the two
        normalized histograms is below ``threshold``.
    """
    metric_val = cv2.compareHist(
        _hs_histogram(prev_frame), _hs_histogram(frame), cv2.HISTCMP_CHISQR
    )
    return metric_val < threshold
209+
210+
170211

171212
def round_boxes(boxes, granularity=100):
    """Snap box coordinates to a coarse grid.

    Used to account for jittery bounding boxes in OCR annotations, so that
    boxes which moved only slightly compare equal.

    Args:
        boxes: iterable of box records whose element at index 2 is the
            sequence of coordinates.
        granularity: grid size each coordinate is rounded to (default 100).

    Returns:
        A list with one list of rounded coordinates per input box; the other
        fields of each box record are dropped.
    """
    return [
        [round(coord / granularity) * granularity for coord in box[2]]
        for box in boxes
    ]
180221

181222

182223
def get_ocr_views(mmif):
    """Return all CV views, i.e. views containing timeframes or bounding boxes.

    A view qualifies when one of the entries in ``view.metadata.contains``
    is a TimeFrame or BoundingBox type whose "document" refers to a
    VideoDocument, or when "parseq" occurs in ``view.metadata.app`` and the
    view has at least one ``contains`` entry.

    Each qualifying view appears exactly once, in the order of
    ``mmif.views``.
    """
    views = []
    required_types = ("TimeFrame", "BoundingBox")
    for view in mmif.views:
        for anno_type, anno in view.metadata.contains.items():
            # Annotation belongs to a CV view if it is a TimeFrame/BB and it
            # refers to a VideoDocument.
            if (anno_type.shortname in required_types
                    and mmif.get_document_by_id(anno["document"]).at_type.shortname == "VideoDocument"):
                views.append(view)
                # Stop at the first match: continuing the inner loop would
                # append the same view once per matching entry.
                break
            # TODO: Couldn't find a simple way to show if an alignment view
            # is a CV/Frames-type view
            if "parseq" in view.metadata.app:
                views.append(view)
                break
    return views
190238

191239
def save_json(dict, view_id):
192-
# jsonified_pages = json.dumps(dict)
193240
with tempfile.NamedTemporaryFile(prefix=str(pathlib.Path(__file__).parent /'static'/'tmp'), suffix=".json", delete=False) as tf:
194241
pages_json = open(tf.name, "w")
195242
json.dump(dict, pages_json)

requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
spacy==2.3.2
2-
clams-python==1.0.*
2+
mmif-python==1.0.8
3+
mmif-python[utils]==1.0.8
34
flask-session
45
opencv-python==4.*

templates/ocr.html

+11-4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
<div id="ocr_tab_{{view_id}}">
23
{% for frame_num, frame in page %}
34
{% set filename = frame["id"] %}
@@ -11,19 +12,25 @@
1112
{% include "image.html" %}
1213
<div>
1314
<h4>
14-
{% if frame["frame_num"] %}
15+
{% if frame["frame_num"] is not none %}
1516
frame: {{frame["frame_num"]}}<br>
1617
{% endif %}
17-
{% if frame["range"] %}
18+
{% if frame["range"] is not none %}
1819
frames: {{frame["range"][0]}} - {{frame["range"][1]}}<br>
1920
{% endif %}
20-
{% if frame["timestamp"] %}
21+
{% if frame["timestamp"] is not none %}
2122
timestamp: <a class="timestamp" onclick="SetCurTime('{{secs}}')">{{frame["timestamp"]}}</a><br>
2223
{% endif %}
23-
{% if frame["timestamp_range"] %}
24+
{% if frame["timestamp_range"] is not none %}
2425
{% set sec_range = frame["sec_range"] %}
2526
timestamps: <a class="timestamp" onclick="SetCurTime('{{sec_range[0]}}')">{{frame["timestamp_range"][0]}}</a> - <a class="timestamp" onclick="SetCurTime('{{sec_range[1]}}')">{{frame["timestamp_range"][1]}}</a><br>
2627
{% endif %}
28+
{% if frame["frametype"] is not none %}
29+
frame type: {{frame["frametype"]}}<br>
30+
{% endif %}
31+
{% if frame["boxtypes"] %}
32+
box types: {{frame["boxtypes"]}}<br>
33+
{% endif %}
2734

2835
{% if frame["text"] %}
2936
text detected:<br>

templates/player.html

+31
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,37 @@
4141
padding: 10px;
4242
border-radius: 5px;
4343
}
44+
45+
/* Loading spinner styles */
46+
47+
.loader-container {
48+
display: flex;
49+
justify-content: center;
50+
align-items: center;
51+
height: 50vh;
52+
}
53+
54+
.loader {
55+
border: 16px solid #f3f3f3;
56+
border-radius: 50%;
57+
border-top: 16px solid #3498db;
58+
margin-top: 20px;
59+
width: 60px;
60+
height: 60px;
61+
-webkit-animation: spin 2s linear infinite; /* Safari */
62+
animation: spin 2s linear infinite;
63+
}
64+
65+
/* Safari */
66+
@-webkit-keyframes spin {
67+
0% { -webkit-transform: rotate(0deg); }
68+
100% { -webkit-transform: rotate(360deg); }
69+
}
70+
71+
@keyframes spin {
72+
0% { transform: rotate(0deg); }
73+
100% { transform: rotate(360deg); }
74+
}
4475
</style>
4576

4677
<body>

0 commit comments

Comments
 (0)