8
8
import html
9
9
10
10
from flask import render_template , session
11
- # from utils import app
11
+ from mmif . utils . video_document_helper import convert_timepoint , convert_timeframe
12
12
13
13
14
14
class OCRFrame ():
15
15
"""Class representing an (aligned or otherwise) set of OCR annotations for a single frame"""
16
16
17
- def __init__ (self , anno , fps ):
17
+ def __init__ (self , anno , mmif ):
18
18
self .text = []
19
- self .fps = fps
20
19
self .boxes = []
21
20
self .anno_ids = []
22
21
self .timestamp = None
@@ -26,24 +25,26 @@ def __init__(self, anno, fps):
26
25
self .range = None
27
26
self .timestamp_range = None
28
27
self .sec_range = None
28
+ self .frametype = None
29
+ self .boxtypes = []
29
30
30
- self .update (anno )
31
+ self .update (anno , mmif )
31
32
32
def update(self, anno, mmif):
    """Fold one annotation into this frame, dispatching on its type.

    ``BoundingBox`` and ``TimeFrame`` annotations are forwarded to
    ``add_bounding_box`` and ``add_timeframe``. ``TextDocument``
    annotations contribute their text to ``self.text``. Annotations of
    any other type are ignored.

    :param anno: annotation exposing ``at_type.shortname`` and a
        ``properties`` mapping
    :param mmif: MMIF object, passed through to the helper methods
    """
    shortname = anno.at_type.shortname
    if shortname == "BoundingBox":
        self.add_bounding_box(anno, mmif)

    elif shortname == "TimeFrame":
        self.add_timeframe(anno, mmif)

    elif shortname == "TextDocument":
        text = anno.properties.get("text_value")
        if not text:
            # Fall back to the ``value`` of the ``text`` property. A
            # document carrying neither property is skipped instead of
            # raising AttributeError on ``None.value``.
            text = getattr(anno.properties.get("text"), "value", None)
        if text:
            # Append a space after every backslash, slash, pipe and quote.
            self.text.append(re.sub(r'([\\\/\|\"\'])', r'\1 ', text))
43
44
44
- def add_bounding_box (self , anno ):
45
- self .frame_num = anno . properties . get (
46
- "frame" ) or anno . properties . get ( "timePoint " )
45
+ def add_bounding_box (self , anno , mmif ):
46
+ self .frame_num = convert_timepoint ( mmif , anno , "frames" )
47
+ self . secs = convert_timepoint ( mmif , anno , "seconds " )
47
48
box_id = anno .properties ["id" ]
48
49
boxType = anno .properties ["boxType" ]
49
50
coordinates = anno .properties ["coordinates" ]
@@ -54,18 +55,19 @@ def add_bounding_box(self, anno):
54
55
box = [box_id , boxType , [x , y , w , h ]]
55
56
self .boxes .append (box )
56
57
self .anno_ids .append (box_id )
57
- if self .fps :
58
- secs = int (self .frame_num / self .fps )
59
- self .timestamp = str (datetime .timedelta (seconds = secs ))
60
- self .secs = secs
58
+ self .timestamp = str (datetime .timedelta (seconds = self .secs ))
59
+ if anno .properties .get ("boxType" ) and anno .properties .get ("boxType" ) not in self .boxtypes :
60
+ self .boxtypes .append (anno .properties .get ("boxType" ))
61
61
62
def add_timeframe(self, anno, mmif):
    """Record the span of a TimeFrame annotation on this frame.

    The span is stored three ways: in frames (``self.range``), in seconds
    (``self.sec_range``) and as ``datetime.timedelta`` strings
    (``self.timestamp_range``). The annotation's ``frameType`` property is
    remembered in ``self.frametype`` when it is set.

    :param anno: the TimeFrame annotation
    :param mmif: MMIF object handed to ``convert_timeframe``
    """
    first_frame, last_frame = convert_timeframe(mmif, anno, "frames")
    first_sec, last_sec = convert_timeframe(mmif, anno, "seconds")

    self.range = (first_frame, last_frame)
    first_stamp = str(datetime.timedelta(seconds=first_sec))
    last_stamp = str(datetime.timedelta(seconds=last_sec))
    self.timestamp_range = (first_stamp, last_stamp)
    self.sec_range = (first_sec, last_sec)

    frame_type = anno.properties.get("frameType")
    if frame_type:
        self.frametype = frame_type
69
71
70
72
71
73
def find_annotation (anno_id , view , mmif ):
@@ -84,22 +86,23 @@ def get_ocr_frames(view, mmif, fps):
84
86
for alignment in view .get_annotations (full_alignment_type [0 ]):
85
87
source = find_annotation (alignment .properties ["source" ], view , mmif )
86
88
target = find_annotation (alignment .properties ["target" ], view , mmif )
87
- frame = OCRFrame (source , fps )
89
+
90
+ frame = OCRFrame (source , mmif )
88
91
i = frame .frame_num if frame .frame_num is not None else frame .range
89
92
if i in frames .keys ():
90
- frames [i ].update (source )
91
- frames [i ].update (target )
93
+ frames [i ].update (source , mmif )
94
+ frames [i ].update (target , mmif )
92
95
else :
93
- frame .update (target )
96
+ frame .update (target , mmif )
94
97
frames [i ] = frame
95
98
else :
96
99
for annotation in view .get_annotations ():
97
- frame = OCRFrame (annotation , fps )
100
+ frame = OCRFrame (annotation , mmif )
98
101
i = frame .frame_num if frame .frame_num is not None else frame .range
99
102
if i is None :
100
103
continue
101
104
if i in frames .keys ():
102
- frames [i ].update (annotation )
105
+ frames [i ].update (annotation , mmif )
103
106
else :
104
107
frames [i ] = frame
105
108
return frames
@@ -128,17 +131,26 @@ def render_ocr(vid_path, view_id, page_number):
128
131
f = open (session [f"{ view_id } -page-file" ])
129
132
frames_pages = json .load (f )
130
133
page = frames_pages [str (page_number )]
134
+ prev_frame_cap = None
131
135
for frame_num , frame in page :
132
136
# If index is range instead of frame...
133
137
if frame .get ("range" ):
134
138
frame_num = (int (frame ["range" ][0 ]) + int (frame ["range" ][1 ])) / 2
135
139
cv2_vid .set (1 , frame_num )
136
140
_ , frame_cap = cv2_vid .read ()
141
+ if frame_cap is None :
142
+ raise FileNotFoundError (f"Video file { vid_path } not found!" )
143
+
144
+ # Double check histogram similarity of "repeat" frames -- if they're significantly different, un-mark as repeat
145
+ if prev_frame_cap is not None and frame ["repeat" ] and not is_duplicate_image (prev_frame_cap , frame_cap , cv2_vid ):
146
+ frame ["repeat" ] = False
147
+
137
148
with tempfile .NamedTemporaryFile (
138
149
prefix = str (pathlib .Path (__file__ ).parent / 'static' / 'tmp' ), suffix = ".jpg" , delete = False ) as tf :
139
150
cv2 .imwrite (tf .name , frame_cap )
140
151
# "id" is just the name of the temp image file
141
152
frame ["id" ] = pathlib .Path (tf .name ).name
153
+ prev_frame_cap = frame_cap
142
154
143
155
return render_template ('ocr.html' ,
144
156
vid_path = vid_path ,
@@ -148,48 +160,83 @@ def render_ocr(vid_path, view_id, page_number):
148
160
page_number = str (page_number ))
149
161
150
162
151
def find_duplicates(frames_list, cv2_vid):
    """Flag frames that duplicate the frame-number entry preceding them.

    Each entry is compared with the previous integer-keyed entry using
    ``is_duplicate_ocr_frame``; a detected duplicate gets
    ``frame["repeat"] = True``.

    :param frames_list: iterable of ``(frame_num, frame)`` pairs, where
        ``frame`` is a dict
    :param cv2_vid: not used by this function; kept in the signature for
        its callers
    :return: the same ``frames_list`` object, with duplicates flagged
    """
    prev_frame = None
    for frame_num, frame in frames_list:
        # Entries not keyed by an integer frame number (time-frame
        # annotations) are neither flagged nor used as the comparison base.
        if not isinstance(frame_num, int):
            continue
        if is_duplicate_ocr_frame(prev_frame, frame):
            frame["repeat"] = True
        prev_frame = frame
    return frames_list
159
174
160
175
161
def is_duplicate_ocr_frame(prev_frame, frame):
    """Decide whether ``frame`` repeats the content of ``prev_frame``.

    The frames are treated as different when there is no previous frame,
    when their ``boxtypes`` differ, or when their box counts differ by
    more than 3. Otherwise they are duplicates if they share at least one
    rounded bounding box and ``frame`` is less than 10 seconds after
    ``prev_frame``, or if they share at least one text string.

    :param prev_frame: dict for the preceding frame, or ``None``
    :param frame: dict for the current frame
    :return: ``True`` if ``frame`` is considered a duplicate
    """
    if not prev_frame:
        return False
    if prev_frame.get("boxtypes") != frame.get("boxtypes"):
        return False

    # Missing or None entries count as "no boxes" / "no text" rather than
    # crashing in len() or set().
    prev_boxes = prev_frame.get("boxes") or []
    boxes = frame.get("boxes") or []
    if abs(len(prev_boxes) - len(boxes)) > 3:
        return False

    # Check bounding box overlap. The time test is the same for every box,
    # so it is evaluated once, and only when some box actually matches.
    rounded_prev = round_boxes(prev_boxes)
    if any(box in rounded_prev for box in round_boxes(boxes)):
        if frame["secs"] - prev_frame["secs"] < 10:
            return True

    # Check overlap in text
    prev_text = set(prev_frame.get("text") or ())
    text = set(frame.get("text") or ())
    return bool(prev_text & text)
169
193
194
def is_duplicate_image(prev_frame, frame, cv2_vid, threshold=50):
    """Compare two images by their hue/saturation histograms.

    Each image is converted with ``cv2.COLOR_BGR2HSV``, a 2-D histogram
    over channels 0 and 1 is computed and min-max normalized to [0, 1],
    and the two histograms are compared with ``cv2.HISTCMP_CHISQR``.

    :param prev_frame: first image (interpreted as BGR by the conversion)
    :param frame: second image (interpreted as BGR by the conversion)
    :param cv2_vid: not used by this function; kept in the signature for
        its callers
    :param threshold: chi-square value below which the images count as
        duplicates; defaults to 50
    :return: ``True`` if the chi-square value is below ``threshold``
    """
    histograms = []
    for image in (prev_frame, frame):
        # Convert it to HSV
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        # Calculate the histogram and normalize it in place
        hist = cv2.calcHist([hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
        cv2.normalize(hist, hist, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
        histograms.append(hist)

    # Find the metric value
    metric_val = cv2.compareHist(histograms[0], histograms[1], cv2.HISTCMP_CHISQR)
    return metric_val < threshold
209
+
210
+
170
211
171
212
def round_boxes(boxes, precision=100):
    """Round box coordinates so slightly different boxes compare equal.

    Accounts for jittery bounding boxes in OCR annotations.

    :param boxes: iterable of boxes; the coordinates of each box are read
        from index 2 (``box[2]``)
    :param precision: step the coordinates are rounded to; defaults to 100
    :return: one list of rounded coordinates per input box, in input order
    """
    return [
        [round(coord / precision) * precision for coord in box[2]]
        for box in boxes
    ]
180
221
181
222
182
223
def get_ocr_views(mmif):
    """Return all CV views, which contain timeframes or bounding boxes.

    A view qualifies when its metadata declares a ``TimeFrame`` or
    ``BoundingBox`` annotation type whose ``document`` resolves to a
    ``VideoDocument``, or when ``"parseq"`` occurs in its app identifier
    and it declares at least one annotation type. Each qualifying view
    appears once in the result.

    :param mmif: MMIF object exposing ``views`` and ``get_document_by_id``
    :return: list of matching views, in the order they occur in ``mmif.views``
    """
    views = []
    required_types = ("TimeFrame", "BoundingBox")
    for view in mmif.views:
        for anno_type, anno in view.metadata.contains.items():
            # Annotation belongs to a CV view if it is a TimeFrame/BB and it refers to a VideoDocument
            if (anno_type.shortname in required_types
                    and mmif.get_document_by_id(anno["document"]).at_type.shortname == "VideoDocument"):
                views.append(view)
                # Stop at the first match so the view is not appended
                # again for every further qualifying annotation type.
                break
            # TODO: Couldn't find a simple way to show if an alignment view is a CV/Frames-type view
            elif "parseq" in view.metadata.app:
                views.append(view)
                break
    return views
190
238
191
239
def save_json (dict , view_id ):
192
- # jsonified_pages = json.dumps(dict)
193
240
with tempfile .NamedTemporaryFile (prefix = str (pathlib .Path (__file__ ).parent / 'static' / 'tmp' ), suffix = ".json" , delete = False ) as tf :
194
241
pages_json = open (tf .name , "w" )
195
242
json .dump (dict , pages_json )
0 commit comments