88import html
99
1010from flask import render_template , session
11- # from utils import app
11+ from mmif . utils . video_document_helper import convert_timepoint , convert_timeframe
1212
1313
1414class OCRFrame ():
1515 """Class representing an (aligned or otherwise) set of OCR annotations for a single frame"""
1616
17- def __init__ (self , anno , fps ):
17+ def __init__ (self , anno , mmif ):
1818 self .text = []
19- self .fps = fps
2019 self .boxes = []
2120 self .anno_ids = []
2221 self .timestamp = None
@@ -26,24 +25,26 @@ def __init__(self, anno, fps):
2625 self .range = None
2726 self .timestamp_range = None
2827 self .sec_range = None
28+ self .frametype = None
29+ self .boxtypes = []
2930
30- self .update (anno )
31+ self .update (anno , mmif )
3132
def update(self, anno, mmif):
    """Fold one more annotation into this frame.

    Dispatches on ``anno.at_type.shortname``:

    * ``"BoundingBox"`` -> ``self.add_bounding_box(anno, mmif)``
    * ``"TimeFrame"``   -> ``self.add_timeframe(anno, mmif)``
    * ``"TextDocument"`` -> the document text is appended to ``self.text``

    Any other annotation type is ignored.

    :param anno: annotation exposing ``at_type.shortname`` and a
        ``properties`` mapping.
    :param mmif: the MMIF object; only forwarded to the helpers above.
    """
    shortname = anno.at_type.shortname
    if shortname == "BoundingBox":
        self.add_bounding_box(anno, mmif)
    elif shortname == "TimeFrame":
        self.add_timeframe(anno, mmif)
    elif shortname == "TextDocument":
        # Prefer the flat "text_value" property; otherwise fall back to the
        # ".value" of the "text" property. getattr() keeps a document that
        # carries neither from raising AttributeError on None.
        t = anno.properties.get("text_value")
        if not t:
            t = getattr(anno.properties.get("text"), "value", None)
        if t:
            # Insert a space after every backslash, slash, pipe and quote.
            # NOTE(review): presumably so the rendered text can wrap or is
            # safer to embed in the page -- confirm the intent.
            self.text.append(re.sub(r'([\\\/\|\"\'])', r'\1 ', t))
4344
44- def add_bounding_box (self , anno ):
45- self .frame_num = anno . properties . get (
46- "frame" ) or anno . properties . get ( "timePoint " )
45+ def add_bounding_box (self , anno , mmif ):
46+ self .frame_num = convert_timepoint ( mmif , anno , "frames" )
47+ self . secs = convert_timepoint ( mmif , anno , "seconds " )
4748 box_id = anno .properties ["id" ]
4849 boxType = anno .properties ["boxType" ]
4950 coordinates = anno .properties ["coordinates" ]
@@ -54,18 +55,19 @@ def add_bounding_box(self, anno):
5455 box = [box_id , boxType , [x , y , w , h ]]
5556 self .boxes .append (box )
5657 self .anno_ids .append (box_id )
57- if self .fps :
58- secs = int (self .frame_num / self .fps )
59- self .timestamp = str (datetime .timedelta (seconds = secs ))
60- self .secs = secs
58+ self .timestamp = str (datetime .timedelta (seconds = self .secs ))
59+ if anno .properties .get ("boxType" ) and anno .properties .get ("boxType" ) not in self .boxtypes :
60+ self .boxtypes .append (anno .properties .get ("boxType" ))
6161
def add_timeframe(self, anno, mmif):
    """Record the span of a TimeFrame annotation on this frame.

    The span is obtained from ``convert_timeframe`` twice, once in
    ``"frames"`` and once in ``"seconds"``, and stored as:

    * ``self.range``           -- ``(start, end)`` in frames
    * ``self.sec_range``       -- ``(start, end)`` in seconds
    * ``self.timestamp_range`` -- the seconds pair rendered through
      ``str(datetime.timedelta(seconds=...))``

    ``self.frametype`` is overwritten only when the annotation has a truthy
    ``"frameType"`` property.

    :param anno: the TimeFrame annotation (must expose ``properties.get``).
    :param mmif: the MMIF object handed to ``convert_timeframe``.
    """
    start, end = convert_timeframe(mmif, anno, "frames")
    start_secs, end_secs = convert_timeframe(mmif, anno, "seconds")

    self.range = (start, end)
    self.sec_range = (start_secs, end_secs)
    self.timestamp_range = (
        str(datetime.timedelta(seconds=start_secs)),
        str(datetime.timedelta(seconds=end_secs)),
    )

    # Look the property up once; leave any earlier frametype in place when
    # this annotation does not carry one.
    frame_type = anno.properties.get("frameType")
    if frame_type:
        self.frametype = frame_type
6971
7072
7173def find_annotation (anno_id , view , mmif ):
@@ -84,22 +86,23 @@ def get_ocr_frames(view, mmif, fps):
8486 for alignment in view .get_annotations (full_alignment_type [0 ]):
8587 source = find_annotation (alignment .properties ["source" ], view , mmif )
8688 target = find_annotation (alignment .properties ["target" ], view , mmif )
87- frame = OCRFrame (source , fps )
89+
90+ frame = OCRFrame (source , mmif )
8891 i = frame .frame_num if frame .frame_num is not None else frame .range
8992 if i in frames .keys ():
90- frames [i ].update (source )
91- frames [i ].update (target )
93+ frames [i ].update (source , mmif )
94+ frames [i ].update (target , mmif )
9295 else :
93- frame .update (target )
96+ frame .update (target , mmif )
9497 frames [i ] = frame
9598 else :
9699 for annotation in view .get_annotations ():
97- frame = OCRFrame (annotation , fps )
100+ frame = OCRFrame (annotation , mmif )
98101 i = frame .frame_num if frame .frame_num is not None else frame .range
99102 if i is None :
100103 continue
101104 if i in frames .keys ():
102- frames [i ].update (annotation )
105+ frames [i ].update (annotation , mmif )
103106 else :
104107 frames [i ] = frame
105108 return frames
@@ -128,17 +131,26 @@ def render_ocr(vid_path, view_id, page_number):
128131 f = open (session [f"{ view_id } -page-file" ])
129132 frames_pages = json .load (f )
130133 page = frames_pages [str (page_number )]
134+ prev_frame_cap = None
131135 for frame_num , frame in page :
132136 # If index is range instead of frame...
133137 if frame .get ("range" ):
134138 frame_num = (int (frame ["range" ][0 ]) + int (frame ["range" ][1 ])) / 2
135139 cv2_vid .set (1 , frame_num )
136140 _ , frame_cap = cv2_vid .read ()
141+ if frame_cap is None :
142+ raise FileNotFoundError (f"Video file { vid_path } not found!" )
143+
144+ # Double check histogram similarity of "repeat" frames -- if they're significantly different, un-mark as repeat
145+ if prev_frame_cap is not None and frame ["repeat" ] and not is_duplicate_image (prev_frame_cap , frame_cap , cv2_vid ):
146+ frame ["repeat" ] = False
147+
137148 with tempfile .NamedTemporaryFile (
138149 prefix = str (pathlib .Path (__file__ ).parent / 'static' / 'tmp' ), suffix = ".jpg" , delete = False ) as tf :
139150 cv2 .imwrite (tf .name , frame_cap )
140151 # "id" is just the name of the temp image file
141152 frame ["id" ] = pathlib .Path (tf .name ).name
153+ prev_frame_cap = frame_cap
142154
143155 return render_template ('ocr.html' ,
144156 vid_path = vid_path ,
@@ -148,48 +160,83 @@ def render_ocr(vid_path, view_id, page_number):
148160 page_number = str (page_number ))
149161
150162
def find_duplicates(frames_list, cv2_vid):
    """Flag frames whose OCR content repeats that of the preceding frame.

    Walks ``frames_list`` in order and sets ``frame["repeat"] = True`` on
    every entry that ``is_duplicate_ocr_frame`` judges to be a duplicate of
    the previously visited integer-keyed entry. Entries are mutated in place.

    :param frames_list: iterable of ``(frame_num, frame)`` pairs, where
        ``frame`` is a dict.
    :param cv2_vid: not used by this function; kept so existing callers keep
        working.
    :return: the same ``frames_list`` object that was passed in.
    """
    prev_frame = None
    for frame_num, frame in frames_list:
        # A non-integer key means the entry is indexed by a time range
        # (a TimeFrame annotation) rather than a single frame; such entries
        # are neither compared nor remembered as "previous".
        if not isinstance(frame_num, int):
            continue
        if is_duplicate_ocr_frame(prev_frame, frame):
            frame["repeat"] = True
        prev_frame = frame
    return frames_list
159174
160175
def is_duplicate_ocr_frame(prev_frame, frame):
    """Decide whether ``frame`` repeats the OCR content of ``prev_frame``.

    Checks are applied in this order:

    1. No (or empty) previous frame            -> not a duplicate.
    2. The ``"boxtypes"`` values differ         -> not a duplicate.
    3. The box counts differ by more than 3     -> not a duplicate.
    4. Some rounded box of ``frame`` also occurs among the rounded boxes of
       ``prev_frame`` and ``frame["secs"] - prev_frame["secs"] < 10``
                                                -> duplicate.
    5. The two frames share at least one text string -> duplicate.

    A missing ``"boxes"`` or ``"text"`` entry is treated as empty instead of
    raising ``TypeError``.

    :param prev_frame: dict for the previously seen frame, or ``None``.
    :param frame: dict for the current frame.
    :return: ``True`` if the frame is considered a repeat, else ``False``.
    """
    if not prev_frame:
        return False
    if prev_frame.get("boxtypes") != frame.get("boxtypes"):
        return False

    prev_boxes = prev_frame.get("boxes") or []
    boxes = frame.get("boxes") or []
    if abs(len(prev_boxes) - len(boxes)) > 3:
        return False

    # Check bounding-box positions (rounded, to absorb jitter). The time gap
    # does not depend on the box, so it is tested once -- but only after a
    # box actually matched, so "secs" is still not required otherwise.
    rounded_prev = round_boxes(prev_boxes)
    if any(box in rounded_prev for box in round_boxes(boxes)):
        if frame["secs"] - prev_frame["secs"] < 10:
            return True

    # Check overlap in text
    prev_text = set(prev_frame.get("text") or [])
    text = set(frame.get("text") or [])
    return bool(prev_text & text)
169193
def is_duplicate_image(prev_frame, frame, cv2_vid, threshold=50):
    """Compare two captured images by their hue/saturation histograms.

    Each image is converted with ``cv2.COLOR_BGR2HSV``, a 2-D histogram over
    channels 0 and 1 (180 x 256 bins) is computed and min-max normalised to
    the range 0..1, and the two histograms are compared with
    ``cv2.HISTCMP_CHISQR``.

    :param prev_frame: image of the previous frame, as accepted by
        ``cv2.cvtColor`` (assumed to be BGR, e.g. from ``VideoCapture.read``
        -- TODO confirm against callers).
    :param frame: image of the current frame, same format.
    :param cv2_vid: not used by this function; kept so existing callers keep
        working.
    :param threshold: the images count as duplicates when the comparison
        metric is strictly below this value. Defaults to 50, the value that
        was previously hard-coded.
    :return: result of ``metric < threshold``.
    """
    histograms = []
    for image in (prev_frame, frame):
        # Convert it to HSV
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        # Calculate the histogram and normalize it (normalize works in place)
        hist = cv2.calcHist([hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
        cv2.normalize(hist, hist, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
        histograms.append(hist)

    # Find the metric value
    metric_val = cv2.compareHist(histograms[0], histograms[1], cv2.HISTCMP_CHISQR)
    return metric_val < threshold
209+
210+
170211
def round_boxes(boxes, grid=100):
    """Snap the coordinates of every box to a coarse grid.

    To account for jittery bounding boxes in OCR annotations, each coordinate
    is replaced by the nearest multiple of ``grid`` so that slightly shifted
    boxes compare equal.

    :param boxes: iterable of boxes whose element at index 2 is the sequence
        of numeric coordinates. ``None`` is treated as no boxes.
    :param grid: grid spacing; defaults to 100, the value that was
        previously hard-coded.
    :return: a list with one list of rounded coordinates per input box
        (ids and box types are not carried over).
    """
    # round() uses round-half-to-even, so e.g. 250 -> 200 and 350 -> 400
    # on the default grid.
    return [
        [round(coord / grid) * grid for coord in box[2]]
        for box in (boxes or [])
    ]
180221
181222
def get_ocr_views(mmif):
    """Return all CV views, i.e. views that contain timeframes or bounding boxes.

    A view is selected when, while scanning its ``metadata.contains``
    entries in order, either

    * an entry's type is ``TimeFrame`` or ``BoundingBox`` and the document
      it names (``entry["document"]``) is a ``VideoDocument``, or
    * the entry does not satisfy that test and the view's app identifier
      contains ``"parseq"``.

    Each view is returned at most once: scanning stops at the first entry
    that selects it. (Previously the scan continued and appended the same
    view again for every further matching entry.)

    :param mmif: object exposing ``views`` and ``get_document_by_id``.
    :return: list of selected views, in the order of ``mmif.views``.
    """
    required_types = ("TimeFrame", "BoundingBox")
    views = []
    for view in mmif.views:
        for anno_type, anno in view.metadata.contains.items():
            # Annotation belongs to a CV view if it is a TimeFrame/BB and it
            # refers to a VideoDocument. The document lookup only happens
            # for the two required types.
            if (anno_type.shortname in required_types
                    and mmif.get_document_by_id(anno["document"]).at_type.shortname == "VideoDocument"):
                views.append(view)
                break
            # TODO: Couldn't find a simple way to show if an alignment view
            # is a CV/Frames-type view
            if "parseq" in view.metadata.app:
                views.append(view)
                break
    return views
190238
191239def save_json (dict , view_id ):
192- # jsonified_pages = json.dumps(dict)
193240 with tempfile .NamedTemporaryFile (prefix = str (pathlib .Path (__file__ ).parent / 'static' / 'tmp' ), suffix = ".json" , delete = False ) as tf :
194241 pages_json = open (tf .name , "w" )
195242 json .dump (dict , pages_json )
0 commit comments