8
8
import os , shutil
9
9
10
10
from flask import render_template
11
+ from mmif import AnnotationTypes , DocumentTypes , Mmif
11
12
from mmif .utils .video_document_helper import convert_timepoint , convert_timeframe
12
13
13
14
import cache
@@ -35,27 +36,48 @@ def __init__(self, anno, mmif):
35
36
self .update (anno , mmif )
36
37
37
38
def update(self, anno, mmif):
    """Fold one annotation into this frame, dispatching on its type.

    BoundingBox, TimeFrame and TimePoint annotations are handed to the
    matching ``add_*`` method; TextDocument annotations are added as text.
    An annotation whose type short name is "Paragraph" contributes the text
    of the document named by its ``document`` property. Any other type is
    ignored.

    :param anno: the annotation (or text document) to merge into the frame
    :param mmif: the MMIF object, used to resolve referenced annotations
    """
    at_type = anno.at_type
    if at_type == AnnotationTypes.BoundingBox:
        self.add_bounding_box(anno, mmif)
    elif at_type == AnnotationTypes.TimeFrame:
        self.add_timeframe(anno, mmif)
    elif at_type == AnnotationTypes.TimePoint:
        self.add_timepoint(anno, mmif)
    elif at_type == DocumentTypes.TextDocument:
        self.add_text_document(anno)
    elif at_type.shortname == "Paragraph":
        # A Paragraph only references its text; look the document up by id.
        # Skip silently when the reference is absent instead of indexing
        # mmif with None.
        document_id = anno.properties.get("document")
        if document_id is not None:
            self.add_text_document(mmif[document_id])
56
+
57
+ def add_bounding_box (self , anno , mmif : Mmif ):
58
+ timepoint_anno = None
59
+ if "timePoint" in anno .properties :
60
+ timepoint_anno = mmif [anno .get ("timePoint" )]
61
+
62
+ else :
63
+ for alignment_anns in mmif .get_alignments (AnnotationTypes .BoundingBox , AnnotationTypes .TimePoint ).values ():
64
+ for alignment_ann in alignment_anns :
65
+ if alignment_ann .get ('source' ) == anno .id :
66
+ timepoint_anno = mmif [alignment_ann .get ('target' )]
67
+ break
68
+ elif alignment_ann .get ('target' ) == anno .id :
69
+ timepoint_anno = mmif [alignment_ann .get ('source' )]
70
+ break
71
+ if timepoint_anno :
72
+ self .add_timepoint (timepoint_anno , mmif , skip_if_view_has_frames = False )
73
+
74
+ box_id = anno .get ("id" )
75
+ boxType = anno .get ("boxType" )
76
+ coordinates = anno .get ("coordinates" )
55
77
x = coordinates [0 ][0 ]
56
78
y = coordinates [0 ][1 ]
57
- w = coordinates [3 ][0 ] - x
58
- h = coordinates [3 ][1 ] - y
79
+ w = coordinates [1 ][0 ] - x
80
+ h = coordinates [1 ][1 ] - y
59
81
box = [box_id , boxType , [x , y , w , h ]]
60
82
self .boxes .append (box )
61
83
self .anno_ids .append (box_id )
@@ -64,40 +86,70 @@ def add_bounding_box(self, anno, mmif):
64
86
self .boxtypes .append (anno .properties .get ("boxType" ))
65
87
66
88
def add_timeframe(self, anno, mmif):
    """Record the frame/second range and frame type of a TimeFrame.

    Sets ``self.range`` (frames), ``self.sec_range`` (seconds) and
    ``self.timestamp_range`` (``str(timedelta)`` of the seconds). When the
    annotation has a truthy ``frameType`` (or, failing that, ``label``)
    property, ``self.frametype`` is set to its string form.

    :param anno: the TimeFrame annotation
    :param mmif: the MMIF object, used to resolve target annotations and
        passed through to the time conversion helpers
    """
    targets = anno.properties.get("targets") if "targets" in anno.properties else None
    if targets:
        # If annotation has multiple targets, pick the first and last as
        # start and end.
        start_anno, end_anno = mmif[targets[0]], mmif[targets[-1]]
        start = convert_timepoint(mmif, start_anno, "frames")
        end = convert_timepoint(mmif, end_anno, "frames")
        start_secs = convert_timepoint(mmif, start_anno, "seconds")
        end_secs = convert_timepoint(mmif, end_anno, "seconds")
    else:
        # No (or empty) target list: convert the annotation's own interval.
        start, end = convert_timeframe(mmif, anno, "frames")
        start_secs, end_secs = convert_timeframe(mmif, anno, "seconds")

    self.range = (start, end)
    self.timestamp_range = (
        str(datetime.timedelta(seconds=start_secs)),
        str(datetime.timedelta(seconds=end_secs)),
    )
    self.sec_range = (start_secs, end_secs)

    # Prefer "frameType"; fall back to "label" when it is missing or empty.
    frame_type = anno.properties.get("frameType") or anno.properties.get("label")
    if frame_type:
        self.frametype = str(frame_type)
108
+
109
def add_timepoint(self, anno, mmif, skip_if_view_has_frames=True):
    """Record the frame number, seconds and timestamp of a TimePoint.

    Sets ``self.frame_num``, ``self.secs`` and ``self.timestamp``
    (``str(timedelta)`` of the seconds). When the annotation has a truthy
    ``label`` property, ``self.frametype`` is set to its string form, the
    same way ``add_timeframe`` stores it.

    :param anno: the TimePoint annotation
    :param mmif: the MMIF object, used to find the annotation's parent view
        and passed through to the time conversion helper
    :param skip_if_view_has_frames: when true, do nothing if the parent view
        also declares TimeFrame annotations
    """
    if skip_if_view_has_frames:
        # If there are TimeFrames in the same view, they most likely
        # represent condensed information about representative frames
        # (e.g. SWT). In this case, only render the TimeFrames and ignore
        # the TimePoints.
        parent = mmif.get_view_by_id(anno.parent)
        if any(at_type == AnnotationTypes.TimeFrame for at_type in parent.metadata.contains):
            return

    self.frame_num = convert_timepoint(mmif, anno, "frames")
    self.secs = convert_timepoint(mmif, anno, "seconds")
    self.timestamp = str(datetime.timedelta(seconds=self.secs))

    label = anno.properties.get("label")
    if label:
        self.frametype = str(label)
122
+
123
def add_text_document(self, anno):
    """Append the text of a text document to ``self.text``, without duplicates.

    The text is read from the ``text_value`` property, or from the ``value``
    attribute of the ``text`` property when ``text_value`` is missing or
    empty. A space is inserted after every backslash, slash, pipe and quote
    character before the text is stored. Documents with no text are ignored.

    :param anno: the text document annotation
    """
    props = anno.properties
    # getattr tolerates a missing "text" property (None has no .value).
    raw_text = props.get("text_value") or getattr(props.get("text"), "value", None)
    if not raw_text:
        return
    spaced = re.sub(r'([\\\/\|\"\'])', r'\1 ', raw_text)
    if spaced not in self.text:
        # Rebind rather than mutate, as the original expression did.
        self.text = self.text + [spaced]
128
+
129
+
130
+ def get_ocr_frames (view , mmif ):
84
131
frames = {}
85
132
full_alignment_type = [
86
- at_type for at_type in view .metadata .contains if at_type . shortname == " Alignment" ]
133
+ at_type for at_type in view .metadata .contains if at_type == AnnotationTypes . Alignment ]
87
134
# If view contains alignments
88
135
if full_alignment_type :
89
136
for alignment in view .get_annotations (full_alignment_type [0 ]):
90
- source = find_annotation ( alignment .properties [ "source" ], view , mmif )
91
- target = find_annotation ( alignment .properties [ "target" ], view , mmif )
137
+ source = mmif [ alignment .get ( "source" )]
138
+ target = mmif [ alignment .get ( "target" )]
92
139
140
+ # Account for alignment in either direction
93
141
frame = OCRFrame (source , mmif )
142
+ frame .update (target , mmif )
143
+
94
144
i = frame .frame_num if frame .frame_num is not None else frame .range
145
+ if i is None :
146
+ continue
95
147
if i in frames .keys ():
96
148
frames [i ].update (source , mmif )
97
149
frames [i ].update (target , mmif )
98
150
else :
99
- frame .update (target , mmif )
100
151
frames [i ] = frame
152
+
101
153
else :
102
154
for annotation in view .get_annotations ():
103
155
frame = OCRFrame (annotation , mmif )
@@ -108,6 +160,7 @@ def get_ocr_frames(view, mmif, fps):
108
160
frames [i ].update (annotation , mmif )
109
161
else :
110
162
frames [i ] = frame
163
+ print (frames )
111
164
return frames
112
165
113
166
@@ -175,7 +228,7 @@ def make_image_directory(mmif_id):
175
228
return path
176
229
177
230
178
- def find_duplicates (frames_list , cv2_vid ):
231
+ def find_duplicates (frames_list ):
179
232
"""Find duplicate frames"""
180
233
prev_frame = None
181
234
for frame_num , frame in frames_list :
@@ -239,18 +292,23 @@ def round_boxes(boxes):
239
292
def get_ocr_views(mmif):
    """Returns all CV views, which contain timeframes, timepoints or bounding boxes.

    A view is selected when it declares at least one annotation type and
    either one of those types is a TimeFrame, BoundingBox or TimePoint, or
    the view's app identifier contains "parseq". Each view appears at most
    once, in the order of ``mmif.views``.

    :param mmif: the MMIF object whose views are inspected
    :return: list of the selected views
    """
    required_types = {"TimeFrame", "BoundingBox", "TimePoint"}
    views = []
    for view in mmif.views:
        type_names = [at_type.shortname for at_type in view.metadata.contains]
        if not type_names:
            # Views that declare no annotation types are never selected.
            continue
        # TODO: Couldn't find a simple way to show if an alignment view is a CV/Frames-type view
        is_parseq = "parseq" in view.metadata.app
        if is_parseq or any(name in required_types for name in type_names):
            views.append(view)
    return views
255
313
256
314
0 commit comments