5
5
import argparse
6
6
import logging
7
7
from concurrent .futures import ThreadPoolExecutor
8
+ from math import floor , ceil
8
9
9
10
import numpy as np
10
11
import torch
19
20
# Non-NLP Clams applications will require AnnotationTypes
20
21
21
22
23
def rel_coords_to_abs(coords, width, height):
    """
    Convert a rectangle from relative coordinates to absolute pixel coordinates.

    The rectangle is given by its top-left and bottom-right corners, each an
    ``(x, y)`` pair expressed as a fraction of the image size. The top-left
    corner is rounded down and the bottom-right corner is rounded up, so the
    resulting pixel box always fully contains the relative box.

    :param coords: ``((x1, y1), (x2, y2))`` relative corners; x is assumed to be
                   the horizontal axis and y the vertical axis
    :param width: image width in pixels (scales the x values)
    :param height: image height in pixels (scales the y values)
    :return: ``[(x1_px, y1_px), (x2_px, y2_px)]`` as integers
    """
    x1, y1 = coords[0]
    x2, y2 = coords[1]
    # x runs along the width and y along the height; scaling x by height (and
    # y by width) would distort every box on a non-square image.
    return [(floor(x1 * width), floor(y1 * height)), (ceil(x2 * width), ceil(y2 * height))]
32
+
33
+
22
34
def create_bbox (view : View , coordinates , box_type , time_point ):
23
35
bbox = view .new_annotation (AnnotationTypes .BoundingBox )
24
36
bbox .add_property ("coordinates" , coordinates )
@@ -82,43 +94,45 @@ def process_timepoint(self, representative: Annotation, new_view: View, video_do
82
94
video_doc .get ("fps" ))
83
95
image : np .ndarray = vdh .extract_frames_as_images (video_doc , [rep_frame_index ], as_PIL = False )[0 ]
84
96
result = self .reader ([image ])
97
+ # assume only one page, as we are passing one image at a time
85
98
blocks = result .pages [0 ].blocks
86
99
text_document : Document = new_view .new_textdocument (result .render ())
87
100
101
+ h , w = image .shape [:2 ]
88
102
for block in blocks :
89
103
try :
90
- self .process_block (block , new_view , text_document , representative )
104
+ self .process_block (block , new_view , text_document , representative , w , h )
91
105
except Exception as e :
92
106
self .logger .error (f"Error processing block: { e } " )
93
107
continue
94
108
95
109
return text_document , representative
96
110
97
def process_block(self, block, view, text_document, representative, img_width, img_height):
    """
    Annotate one OCR block as a paragraph.

    Creates a paragraph annotation in ``view``, attaches a bounding box built
    from the block geometry converted to pixel coordinates, aligns the two,
    and then adds one sentence child per line of the block. Lines that fail to
    process are logged and skipped; the remaining lines are still added.

    :param block: OCR block exposing ``geometry`` and ``lines``
    :param view: view in which the new annotations are created
    :param text_document: text document the paragraph is anchored to
    :param representative: annotation whose ``id`` is recorded on the bounding box
    :param img_width: width of the source image, in pixels
    :param img_height: height of the source image, in pixels
    """
    paragraph = self.LingUnit(view.new_annotation(at_type=Uri.PARAGRAPH), text_document)

    # Convert the block geometry to pixel coordinates before building the box.
    pixel_box = rel_coords_to_abs(block.geometry, img_width, img_height)
    paragraph_bb = create_bbox(view, pixel_box, "text", representative.id)
    create_alignment(view, paragraph.region.id, paragraph_bb.id)

    for ocr_line in block.lines:
        try:
            sentence = self.process_line(
                ocr_line, view, text_document, representative, img_width, img_height
            )
        except Exception as e:
            # Best effort: one bad line must not discard the rest of the block.
            self.logger.error(f"Error processing line: { e } ")
        else:
            paragraph.add_child(sentence)

    paragraph.collect_targets()
110
124
111
- def process_line (self , line , view , text_document , representative ):
125
+ def process_line (self , line , view , text_document , representative , img_width , img_height ):
112
126
sentence = self .LingUnit (view .new_annotation (at_type = Uri .SENTENCE ), text_document )
113
- sentence_bb = create_bbox (view , line .geometry , "text" , representative .id )
127
+ sentence_bb = create_bbox (view , rel_coords_to_abs ( line .geometry , img_width , img_height ) , "text" , representative .id )
114
128
create_alignment (view , sentence .region .id , sentence_bb .id )
115
129
116
130
for word in line .words :
117
131
if word .confidence > 0.4 :
118
132
start = text_document .text_value .find (word .value )
119
133
end = start + len (word .value )
120
134
token = self .Token (view .new_annotation (at_type = Uri .TOKEN ), text_document , start , end )
121
- token_bb = create_bbox (view , word .geometry , "text" , representative .id )
135
+ token_bb = create_bbox (view , rel_coords_to_abs ( word .geometry , img_width , img_height ) , "text" , representative .id )
122
136
create_alignment (view , token .region .id , token_bb .id )
123
137
sentence .add_child (token )
124
138
@@ -144,6 +158,9 @@ def _annotate(self, mmif: Mmif, **parameters) -> Mmif:
144
158
rep_id = f'{ input_view .id } { Mmif .id_delimiter } { rep_id } '
145
159
representative = mmif [rep_id ]
146
160
futures .append (executor .submit (self .process_timepoint , representative , new_view , video_doc ))
161
+ if len (futures ) == 0 :
162
+ # TODO (krim @ 4/18/24): if "representatives" is not present, process just the middle frame
163
+ pass
147
164
148
165
for future in futures :
149
166
try :
0 commit comments