Skip to content

Commit 0f894a1

Browse files
committed
Handled time unit conversion, indexed found annotations by long_id and marked skipped test for now
1 parent f1cb2e6 commit 0f894a1

File tree

3 files changed

+18
-15
lines changed

3 files changed

+18
-15
lines changed

mmif/serialize/mmif.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import warnings
1111
from collections import defaultdict
1212
from datetime import datetime
13-
from typing import List, Union, Optional, Dict, cast, Iterator
13+
from typing import List, Union, Optional, Dict, cast, Iterator, Tuple
1414

1515
import jsonschema.validators
1616

@@ -516,6 +516,13 @@ def _is_in_time_between(self, start: Union[int, float], end: Union[int, float],
516516
s, e = self.get_start(annotation), self.get_end(annotation)
517517
return (s < start < e) or (s > start and e < end) or (s < end < e)
518518

519+
def _handle_time_unit(self, input_unit: str, ann_unit: str,
520+
start: int, end: int) -> Tuple[Union[int, float, str], Union[int, float, str]]:
521+
from mmif.utils.timeunit_helper import convert
522+
start = convert(start, input_unit, ann_unit, 1)
523+
end = convert(end, input_unit, ann_unit, 1)
524+
return start, end
525+
519526
def get_annotations_between_time(self, start: int, end: int, time_unit: str = "milliseconds") -> Iterator[Annotation]:
520527
"""
521528
Version: 1.0
@@ -540,31 +547,26 @@ def get_annotations_between_time(self, start: int, end: int, time_unit: str = "m
540547
# 2. For each view, extract annotations that satisfy conditions that are TF/TP and fall into time interval
541548
for view in views:
542549
# Make sure time unit stay at the same level
543-
unit_of_time = view.metadata.contains.get(AnnotationTypes.TimeFrame)["timeUnit"]
544-
if time_unit != unit_of_time:
545-
if time_unit == "seconds":
546-
start *= 1000
547-
end *= 1000
548-
else:
549-
start /= 1000
550-
end /= 1000
550+
start_time, end_time = self._handle_time_unit(time_unit, view.metadata.contains.get(AnnotationTypes.TimeFrame)["timeUnit"],
551+
start, end)
551552
tf_anns = view.get_annotations(at_type=AnnotationTypes.TimeFrame)
552553
al_anns = view.get_annotations(at_type=AnnotationTypes.Alignment)
553554

554555
# Select 'TimeFrame' annotations within given time interval
555556
for tf in tf_anns:
556-
if self._is_in_time_between(start, end, tf):
557+
if self._is_in_time_between(start_time, end_time, tf):
557558
valid_tf_anns.append(tf)
558559

559560
# Map 'TimeFrame' annotation to its aligned annotation
560561
for align in al_anns:
561562
source_id, target_id = align.get_property('source'), align.get_property('target')
563+
to_long_id = lambda x: x if self.id_delimiter in x else f'{view.id}{self.id_delimiter}{x}'
562564
try:
563565
source, target = view.get_annotation_by_id(source_id), view.get_annotation_by_id(target_id)
564566
if source in valid_tf_anns:
565-
tf_to_anns[source_id].append(target)
567+
tf_to_anns[to_long_id(source_id)].append(target)
566568
elif target in valid_tf_anns:
567-
tf_to_anns[target_id].append(source)
569+
tf_to_anns[to_long_id(target_id)].append(source)
568570
except KeyError:
569571
pass
570572

@@ -573,7 +575,7 @@ def get_annotations_between_time(self, start: int, end: int, time_unit: str = "m
573575

574576
# 4. Yield all annotations aligned with sorted 'TimeFrame' annotations
575577
for tf_ann in sort_tf_anns:
576-
anns = tf_to_anns[tf_ann.get_property("id")]
578+
anns = tf_to_anns[tf_ann.long_id]
577579
for ann in anns:
578580
yield ann
579581

mmif/utils/textdoc_helper.py renamed to mmif/utils/text_document_helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ def slice_text(mmif_obj, start: int, end: int, unit: str = "milliseconds") -> st
88
tokens_sliced = []
99
for ann in anns_found:
1010
if ann.is_type(token_type):
11-
tokens_sliced.append(ann.get_property('text')) # FIXME: Sometimes the string attribute "word" is used for getting property value
11+
tokens_sliced.append(ann.get_property('word'))
1212
return ' '.join(tokens_sliced)

tests/test_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from mmif.utils import sequence_helper as sqh
77
from mmif.utils import timeunit_helper as tuh
88
from mmif.utils import video_document_helper as vdh
9-
from mmif.utils import textdoc_helper as tdh
9+
from mmif.utils import text_document_helper as tdh
1010
from mmif.serialize import mmif
1111
from tests.mmif_examples import *
1212

@@ -161,6 +161,7 @@ def test_width_based_smoothing(self):
161161
class TestTextDocHelper(unittest.TestCase):
162162
mmif_obj = Mmif(MMIF_EXAMPLES['everything'])
163163

164+
@pytest.mark.skip("The only valid test cases come from kalbi app which annotates wrong property")
164165
def test_slice_text(self):
165166
sliced_text_full_overlap = tdh.slice_text(self.mmif_obj, 11500, 14600)
166167
sliced_text_partial_overlap = tdh.slice_text(self.mmif_obj, 7, 10, unit="seconds")

0 commit comments

Comments
 (0)