Skip to content

Commit ec053fa

Browse files
committed
added built-in at_type filter for time window-based annotation getter
1 parent 43bb3f1 commit ec053fa

File tree

2 files changed

+13
-7
lines changed

2 files changed

+13
-7
lines changed

mmif/serialize/mmif.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -656,14 +656,15 @@ def _is_in_time_range(self, ann: Annotation, range_s: Union[int, float], range_e
656656
ann_s, ann_e = self.get_start(ann), self.get_end(ann)
657657
return (ann_s < range_s < ann_e) or (ann_s < range_e < ann_e) or (ann_s > range_s and ann_e < range_e)
658658

659-
def get_annotations_between_time(self, start: Union[int, float], end: Union[int, float],
660-
time_unit: str = "ms") -> Iterator[Annotation]:
659+
def get_annotations_between_time(self, start: Union[int, float], end: Union[int, float], time_unit: str = "ms",
660+
at_types: List[Union[ThingTypesBase, str]] = []) -> Iterator[Annotation]:
661661
"""
662662
Finds annotations that are anchored between the given time points.
663663
664664
:param start: the start time point in the unit of `time_unit`
665665
:param end: the end time point in the unit of `time_unit`
666666
:param time_unit: the unit of the input time points. Default is `ms`.
667+
:param at_types: a list of annotation types to filter with. Annotations of any type in this list are included in the result; an empty list (the default) disables filtering.
667668
:return: an iterator of Annotation objects that are anchored between the given time points
668669
"""
669670
assert start < end, f"Start time point must be smaller than the end time point, given {start} and {end}"
@@ -673,6 +674,7 @@ def get_annotations_between_time(self, start: Union[int, float], end: Union[int,
673674
from mmif.utils.timeunit_helper import convert
674675

675676
time_anchors_in_range = []
677+
at_types = set(at_types)
676678

677679
for view in self.get_all_views_contain(AnnotationTypes.TimeFrame) + self.get_all_views_contain(AnnotationTypes.TimePoint):
678680
time_unit_in_view = view.metadata.contains.get(AnnotationTypes.TimeFrame)["timeUnit"]
@@ -684,9 +686,11 @@ def get_annotations_between_time(self, start: Union[int, float], end: Union[int,
684686
time_anchors_in_range.append(ann)
685687
time_anchors_in_range.sort(key=lambda x: self.get_start(x))
686688
for time_anchor in time_anchors_in_range:
687-
yield time_anchor
689+
if not at_types or time_anchor.at_type in at_types:
690+
yield time_anchor
688691
for aligned in time_anchor.get_all_aligned():
689-
yield aligned
692+
if not at_types or aligned.at_type in at_types:
693+
yield aligned
690694

691695
def _get_linear_anchor_point(self, ann: Annotation, targets_sorted=False, start: bool = True) -> Union[int, float]:
692696
# TODO (krim @ 2/5/24): Update the return type once timeunits are unified to `ms` as integers (https://github.com/clamsproject/mmif/issues/192)

tests/test_serialize.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ def test_get_annotations_between_time(self):
361361
# .[] |
362362
# select(."@type"=="http://vocab.lappsgrid.org/Token")] |
363363
# sort_by(.properties.id | ltrimstr("t") | tonumber) |
364-
# map(.properties.text)' <examples>.json
364+
# map(.properties.word)' <examples>.json
365365
tokens_in_order = ["Hello",
366366
",",
367367
"this",
@@ -405,11 +405,13 @@ def test_get_annotations_between_time(self):
405405
self.assertFalse(ann.is_type(token_type))
406406

407407
# Test case 3(a): Partial tokens are selected (involve partial overlap)
408-
selected_token_anns = mmif_obj.get_annotations_between_time(7, 10, time_unit="seconds")
408+
selected_token_anns = mmif_obj.get_annotations_between_time(7, 10, time_unit="seconds",
409+
at_types=['http://vocab.lappsgrid.org/Token'])
409410
self.assertEqual(tokens_in_order[3:9], [ann.get_property("word") for ann in selected_token_anns])
410411

411412
# Test case 3(b): Partial tokens are selected (only full overlap)
412-
selected_token_anns = mmif_obj.get_annotations_between_time(11500, 14600)
413+
selected_token_anns = mmif_obj.get_annotations_between_time(11500, 14600,
414+
at_types=['http://vocab.lappsgrid.org/Token'])
413415
self.assertEqual(tokens_in_order[12:17], [ann.get_property("word") for ann in selected_token_anns])
414416

415417
def test_add_document(self):

0 commit comments

Comments
 (0)