Skip to content

Commit 30f67ff

Browse files
authored
Merge pull request #283 from clamsproject/develop
releasing 1.0.15
2 parents b24722a + c42234f commit 30f67ff

File tree

4 files changed

+200
-114
lines changed

4 files changed

+200
-114
lines changed

Diff for: mmif/serialize/mmif.py

+120-112
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,104 @@
2424
__all__ = ['Mmif']
2525

2626

27+
class MmifMetadata(MmifObject):
28+
"""
29+
Basic MmifObject class to contain the top-level metadata of a MMIF file.
30+
31+
:param metadata_obj: the JSON data
32+
"""
33+
34+
def __init__(self, metadata_obj: Optional[Union[bytes, str, dict]] = None) -> None:
35+
# TODO (krim @ 10/7/20): there could be a better name and a better way to give a value to this
36+
self.mmif: str = f"http://mmif.clams.ai/{mmif.__specver__}"
37+
self._required_attributes = ["mmif"]
38+
super().__init__(metadata_obj)
39+
40+
41+
class DocumentsList(DataList[Document]):
42+
"""
43+
DocumentsList object that implements :class:`mmif.serialize.model.DataList`
44+
for :class:`mmif.serialize.document.Document`.
45+
"""
46+
_items: Dict[str, Document]
47+
48+
def _deserialize(self, input_list: list) -> None: # pytype: disable=signature-mismatch
49+
"""
50+
Extends base ``_deserialize`` method to initialize ``items`` as a dict from
51+
document IDs to :class:`mmif.serialize.document.Document` objects.
52+
53+
:param input_list: the JSON data that defines the list of documents
54+
:return: None
55+
"""
56+
self._items = {item['properties']['id']: Document(item) for item in input_list}
57+
58+
def append(self, value: Document, overwrite=False) -> None:
59+
"""
60+
Appends a document to the list.
61+
62+
Fails if there is already a document with the same ID
63+
in the list, unless ``overwrite`` is set to True.
64+
65+
:param value: the :class:`mmif.serialize.document.Document`
66+
object to add
67+
:param overwrite: if set to True, will overwrite an
68+
existing document with the same ID
69+
:raises KeyError: if ``overwrite`` is set to False and
70+
a document with the same ID exists
71+
in the list
72+
:return: None
73+
"""
74+
super()._append_with_key(value.id, value, overwrite)
75+
76+
77+
class ViewsList(DataList[View]):
78+
"""
79+
ViewsList object that implements :class:`mmif.serialize.model.DataList`
80+
for :class:`mmif.serialize.view.View`.
81+
"""
82+
_items: Dict[str, View]
83+
84+
def __init__(self, mmif_obj: Optional[Union[bytes, str, list]] = None):
85+
super().__init__(mmif_obj)
86+
87+
def _deserialize(self, input_list: list) -> None: # pytype: disable=signature-mismatch
88+
"""
89+
Extends base ``_deserialize`` method to initialize ``items`` as a dict from
90+
view IDs to :class:`mmif.serialize.view.View` objects.
91+
92+
:param input_list: the JSON data that defines the list of views
93+
:return: None
94+
"""
95+
if input_list:
96+
self._items = {item['id']: View(item) for item in input_list}
97+
98+
def append(self, value: View, overwrite=False) -> None:
99+
"""
100+
Appends a view to the list.
101+
102+
Fails if there is already a view with the same ID
103+
in the list, unless ``overwrite`` is set to True.
104+
105+
:param value: the :class:`mmif.serialize.view.View`
106+
object to add
107+
:param overwrite: if set to True, will overwrite an
108+
existing view with the same ID
109+
:raises KeyError: if ``overwrite`` is set to False and
110+
a view with the same ID exists
111+
in the list
112+
:return: None
113+
"""
114+
super()._append_with_key(value.id, value, overwrite)
115+
116+
def get_last(self) -> Optional[View]:
117+
"""
118+
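Returns the last view in the list whose metadata contains neither an 'error' nor a 'warning' key (views with either key are skipped; the result is None when no such view exists).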
119+
"""
120+
for view in reversed(self._items.values()):
121+
if 'error' not in view.metadata and 'warning' not in view.metadata:
122+
return view
123+
124+
27125
class Mmif(MmifObject):
28126
"""
29127
MmifObject that represents a full MMIF file.
@@ -560,131 +658,41 @@ def get_end(self, annotation: Annotation) -> Union[int, float]:
560658
"""
561659
return self._get_linear_anchor_point(annotation, start=False)
562660

563-
# pytype: disable=bad-return-type
564-
def __getitem__(self, item: str) -> Union[Document, View, Annotation]:
661+
def __getitem__(self, item: str) \
662+
-> Union[Document, View, Annotation, MmifMetadata, DocumentsList, ViewsList]:
565663
"""
566-
getitem implementation for Mmif. When nothing is found, this will raise an error
567-
rather than returning a None (although pytype doesn't think so...)
664+
getitem implementation for Mmif. This will try to find any object, given an identifier or an immediate
665+
attribute name. When nothing is found, this will raise an error rather than returning a None
568666
569667
:raises KeyError: if the item is not found or if the search results are ambiguous
570-
:param item: the search string, a document ID, a view ID, or a view-scoped annotation ID
668+
:param item: an attribute name or an object identifier (a document ID, a view ID, or an annotation ID). When
669+
annotation ID is given as a "short" ID (without view ID prefix), the method looks it up among the
670+
annotations of every view (as well as among document and view IDs) and raises a KeyError if more than one object matches.
571671
:return: the object searched for
672+
:raise KeyError: if the item is not found or multiple objects are found with the same ID
572673
"""
573674
if item in self._named_attributes():
574675
return self.__dict__[item]
575676
split_attempt = item.split(self.id_delimiter)
576677

577-
document_result = self.documents.get(split_attempt[0])
578-
view_result = self.views.get(split_attempt[0])
678+
found = []
579679

580680
if len(split_attempt) == 1:
581-
anno_result = None
582-
elif view_result:
583-
anno_result = view_result[split_attempt[1]]
681+
found.append(self.documents.get(split_attempt[0]))
682+
found.append(self.views.get(split_attempt[0]))
683+
for view in self.views:
684+
found.append(view.annotations.get(split_attempt[0]))
685+
elif len(split_attempt) == 2:
686+
v = self.get_view_by_id(split_attempt[0])
687+
if v is not None:
688+
found.append(v.annotations.get(split_attempt[1]))
584689
else:
585690
raise KeyError("Tried to subscript into a view that doesn't exist")
691+
found = [x for x in found if x is not None]
586692

587-
if view_result and document_result:
693+
if len(found) > 1:
588694
raise KeyError("Ambiguous ID search result")
589-
if not (view_result or document_result):
695+
elif len(found) == 0:
590696
raise KeyError("ID not found: %s" % item)
591-
return anno_result or view_result or document_result
592-
# pytype: enable=bad-return-type
593-
594-
595-
class MmifMetadata(MmifObject):
596-
"""
597-
Basic MmifObject class to contain the top-level metadata of a MMIF file.
598-
599-
:param metadata_obj: the JSON data
600-
"""
601-
602-
def __init__(self, metadata_obj: Optional[Union[bytes, str, dict]] = None) -> None:
603-
# TODO (krim @ 10/7/20): there could be a better name and a better way to give a value to this
604-
self.mmif: str = f"http://mmif.clams.ai/{mmif.__specver__}"
605-
self._required_attributes = ["mmif"]
606-
super().__init__(metadata_obj)
607-
608-
609-
class DocumentsList(DataList[Document]):
610-
"""
611-
DocumentsList object that implements :class:`mmif.serialize.model.DataList`
612-
for :class:`mmif.serialize.document.Document`.
613-
"""
614-
_items: Dict[str, Document]
615-
616-
def _deserialize(self, input_list: list) -> None: # pytype: disable=signature-mismatch
617-
"""
618-
Extends base ``_deserialize`` method to initialize ``items`` as a dict from
619-
document IDs to :class:`mmif.serialize.document.Document` objects.
620-
621-
:param input_list: the JSON data that defines the list of documents
622-
:return: None
623-
"""
624-
self._items = {item['properties']['id']: Document(item) for item in input_list}
625-
626-
def append(self, value: Document, overwrite=False) -> None:
627-
"""
628-
Appends a document to the list.
629-
630-
Fails if there is already a document with the same ID
631-
in the list, unless ``overwrite`` is set to True.
632-
633-
:param value: the :class:`mmif.serialize.document.Document`
634-
object to add
635-
:param overwrite: if set to True, will overwrite an
636-
existing document with the same ID
637-
:raises KeyError: if ``overwrite`` is set to False and
638-
a document with the same ID exists
639-
in the list
640-
:return: None
641-
"""
642-
super()._append_with_key(value.id, value, overwrite)
643-
644-
645-
class ViewsList(DataList[View]):
646-
"""
647-
ViewsList object that implements :class:`mmif.serialize.model.DataList`
648-
for :class:`mmif.serialize.view.View`.
649-
"""
650-
_items: Dict[str, View]
651-
652-
def __init__(self, mmif_obj: Optional[Union[bytes, str, list]] = None):
653-
super().__init__(mmif_obj)
654-
655-
def _deserialize(self, input_list: list) -> None: # pytype: disable=signature-mismatch
656-
"""
657-
Extends base ``_deserialize`` method to initialize ``items`` as a dict from
658-
view IDs to :class:`mmif.serialize.view.View` objects.
659-
660-
:param input_list: the JSON data that defines the list of views
661-
:return: None
662-
"""
663-
if input_list:
664-
self._items = {item['id']: View(item) for item in input_list}
665-
666-
def append(self, value: View, overwrite=False) -> None:
667-
"""
668-
Appends a view to the list.
669-
670-
Fails if there is already a view with the same ID
671-
in the list, unless ``overwrite`` is set to True.
672-
673-
:param value: the :class:`mmif.serialize.view.View`
674-
object to add
675-
:param overwrite: if set to True, will overwrite an
676-
existing view with the same ID
677-
:raises KeyError: if ``overwrite`` is set to False and
678-
a view with the same ID exists
679-
in the list
680-
:return: None
681-
"""
682-
super()._append_with_key(value.id, value, overwrite)
683-
684-
def get_last(self) -> Optional[View]:
685-
"""
686-
Returns the last view appended to the list.
687-
"""
688-
for view in reversed(self._items.values()):
689-
if 'error' not in view.metadata and 'warning' not in view.metadata:
690-
return view
697+
else:
698+
return found[-1]

Diff for: mmif/utils/video_document_helper.py

+40-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import mmif
77
from mmif import Annotation, Document, Mmif
88
from mmif.utils.timeunit_helper import convert
9-
from mmif.vocabulary import DocumentTypes
9+
from mmif.vocabulary import DocumentTypes, AnnotationTypes
1010

1111
for cv_dep in ('cv2', 'ffmpeg', 'PIL'):
1212
try:
@@ -83,14 +83,16 @@ def extract_frames_as_images(video_document: Document, framenums: List[int], as_
8383
frames = []
8484
video = capture(video_document)
8585
cur_f = 0
86+
tot_fcount = video_document.get_property(FRAMECOUNT_DOCPROP_KEY)
8687
while True:
87-
if not framenums or cur_f > video_document.get_property(FRAMECOUNT_DOCPROP_KEY):
88+
if not framenums or cur_f > tot_fcount:
8889
break
8990
ret, frame = video.read()
9091
if cur_f == framenums[0]:
9192
if not ret:
9293
sec = convert(cur_f, 'f', 's', video_document.get_property(FPS_DOCPROP_KEY))
9394
warnings.warn(f'Frame #{cur_f} ({sec}s) could not be read from the video {video_document.id}.')
95+
cur_f += 1
9496
continue
9597
frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame)
9698
framenums.pop(0)
@@ -125,6 +127,42 @@ def extract_mid_frame(mmif: Mmif, time_frame: Annotation, as_PIL: bool = False):
125127
return extract_frames_as_images(vd, [get_mid_framenum(mmif, time_frame)], as_PIL=as_PIL)[0]
126128

127129

130+
def get_representative_framenum(mmif: Mmif, time_frame: Annotation):
131+
"""
132+
Calculates the representative frame number from an annotation.
133+
134+
:param mmif: :py:class:`~mmif.serialize.mmif.Mmif` instance
135+
:param time_frame: :py:class:`~mmif.serialize.annotation.Annotation` instance that holds a time interval annotation containing a `representatives` property (``"@type": ".../TimeFrame/..."``)
136+
:return: representative frame number as an integer
137+
"""
138+
if 'representatives' not in time_frame.properties:
139+
raise ValueError(f'The time frame {time_frame.id} does not have a representative.')
140+
timeunit = time_frame.get_property('timeUnit')
141+
video_document = mmif[time_frame.get_property('document')]
142+
fps = get_framerate(video_document)
143+
representatives = time_frame.get_property('representatives')
144+
top_representative_id = representatives[0]
145+
try:
146+
representative_timepoint_anno = mmif[time_frame._parent_view_id+time_frame.id_delimiter+top_representative_id]
147+
except KeyError:
148+
raise ValueError(f'Representative timepoint {top_representative_id} not found in any view.')
149+
return convert(representative_timepoint_anno.get_property('timePoint'), timeunit, 'frame', fps)
150+
151+
152+
def extract_representative_frame(mmif: Mmif, time_frame: Annotation, as_PIL: bool = False):
153+
"""
154+
Extracts the representative frame of an annotation as a numpy ndarray or PIL Image.
155+
156+
:param mmif: :py:class:`~mmif.serialize.mmif.Mmif` instance
157+
:param time_frame: :py:class:`~mmif.serialize.annotation.Annotation` instance that holds a time interval annotation (``"@type": ".../TimeFrame/..."``)
158+
:param as_PIL: return :py:class:`~PIL.Image.Image` instead of :py:class:`~numpy.ndarray`
159+
:return: frame as a :py:class:`numpy.ndarray` or :py:class:`PIL.Image.Image`
160+
"""
161+
video_document = mmif[time_frame.get_property('document')]
162+
rep_frame_num = get_representative_framenum(mmif, time_frame)
163+
return extract_frames_as_images(video_document, [rep_frame_num], as_PIL=as_PIL)[0]
164+
165+
128166
def sample_frames(start_frame: int, end_frame: int, sample_rate: float = 1) -> List[int]:
129167
"""
130168
Helper function to sample frames from a time interval.

Diff for: tests/test_serialize.py

+23
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,29 @@ def test_mmif_getitem_document(self):
516516
except KeyError:
517517
self.fail("didn't get document 'm1'")
518518

519+
def test_mmif_getitem_idconflict(self):
520+
m = Mmif(validate=False)
521+
v1 = m.new_view()
522+
v1.id = 'v1'
523+
v2 = m.new_view()
524+
v2.id = 'v1'
525+
with pytest.raises(KeyError):
526+
_ = m['v1']
527+
528+
m = Mmif(validate=False)
529+
v1 = m.new_view()
530+
v1a = v1.new_annotation(AnnotationTypes.Annotation, id='a1')
531+
v2 = m.new_view()
532+
v2a = v2.new_annotation(AnnotationTypes.Annotation, id='a1')
533+
self.assertIsNotNone(m[v1.id])
534+
self.assertIsNotNone(m[v2.id])
535+
# conflict short IDs
536+
self.assertEqual(v1a.id, v2a.id)
537+
with pytest.raises(KeyError):
538+
_ = m[v1a.id]
539+
self.assertIsNotNone(m[v1a.long_id])
540+
self.assertIsNotNone(m[v2a.long_id])
541+
519542
def test_mmif_getitem_view(self):
520543
try:
521544
v1 = self.mmif_obj['v1']

Diff for: tests/test_utils.py

+17
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,23 @@ def test_extract_mid_frame(self):
4444
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=0, end=3, timeUnit='seconds', document='d1')
4545
self.assertEqual(vdh.convert(1.5, 's', 'f', self.fps), vdh.get_mid_framenum(self.mmif_obj, tf))
4646

47+
def test_extract_representative_frame(self):
48+
tp = self.a_view.new_annotation(AnnotationTypes.TimePoint, timePoint=1500, timeUnit='milliseconds', document='d1')
49+
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=1000, end=2000, timeUnit='milliseconds', document='d1')
50+
tf.add_property('representatives', [tp.id])
51+
rep_frame_num = vdh.get_representative_framenum(self.mmif_obj, tf)
52+
expected_frame_num = vdh.millisecond_to_framenum(self.video_doc, tp.get_property('timePoint'))
53+
self.assertEqual(expected_frame_num, rep_frame_num)
54+
# check there is an error if no representatives
55+
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=1000, end=2000, timeUnit='milliseconds', document='d1')
56+
with pytest.raises(ValueError):
57+
vdh.get_representative_framenum(self.mmif_obj, tf)
58+
# check there is an error if there is a representative referencing a timepoint that
59+
# does not exist
60+
tf.add_property('representatives', ['fake_tp_id'])
61+
with pytest.raises(ValueError):
62+
vdh.get_representative_framenum(self.mmif_obj, tf)
63+
4764
def test_get_framerate(self):
4865
self.assertAlmostEqual(29.97, vdh.get_framerate(self.video_doc), places=0)
4966

0 commit comments

Comments
 (0)