Skip to content

Commit ab40d2b

Browse files
committed
Merge branch 'develop' into 281-improved-rep-frame-extr
2 parents d085cfb + af83e13 commit ab40d2b

File tree

13 files changed

+180
-47
lines changed

13 files changed

+180
-47
lines changed

documentation/autodoc/mmif.serialize.rst

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,33 @@
1-
mmif.serialize module
2-
=====================
1+
mmif.serialize package
2+
======================
33

4-
mmif.serialize.model module
4+
Core package to provide serialization and deserialization of MMIF format.
5+
6+
``model`` module
57
---------------------------
68

79
.. automodule:: mmif.serialize.model
810
:members:
911
:undoc-members:
1012
:show-inheritance:
1113

12-
mmif.serialize.mmif module
14+
``mmif`` module
1315
--------------------------
1416

1517
.. automodule:: mmif.serialize.mmif
1618
:members:
1719
:undoc-members:
1820
:show-inheritance:
1921

20-
mmif.serialize.view module
22+
``view`` module
2123
--------------------------
2224

2325
.. automodule:: mmif.serialize.view
2426
:members:
2527
:undoc-members:
2628
:show-inheritance:
2729

28-
mmif.serialize.annotation module
30+
``annotation`` module
2931
--------------------------------
3032

3133
.. automodule:: mmif.serialize.annotation

documentation/autodoc/mmif.utils.rst

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,39 @@
1-
mmif.utils module
2-
=================
1+
mmif.utils package
2+
==================
33

4-
mmif.utils.video_document_helper module
4+
Package containing utility modules for handling different types of source
5+
documents, and general implementation of common data structures and
6+
algorithms.
7+
8+
``video_document_helper`` module
59
----------------------------------------
610

711
.. automodule:: mmif.utils.video_document_helper
812
:members:
913
:undoc-members:
1014
:show-inheritance:
1115

12-
mmif.utils.sequence_helper module
16+
``text_document_helper`` module
17+
---------------------------------
18+
19+
.. automodule:: mmif.utils.text_document_helper
20+
:members:
21+
:undoc-members:
22+
:show-inheritance:
23+
24+
``sequence_helper`` module
1325
---------------------------------
1426

1527
.. automodule:: mmif.utils.sequence_helper
1628
:members:
1729
:undoc-members:
1830
:show-inheritance:
31+
32+
``sequence_helper`` module
33+
---------------------------------
34+
35+
.. automodule:: mmif.utils.sequence_helper
36+
:members:
37+
:undoc-members:
38+
:show-inheritance:
39+
Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,28 @@
11
mmif.vocabulary module
22
======================
33

4-
mmif.vocabulary.annotation\_types module
5-
----------------------------------------
4+
Module contains Enum-like classes for CLAMS vocabulary.
65

7-
.. automodule:: mmif.vocabulary.annotation_types
6+
.. autoclass:: mmif.vocabulary.ThingTypesBase
7+
:show-inheritance:
8+
.. autoclass:: mmif.vocabulary.ThingType
89
:members:
910
:undoc-members:
1011
:show-inheritance:
1112

12-
mmif.vocabulary.document\_types module
13-
--------------------------------------
13+
.. autoclass:: mmif.vocabulary.ClamsTypesBase
14+
:show-inheritance:
15+
.. autoclass:: mmif.vocabulary.AnnotationTypesBase
16+
:show-inheritance:
17+
.. autoclass:: mmif.vocabulary.DocumentTypesBase
18+
:show-inheritance:
19+
20+
.. autoclass:: mmif.vocabulary.AnnotationTypes
21+
:members:
22+
:undoc-members:
23+
:show-inheritance:
1424

15-
.. automodule:: mmif.vocabulary.document_types
25+
.. autoclass:: mmif.vocabulary.DocumentTypes
1626
:members:
1727
:undoc-members:
1828
:show-inheritance:

documentation/plugins.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ To add a document location handler plugin, you need to implement a Python `"pack
2828

2929
#. the package must be named ``mmif_docloc_<SCHEME>``. For example, to implement a handler for ``s3`` scheme, the package name must be ``mmif_docloc_s3``. The prefix is important as it's used in the plugin discovery process from the core ``mmif-python`` modules.
3030
#. the top module of the package must have a function named ``resolve``. The function must take a single argument, which is a :class:`str` of the document location URI. The function must return a :class:`str` of the local file path. For example, if the document location is ``s3://mybucket/myfile.mp4``, a Python user should be able to do something like this;
31+
#. Optionally (but highly recommended), the top module can also provide another function named ``help``. The function must take no arguments and return a :class:`str` that explains how the input string to the ``resolve`` function should be formatted.
3132

3233
.. code-block:: python
3334
@@ -76,6 +77,11 @@ And the plugin code.
7677
else:
7778
raise ValueError(f'cannot handle document location scheme: {docloc}')
7879
80+
def help():
81+
return "location format: `<DOCUMENT_ID>.video`"
82+
83+
84+
7985
Built-in Document Location Scheme Plugins
8086
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
8187

mmif/serialize/annotation.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,10 @@ def add_property(self, name: str,
323323
only keep the latest value (in order of appearances in views list) of
324324
the property, effectively overwriting the previous values.
325325
"""
326+
# we are not checking whether this k-v already exists in _original (new props) or _ephemeral (read from existing MMIF)
327+
# because it is impossible to keep the _original updated when a new annotation is added (via `new_annotation`)
328+
# without looking across other views and the top-level documents list. Also see
329+
# ``mmif.serialize.mmif.Mmif.generate_capital_annotations`` for where the "de-duplication" happens.
326330
if name == "text":
327331
self.properties.text = Text(value)
328332
elif name == "mime":

mmif/serialize/mmif.py

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,24 @@ def append(self, value: View, overwrite=False) -> None:
115115
"""
116116
super()._append_with_key(value.id, value, overwrite)
117117

118-
def get_last(self) -> Optional[View]:
118+
def get_last_contentful_view(self) -> Optional[View]:
119119
"""
120-
Returns the last view appended to the list.
120+
Returns the last view that is contentful, i.e., has no error or warnings in its metadata.
121121
"""
122122
for view in reversed(self._items.values()):
123-
if 'error' not in view.metadata and 'warning' not in view.metadata:
123+
if 'error' not in view.metadata and 'warnings' not in view.metadata:
124124
return view
125+
126+
def get_last_view(self) -> Optional[View]:
127+
"""
128+
Returns the last view appended.
129+
"""
130+
if self._items:
131+
return self._items[list(self._items.keys())[-1]]
132+
133+
def get_last(self) -> Optional[View]:
134+
warnings.warn('get_last() is deprecated, use get_last_contentful_view() instead.', DeprecationWarning)
135+
return self.get_last_contentful_view()
125136

126137

127138
class Mmif(MmifObject):
@@ -278,13 +289,23 @@ def generate_capital_annotations(self):
278289
See https://github.com/clamsproject/mmif-python/issues/226 for rationale
279290
behind this behavior and discussion.
280291
"""
281-
# this view will be the default kitchen sink for all generated annotations
282-
last_view = self.views.get_last()
292+
# this view will be the default kitchen sink for all generated annotations.
293+
last_view = self.views.get_last_contentful_view()
294+
283295
# proceed only when there's at least one view
284296
if last_view:
297+
298+
# this app name is used to check whether a view was generated by the "currently running" app.
299+
# knowing the currently running app is important so that properties of `Document` objects generated by the
300+
# current app can be properly recorded inside the `Document` objects (since they are "writable" to the
301+
# current app), instead of being recorded in a separate `Annotation` object.
302+
current_app = last_view.metadata.app
303+
285304
# to avoid duplicate property recording, this will be populated with
286305
# existing Annotation objects from all existing views
287306
existing_anns = defaultdict(lambda: defaultdict(dict))
307+
# ideally, if we can "de-duplicate" props at `add_property()` time, that'd be more efficient,
308+
# but that is impossible without looking for the target `document` across other views and top documents list
288309

289310
# new properties to record in the current serialization call
290311
anns_to_write = defaultdict(dict)
@@ -296,20 +317,30 @@ def generate_capital_annotations(self):
296317
for ann in view.get_annotations(AnnotationTypes.Annotation):
297318
if doc_id is None:
298319
doc_id = ann.get_property('document')
320+
# only if we are sure that the document ID is unique across all views... (with v_id prefix)
321+
# TODO (krim @ 7/15/24): update id checking once https://github.com/clamsproject/mmif/issues/228 is resolved
322+
if not any([doc_id == doc.id for doc in self.documents]) and Mmif.id_delimiter not in doc_id:
323+
doc_id = f"{view.id}{Mmif.id_delimiter}{doc_id}"
299324
existing_anns[doc_id].update(ann.properties)
300325
for doc in view.get_documents():
301-
anns_to_write[doc.id].update(doc._props_pending)
326+
anns_to_write[doc.long_id].update(doc._props_pending)
302327
for doc in self.documents:
303-
anns_to_write[doc.id].update(doc._props_pending)
328+
anns_to_write[doc.long_id].update(doc._props_pending)
304329
# additional iteration of views, to find a proper view to add the
305330
# generated annotations. If none found, use the last view as the kitchen sink
306331
last_view_for_docs = defaultdict(lambda: last_view)
307332
doc_ids = set(anns_to_write.keys())
308333
for doc_id in doc_ids:
334+
if len(last_view.annotations) == 0:
335+
# meaning, this new app didn't generate any annotation except for these document properties
336+
# thus, we should add capital annotations to the last (empty) view
337+
last_view_for_docs[doc_id] = last_view
338+
break
309339
for view in reversed(self.views):
310340
# first try to find out if this view "contains" any annotation to the doc
311341
# then, check for individual annotations
312-
if [cont for cont in view.metadata.contains.values() if cont.get('document', None) == doc_id] \
342+
# TODO (krim @ 7/15/24): update id checking once https://github.com/clamsproject/mmif/issues/228 is resolved
343+
if [cont for cont in view.metadata.contains.values() if doc_id.endswith(cont.get('document', 'TODO:this endswith test is a temporal solution we use until long_id is forced everywhere'))] \
313344
or list(view.get_annotations(document=doc_id)):
314345
last_view_for_docs[doc_id] = view
315346
break
@@ -323,14 +354,18 @@ def generate_capital_annotations(self):
323354
if k != 'id' and existing_anns[doc_id][k] != v:
324355
props[k] = v
325356
if props:
326-
if len(anns_to_write) == 1:
327-
# if there's only one document, we can record the doc_id in the contains metadata
328-
last_view_for_docs[doc_id].metadata.new_contain(AnnotationTypes.Annotation, document=doc_id)
329-
props.pop('document', None)
357+
view_to_write = last_view_for_docs[doc_id]
358+
if view_to_write.metadata.app == current_app and view_to_write.annotations.get(doc_id) is not None:
359+
view_to_write.get_document_by_id(doc_id).properties.update(props)
330360
else:
331-
# otherwise, doc_id needs to be recorded in the annotation property
332-
props['document'] = doc_id
333-
last_view_for_docs[doc_id].new_annotation(AnnotationTypes.Annotation, **props)
361+
if len(anns_to_write) == 1:
362+
# if there's only one document, we can record the doc_id in the contains metadata
363+
view_to_write.metadata.new_contain(AnnotationTypes.Annotation, document=doc_id)
364+
props.pop('document', None)
365+
else:
366+
# otherwise, doc_id needs to be recorded in the annotation property
367+
props['document'] = doc_id
368+
view_to_write.new_annotation(AnnotationTypes.Annotation, **props)
334369

335370
def sanitize(self):
336371
"""

mmif/serialize/view.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing import Dict, Union, Optional, Generator, List, cast
1111

1212
from mmif import DocumentTypes, AnnotationTypes, ThingTypesBase, ClamsTypesBase
13+
from mmif.serialize import model
1314
from mmif.serialize.annotation import Annotation, Document
1415
from mmif.serialize.model import PRMTV_TYPES, MmifObject, DataList, DataDict
1516

@@ -439,6 +440,16 @@ def append(self, value: Union[Annotation, Document], overwrite=False) -> None:
439440
:return: None
440441
"""
441442
super()._append_with_key(value.id, value, overwrite)
443+
444+
def __getitem__(self, key: str):
445+
"""
446+
specialized getter implementation to workaround https://github.com/clamsproject/mmif/issues/228
447+
# TODO (krim @ 7/12/24): annotation ids must be in the long form in the future, so this check will be unnecessary once https://github.com/clamsproject/mmif/issues/228 is resolved.
448+
"""
449+
if ":" in key:
450+
_, aid = key.split(":")
451+
return self._items.__getitem__(aid)
452+
return self._items.get(key, None)
442453

443454

444455
class ContainsDict(DataDict[ThingTypesBase, Contain]):

mmif/utils/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +0,0 @@
1-
from mmif.utils import video_document_helper

mmif/utils/video_document_helper.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,18 +84,19 @@ def extract_frames_as_images(video_document: Document, framenums: List[int], as_
8484
video = capture(video_document)
8585
cur_f = 0
8686
tot_fcount = video_document.get_property(FRAMECOUNT_DOCPROP_KEY)
87+
framenums_copy = framenums.copy()
8788
while True:
88-
if not framenums or cur_f > tot_fcount:
89+
if not framenums_copy or cur_f > tot_fcount:
8990
break
9091
ret, frame = video.read()
91-
if cur_f == framenums[0]:
92+
if cur_f == framenums_copy[0]:
9293
if not ret:
9394
sec = convert(cur_f, 'f', 's', video_document.get_property(FPS_DOCPROP_KEY))
9495
warnings.warn(f'Frame #{cur_f} ({sec}s) could not be read from the video {video_document.id}.')
9596
cur_f += 1
9697
continue
9798
frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame)
98-
framenums.pop(0)
99+
framenums_copy.pop(0)
99100
cur_f += 1
100101
return frames
101102

mmif_docloc_http/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,7 @@ def resolve(docloc):
1010
raise ValueError(f'cannot handle document location scheme: {docloc}')
1111
except urllib.error.URLError as e:
1212
raise e
13+
14+
15+
def help():
16+
return "location must be a URL string."

tests/black-2997fps.mp4

63.1 KB
Binary file not shown.

0 commit comments

Comments
 (0)