Skip to content

Commit c42234f

Browse files
authored
Merge pull request #282 from clamsproject/279-mmif-getitem-for-annID
`mmif.__getitem__()` now works with short annotation ID
2 parents 6cec53f + 90ec32f commit c42234f

File tree

2 files changed

+143
-112
lines changed

2 files changed

+143
-112
lines changed

mmif/serialize/mmif.py

+120-112
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,104 @@
2424
__all__ = ['Mmif']
2525

2626

27+
class MmifMetadata(MmifObject):
28+
"""
29+
Basic MmifObject class to contain the top-level metadata of a MMIF file.
30+
31+
:param metadata_obj: the JSON data
32+
"""
33+
34+
def __init__(self, metadata_obj: Optional[Union[bytes, str, dict]] = None) -> None:
35+
# TODO (krim @ 10/7/20): there could be a better name and a better way to give a value to this
36+
self.mmif: str = f"http://mmif.clams.ai/{mmif.__specver__}"
37+
self._required_attributes = ["mmif"]
38+
super().__init__(metadata_obj)
39+
40+
41+
class DocumentsList(DataList[Document]):
42+
"""
43+
DocumentsList object that implements :class:`mmif.serialize.model.DataList`
44+
for :class:`mmif.serialize.document.Document`.
45+
"""
46+
_items: Dict[str, Document]
47+
48+
def _deserialize(self, input_list: list) -> None: # pytype: disable=signature-mismatch
49+
"""
50+
Extends base ``_deserialize`` method to initialize ``items`` as a dict from
51+
document IDs to :class:`mmif.serialize.document.Document` objects.
52+
53+
:param input_list: the JSON data that defines the list of documents
54+
:return: None
55+
"""
56+
self._items = {item['properties']['id']: Document(item) for item in input_list}
57+
58+
def append(self, value: Document, overwrite=False) -> None:
59+
"""
60+
Appends a document to the list.
61+
62+
Fails if there is already a document with the same ID
63+
in the list, unless ``overwrite`` is set to True.
64+
65+
:param value: the :class:`mmif.serialize.document.Document`
66+
object to add
67+
:param overwrite: if set to True, will overwrite an
68+
existing document with the same ID
69+
:raises KeyError: if ``overwrite`` is set to False and
70+
a document with the same ID exists
71+
in the list
72+
:return: None
73+
"""
74+
super()._append_with_key(value.id, value, overwrite)
75+
76+
77+
class ViewsList(DataList[View]):
78+
"""
79+
ViewsList object that implements :class:`mmif.serialize.model.DataList`
80+
for :class:`mmif.serialize.view.View`.
81+
"""
82+
_items: Dict[str, View]
83+
84+
def __init__(self, mmif_obj: Optional[Union[bytes, str, list]] = None):
85+
super().__init__(mmif_obj)
86+
87+
def _deserialize(self, input_list: list) -> None: # pytype: disable=signature-mismatch
88+
"""
89+
Extends base ``_deserialize`` method to initialize ``items`` as a dict from
90+
view IDs to :class:`mmif.serialize.view.View` objects.
91+
92+
:param input_list: the JSON data that defines the list of views
93+
:return: None
94+
"""
95+
if input_list:
96+
self._items = {item['id']: View(item) for item in input_list}
97+
98+
def append(self, value: View, overwrite=False) -> None:
99+
"""
100+
Appends a view to the list.
101+
102+
Fails if there is already a view with the same ID
103+
in the list, unless ``overwrite`` is set to True.
104+
105+
:param value: the :class:`mmif.serialize.view.View`
106+
object to add
107+
:param overwrite: if set to True, will overwrite an
108+
existing view with the same ID
109+
:raises KeyError: if ``overwrite`` is set to False and
110+
a view with the same ID exists
111+
in the list
112+
:return: None
113+
"""
114+
super()._append_with_key(value.id, value, overwrite)
115+
116+
def get_last(self) -> Optional[View]:
117+
"""
118+
Returns the most recently appended view whose metadata contains neither an ``error`` nor a ``warning``, or ``None`` if there is no such view.
119+
"""
120+
for view in reversed(self._items.values()):
121+
if 'error' not in view.metadata and 'warning' not in view.metadata:
122+
return view
123+
124+
27125
class Mmif(MmifObject):
28126
"""
29127
MmifObject that represents a full MMIF file.
@@ -560,131 +658,41 @@ def get_end(self, annotation: Annotation) -> Union[int, float]:
560658
"""
561659
return self._get_linear_anchor_point(annotation, start=False)
562660

563-
# pytype: disable=bad-return-type
564-
def __getitem__(self, item: str) -> Union[Document, View, Annotation]:
661+
def __getitem__(self, item: str) \
662+
-> Union[Document, View, Annotation, MmifMetadata, DocumentsList, ViewsList]:
565663
"""
566-
getitem implementation for Mmif. When nothing is found, this will raise an error
567-
rather than returning a None (although pytype doesn't think so...)
664+
getitem implementation for Mmif. This will try to find any object, given an identifier or an immediate
665+
attribute name. When nothing is found, this will raise an error rather than returning a None
568666
569667
:raises KeyError: if the item is not found or if the search results are ambiguous
570-
:param item: the search string, a document ID, a view ID, or a view-scoped annotation ID
668+
:param item: an attribute name or an object identifier (a document ID, a view ID, or an annotation ID). When
669+
annotation ID is given as a "short" ID (without view ID prefix), the method will search the
670+
annotations of every view, and raise a KeyError if more than one object matches the ID.
571671
:return: the object searched for
672+
:raise KeyError: if the item is not found or multiple objects are found with the same ID
572673
"""
573674
if item in self._named_attributes():
574675
return self.__dict__[item]
575676
split_attempt = item.split(self.id_delimiter)
576677

577-
document_result = self.documents.get(split_attempt[0])
578-
view_result = self.views.get(split_attempt[0])
678+
found = []
579679

580680
if len(split_attempt) == 1:
581-
anno_result = None
582-
elif view_result:
583-
anno_result = view_result[split_attempt[1]]
681+
found.append(self.documents.get(split_attempt[0]))
682+
found.append(self.views.get(split_attempt[0]))
683+
for view in self.views:
684+
found.append(view.annotations.get(split_attempt[0]))
685+
elif len(split_attempt) == 2:
686+
v = self.get_view_by_id(split_attempt[0])
687+
if v is not None:
688+
found.append(v.annotations.get(split_attempt[1]))
584689
else:
585690
raise KeyError("Tried to subscript into a view that doesn't exist")
691+
found = [x for x in found if x is not None]
586692

587-
if view_result and document_result:
693+
if len(found) > 1:
588694
raise KeyError("Ambiguous ID search result")
589-
if not (view_result or document_result):
695+
elif len(found) == 0:
590696
raise KeyError("ID not found: %s" % item)
591-
return anno_result or view_result or document_result
592-
# pytype: enable=bad-return-type
593-
594-
595-
class MmifMetadata(MmifObject):
596-
"""
597-
Basic MmifObject class to contain the top-level metadata of a MMIF file.
598-
599-
:param metadata_obj: the JSON data
600-
"""
601-
602-
def __init__(self, metadata_obj: Optional[Union[bytes, str, dict]] = None) -> None:
603-
# TODO (krim @ 10/7/20): there could be a better name and a better way to give a value to this
604-
self.mmif: str = f"http://mmif.clams.ai/{mmif.__specver__}"
605-
self._required_attributes = ["mmif"]
606-
super().__init__(metadata_obj)
607-
608-
609-
class DocumentsList(DataList[Document]):
610-
"""
611-
DocumentsList object that implements :class:`mmif.serialize.model.DataList`
612-
for :class:`mmif.serialize.document.Document`.
613-
"""
614-
_items: Dict[str, Document]
615-
616-
def _deserialize(self, input_list: list) -> None: # pytype: disable=signature-mismatch
617-
"""
618-
Extends base ``_deserialize`` method to initialize ``items`` as a dict from
619-
document IDs to :class:`mmif.serialize.document.Document` objects.
620-
621-
:param input_list: the JSON data that defines the list of documents
622-
:return: None
623-
"""
624-
self._items = {item['properties']['id']: Document(item) for item in input_list}
625-
626-
def append(self, value: Document, overwrite=False) -> None:
627-
"""
628-
Appends a document to the list.
629-
630-
Fails if there is already a document with the same ID
631-
in the list, unless ``overwrite`` is set to True.
632-
633-
:param value: the :class:`mmif.serialize.document.Document`
634-
object to add
635-
:param overwrite: if set to True, will overwrite an
636-
existing document with the same ID
637-
:raises KeyError: if ``overwrite`` is set to False and
638-
a document with the same ID exists
639-
in the list
640-
:return: None
641-
"""
642-
super()._append_with_key(value.id, value, overwrite)
643-
644-
645-
class ViewsList(DataList[View]):
646-
"""
647-
ViewsList object that implements :class:`mmif.serialize.model.DataList`
648-
for :class:`mmif.serialize.view.View`.
649-
"""
650-
_items: Dict[str, View]
651-
652-
def __init__(self, mmif_obj: Optional[Union[bytes, str, list]] = None):
653-
super().__init__(mmif_obj)
654-
655-
def _deserialize(self, input_list: list) -> None: # pytype: disable=signature-mismatch
656-
"""
657-
Extends base ``_deserialize`` method to initialize ``items`` as a dict from
658-
view IDs to :class:`mmif.serialize.view.View` objects.
659-
660-
:param input_list: the JSON data that defines the list of views
661-
:return: None
662-
"""
663-
if input_list:
664-
self._items = {item['id']: View(item) for item in input_list}
665-
666-
def append(self, value: View, overwrite=False) -> None:
667-
"""
668-
Appends a view to the list.
669-
670-
Fails if there is already a view with the same ID
671-
in the list, unless ``overwrite`` is set to True.
672-
673-
:param value: the :class:`mmif.serialize.view.View`
674-
object to add
675-
:param overwrite: if set to True, will overwrite an
676-
existing view with the same ID
677-
:raises KeyError: if ``overwrite`` is set to False and
678-
a view with the same ID exists
679-
in the list
680-
:return: None
681-
"""
682-
super()._append_with_key(value.id, value, overwrite)
683-
684-
def get_last(self) -> Optional[View]:
685-
"""
686-
Returns the most recently appended view whose metadata contains neither an ``error`` nor a ``warning``, or ``None`` if there is no such view.
687-
"""
688-
for view in reversed(self._items.values()):
689-
if 'error' not in view.metadata and 'warning' not in view.metadata:
690-
return view
697+
else:
698+
return found[-1]

tests/test_serialize.py

+23
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,29 @@ def test_mmif_getitem_document(self):
516516
except KeyError:
517517
self.fail("didn't get document 'm1'")
518518

519+
def test_mmif_getitem_idconflict(self):
520+
m = Mmif(validate=False)
521+
v1 = m.new_view()
522+
v1.id = 'v1'
523+
v2 = m.new_view()
524+
v2.id = 'v1'
525+
with pytest.raises(KeyError):
526+
_ = m['v1']
527+
528+
m = Mmif(validate=False)
529+
v1 = m.new_view()
530+
v1a = v1.new_annotation(AnnotationTypes.Annotation, id='a1')
531+
v2 = m.new_view()
532+
v2a = v2.new_annotation(AnnotationTypes.Annotation, id='a1')
533+
self.assertIsNotNone(m[v1.id])
534+
self.assertIsNotNone(m[v2.id])
535+
# conflicting short IDs: both views contain an annotation with the same short ID
536+
self.assertEqual(v1a.id, v2a.id)
537+
with pytest.raises(KeyError):
538+
_ = m[v1a.id]
539+
self.assertIsNotNone(m[v1a.long_id])
540+
self.assertIsNotNone(m[v2a.long_id])
541+
519542
def test_mmif_getitem_view(self):
520543
try:
521544
v1 = self.mmif_obj['v1']

0 commit comments

Comments
 (0)