Skip to content

Commit 59695d8

Browse files
committed
Annotation.id now gets/sets long form ID (with parent_view_id prefix)
1 parent 8a6a942 commit 59695d8

File tree

5 files changed

+60
-39
lines changed

5 files changed

+60
-39
lines changed

mmif/serialize/annotation.py

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -116,17 +116,18 @@ def _add_prop_aliases(self, key_to_add, val_to_add):
116116
in `TimeFrame` and `BoundingBox` respectively.
117117
"""
118118
prop_aliases = AnnotationTypes._prop_aliases.get(self._type.shortname, {})
119-
for alias_reprep, alias_group in prop_aliases.items():
119+
for alias_rep, alias_group in prop_aliases.items():
120120
if key_to_add in alias_group:
121121
for alias in alias_group:
122122
if alias != key_to_add:
123123
self._props_ephemeral[alias] = val_to_add
124124
if alias in self.properties.keys():
125-
warning_msg = f'Found both "{key_to_add}" and "{alias}" in the properties of "{self.id}" annotation in "{self.parent}" view. '
126-
if alias == alias_reprep:
127-
warning_msg += f'However "{key_to_add}" is an alias of "{alias_reprep}".'
125+
warning_msg = (f'Both "{key_to_add}" and "{alias}" are in the properties of "{self.id}", '
126+
f'however ')
127+
if alias == alias_rep:
128+
warning_msg += f'"{key_to_add}" is an alias of "{alias_rep}".'
128129
else:
129-
warning_msg += f'However "{key_to_add}" and "{alias}" are boath aliases of "{alias_reprep}".'
130+
warning_msg += f'"{key_to_add}" and "{alias}" are both aliases of "{alias_rep}".'
130131
warning_msg += f'Having two synonyms in the same annotation can cause unexpected behavior. '
131132
warnings.warn(warning_msg, UserWarning)
132133

@@ -159,25 +160,39 @@ def parent(self, parent_view_id: str) -> None:
159160

160161
@property
161162
def id(self) -> str:
162-
return self.properties.id
163+
return self.long_id
163164

164165
@id.setter
165166
def id(self, aid: str) -> None:
166-
self.properties.id = aid
167+
self.long_id = aid
167168

168169
@property
169170
def long_id(self) -> str:
170-
if self.parent is not None and len(self.parent) > 0:
171-
return f"{self.parent}{self.id_delimiter}{self.id}"
171+
if len(self.parent) > 0:
172+
return f"{self.parent}{self.id_delimiter}{self.properties.id}"
172173
else:
173-
return self.id
174+
return self.properties.id
174175

175176
@long_id.setter
176177
def long_id(self, long_id: str) -> None:
177178
if self.id_delimiter in long_id:
178-
self.parent, self.id = long_id.split(self.id_delimiter)
179+
pid, aid = long_id.split(self.id_delimiter)
180+
if self.parent == '' or self.parent == pid:
181+
self.parent, self.properties.id = long_id.split(self.id_delimiter)
182+
else:
183+
raise ValueError(f'Parent view ID does not match, refusing the alter parent ID '
184+
f'("{self.parent}" vs. "{pid}", given long ID "{long_id}"')
179185
else:
180-
self.id = long_id
186+
self.properties.id = long_id
187+
188+
@property
189+
def _short_id(self) -> str:
190+
"""
191+
Method to directly get "short" form of the ID, not supposed to be used for general purpose, since
192+
we want to force usage of "long" form ID when recording and referring annotations.
193+
:return:
194+
"""
195+
return self.properties.id
181196

182197
@staticmethod
183198
def check_prop_value_is_simple_enough(

mmif/serialize/mmif.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,14 @@ def _deserialize(self, input_dict: dict) -> None:
239239
for prop_key, prop_value in ann.properties.items():
240240
self.get_document_by_id(doc_id)._props_ephemeral[prop_key] = prop_value
241241
except KeyError:
242-
warnings.warn(f"Annotation {ann.id} (in view {view.id}) has a document ID {doc_id} that "
242+
warnings.warn(f"Annotation {ann.id} has a document ID {doc_id} that "
243243
f"does not exist in the MMIF object. Skipping.", RuntimeWarning)
244244

245245
## caching start and end points for time-based annotations
246246
# add quick access to `start` and `end` values if the annotation is using `targets` property
247247
if 'targets' in ann.properties:
248248
if 'start' in ann.properties or 'end' in ann.properties:
249-
raise ValueError(f"Annotation {ann.id} (in view {view.id}) has `targets` and `start`/`end/` "
249+
raise ValueError(f"Annotation {ann.id} has `targets` and `start`/`end` "
250250
f"properties at the same time. Annotation anchors are ambiguous.")
251251
ann._props_ephemeral['start'] = self._get_linear_anchor_point(ann, start=True)
252252
ann._props_ephemeral['end'] = self._get_linear_anchor_point(ann, start=False)
@@ -318,7 +318,9 @@ def generate_capital_annotations(self):
318318
if doc_id is None:
319319
doc_id = ann.get_property('document')
320320
# only if we are sure that the document ID is unique across all views... (with v_id prefix)
321-
# TODO (krim @ 7/15/24): update id checking once https://github.com/clamsproject/mmif/issues/228 is resolved
321+
# since https://github.com/clamsproject/mmif/issues/228, all documents id should have
322+
# v_id prefix. This code is here for backward compatibility
323+
# TODO (krim @ 6/13/25): drop this checks with v2.0 release
322324
if not any([doc_id == doc.id for doc in self.documents]) and Mmif.id_delimiter not in doc_id:
323325
doc_id = f"{view.id}{Mmif.id_delimiter}{doc_id}"
324326
existing_anns[doc_id].update(ann.properties)

mmif/serialize/view.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ def append(self, value: Union[Annotation, Document], overwrite=False) -> None:
439439
in the list
440440
:return: None
441441
"""
442-
super()._append_with_key(value.id, value, overwrite)
442+
super()._append_with_key(value._short_id, value, overwrite)
443443

444444
def __getitem__(self, key: str):
445445
"""

tests/test_serialize.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -511,22 +511,18 @@ def test_get_anchor_point(self):
511511
v1 = mmif.new_view()
512512
v2 = mmif.new_view()
513513
tps = []
514-
tf1_targets_wo_vid = []
515514
tf2_targets_with_vid = []
516515
for timepoint in range(5):
517516
tp = v1.new_annotation(AnnotationTypes.TimePoint, timePoint=timepoint)
518517
tps.append(tp)
519-
tf1_targets_wo_vid.append(f"{tp.id}")
520-
tf2_targets_with_vid.append(f"{v1.id}{Mmif.id_delimiter}{tp.id}")
521-
tf1 = v1.new_annotation(AnnotationTypes.TimeFrame, targets=tf1_targets_wo_vid)
518+
tf2_targets_with_vid.append(f"{tp.id}")
519+
# tf1 used to be here and was used when long_id was less forced
522520
tf2 = v2.new_annotation(AnnotationTypes.TimeFrame, targets=tf2_targets_with_vid)
523521
tf3 = v2.new_annotation(AnnotationTypes.TimeFrame, start=100, end=200)
524522
self.assertEqual(mmif.get_start(tf3), 100)
525523
self.assertEqual(mmif.get_end(tf3), 200)
526524
self.assertEqual(mmif._get_linear_anchor_point(tf3, start=True), 100)
527525
self.assertEqual(mmif._get_linear_anchor_point(tf3, start=False), 200)
528-
self.assertEqual(mmif._get_linear_anchor_point(tf1, start=True), 0)
529-
self.assertEqual(mmif._get_linear_anchor_point(tf1, start=False), 4)
530526
self.assertEqual(mmif._get_linear_anchor_point(tf2, start=True), 0)
531527
self.assertEqual(mmif._get_linear_anchor_point(tf2, start=False), 4)
532528
self.assertEqual(mmif._get_linear_anchor_point(mmif[tf2_targets_with_vid[0]], start=True), 0)
@@ -591,6 +587,7 @@ def test_mmif_getitem_document(self):
591587
self.fail("didn't get document 'm1'")
592588

593589
def test_mmif_getitem_idconflict(self):
590+
# TODO (krim @ 6/13/25): major re-write when addressing #295
594591
m = Mmif(validate=False)
595592
v1 = m.new_view()
596593
v1.id = 'v1'
@@ -607,11 +604,11 @@ def test_mmif_getitem_idconflict(self):
607604
self.assertIsNotNone(m[v1.id])
608605
self.assertIsNotNone(m[v2.id])
609606
# conflict short IDs
610-
self.assertEqual(v1a.id, v2a.id)
607+
self.assertEqual(v1a._short_id, v2a._short_id)
611608
with pytest.raises(KeyError):
612-
_ = m[v1a.id]
613-
self.assertIsNotNone(m[v1a.long_id])
614-
self.assertIsNotNone(m[v2a.long_id])
609+
_ = m[v1a._short_id]
610+
self.assertIsNotNone(m[v1a.id])
611+
self.assertIsNotNone(m[v2a.id])
615612

616613
def test_mmif_getitem_view(self):
617614
try:
@@ -1018,14 +1015,17 @@ def test_get_property_with_alias(self):
10181015
def test_id(self):
10191016
anno_obj: Annotation = self.data['everything']['mmif']['v5:bb1']
10201017

1021-
old_id = anno_obj.id
1018+
old_id = anno_obj._short_id
10221019
self.assertEqual('bb1', old_id)
10231020

10241021
def test_change_id(self):
10251022
anno_obj: Annotation = self.data['everything']['mmif']['v5:bb1']
10261023

1027-
anno_obj.id = 'bb200'
1028-
self.assertEqual('bb200', anno_obj.id)
1024+
anno_obj.id = 'v5:bb200'
1025+
self.assertEqual('v5:bb200', anno_obj.id)
1026+
self.assertEqual('bb200', anno_obj._short_id)
1027+
with self.assertRaises(ValueError):
1028+
anno_obj.id = 'v4:bb200'
10291029

10301030
serialized = json.loads(anno_obj.serialize())
10311031
new_id = serialized['properties']['id']
@@ -1074,7 +1074,11 @@ def test_get_property(self):
10741074
d.long_id = 'v1:d1'
10751075
self.assertEqual('v1:d1', d.long_id)
10761076
self.assertEqual('v1', d.parent)
1077-
self.assertEqual('d1', d.id)
1077+
self.assertEqual('d1', d._short_id)
1078+
d.id = 'v1:d1'
1079+
self.assertEqual('v1:d1', d.long_id)
1080+
self.assertEqual('v1', d.parent)
1081+
self.assertEqual('d1', d._short_id)
10781082

10791083

10801084
def test_nontext_document(self):
@@ -1261,7 +1265,7 @@ def test_capital_annotation_generation_viewfinder(self):
12611265
v = mmif.new_view()
12621266
v.id = f'v{i}'
12631267
v.metadata.app = tester_appname
1264-
v.new_annotation(AnnotationTypes.Region, document=f'doc{i}')
1268+
v.new_annotation(AnnotationTypes.Region, document=doc.id)
12651269
mmif.add_view(v)
12661270
authors = ['me', 'you']
12671271
for i in range(2):
@@ -1283,7 +1287,7 @@ def test_capital_annotation_nongeneration_for_writable_documents(self):
12831287
v = mmif.new_view()
12841288
v.metadata.app = tester_appname
12851289
vid = v.id
1286-
new_td_id = mmif[vid].new_textdocument(text='new text', document='doc0', origin='transformation').id
1290+
new_td_id = mmif[vid].new_textdocument(text='new text', document=doc.id, origin='transformation').id
12871291
doc.add_property('author', 'me')
12881292

12891293
mmif_roundtrip = Mmif(mmif.serialize())

tests/test_utils.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,22 +51,22 @@ def setUp(self):
5151
self.mmif_obj.add_document(self.video_doc)
5252

5353
def test_extract_mid_frame(self):
54-
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=100, end=200, timeUnit='frame', document='d1')
54+
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=100, end=200, timeUnit='frame', document=self.video_doc.id)
5555
self.assertEqual(150, vdh.get_mid_framenum(self.mmif_obj, tf))
56-
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=0, end=200, timeUnit='frame', document='d1')
56+
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=0, end=200, timeUnit='frame', document=self.video_doc.id)
5757
self.assertEqual(100, vdh.get_mid_framenum(self.mmif_obj, tf))
58-
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=0, end=3, timeUnit='seconds', document='d1')
58+
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=0, end=3, timeUnit='seconds', document=self.video_doc.id)
5959
self.assertEqual(vdh.convert(1.5, 's', 'f', self.fps), vdh.get_mid_framenum(self.mmif_obj, tf))
6060

6161
def test_extract_representative_frame(self):
62-
tp = self.a_view.new_annotation(AnnotationTypes.TimePoint, timePoint=1500, timeUnit='milliseconds', document='d1')
63-
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=1000, end=2000, timeUnit='milliseconds', document='d1')
62+
tp = self.a_view.new_annotation(AnnotationTypes.TimePoint, timePoint=1500, timeUnit='milliseconds', document=self.video_doc.id)
63+
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=1000, end=2000, timeUnit='milliseconds', document=self.video_doc.id)
6464
tf.add_property('representatives', [tp.id])
6565
rep_frame_num = vdh.get_representative_framenum(self.mmif_obj, tf)
6666
expected_frame_num = vdh.millisecond_to_framenum(self.video_doc, tp.get_property('timePoint'))
6767
self.assertEqual(expected_frame_num, rep_frame_num)
6868
# and should work even if no representatives are provided
69-
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=1000, end=2000, timeUnit='milliseconds', document='d1')
69+
tf = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=1000, end=2000, timeUnit='milliseconds', document=self.video_doc.id)
7070
self.assertEqual(vdh.get_representative_framenum(self.mmif_obj, tf), vdh.get_mid_framenum(self.mmif_obj, tf))
7171
# check there is an error if there is a representative referencing a timepoint that does not exist
7272
tf.add_property('representatives', ['fake_tp_id'])
@@ -111,7 +111,7 @@ def test_convert_timepoint(self):
111111
self.assertEqual(vdh.convert(3, 's', 'f', self.fps), vdh.convert_timepoint(self.mmif_obj, timepoint_ann, 'f'))
112112

113113
def test_convert_timeframe(self):
114-
self.a_view.metadata.new_contain(AnnotationTypes.TimeFrame, timeUnit='frame', document='d1')
114+
self.a_view.metadata.new_contain(AnnotationTypes.TimeFrame, timeUnit='frame', document=self.video_doc.id)
115115
timeframe_ann = self.a_view.new_annotation(AnnotationTypes.TimeFrame, start=100, end=200)
116116
for times in zip((3.337, 6.674), vdh.convert_timeframe(self.mmif_obj, timeframe_ann, 's')):
117117
self.assertAlmostEqual(*times, places=0)

0 commit comments

Comments
 (0)