Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create dataclass for reference datatype and improve validation #45 #59

Merged
merged 5 commits into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 96 additions & 57 deletions arches_references/datatypes/datatypes.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,62 @@
import uuid
from dataclasses import asdict, dataclass

from django.db.models.fields.json import JSONField
from django.utils.translation import get_language, gettext as _

from arches.app.datatypes.base import BaseDataType
from arches.app.models.models import Node
from arches.app.models.graph import GraphValidationError

from arches_references.models import ListItem


@dataclass(kw_only=True)
class ReferenceLabel:
id: uuid.UUID
value: str
language_id: str
valuetype_id: str
list_item_id: uuid.UUID


@dataclass(kw_only=True)
class Reference:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An example of using this in a shell:

In [10]: label_types = TileModel.as_nodegroup("appellative_status", graph_slug="concept")

In [14]: obj = label_types.filter(appellative_status_ascribed_relation__isnull=False).last()

In [15]: obj.appellative_status_ascribed_name_content
Out[15]: 'Intentional Sampling'

In [17]: from pprint import pprint

In [18]: pprint(obj.appellative_status_ascribed_relation)
[Reference(uri='https://rdm.dev.fargeo.com/plugins/controlled-list-manager/item/2e3a2045-b44e-47fc-a27b-3078b17e08e4',
           labels=[ReferenceLabel(id='6ac8e471-476e-4fd0-b276-86e01a17bcc8',
                                  value='prefLabel',
                                  language_id='en',
                                  list_item_id='2e3a2045-b44e-47fc-a27b-3078b17e08e4',
                                  valuetype_id='prefLabel')])]

Copy link
Member Author

@jacobtylerwalls jacobtylerwalls Feb 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually this breaks calling save(), so I need to either allow that or just unwind this pattern and go back to plain dicts. EDIT: done

uri: str
labels: list[ReferenceLabel]
list_id: uuid.UUID


class ReferenceDataType(BaseDataType):
rest_framework_model_field = JSONField(null=True)

def to_python(self, value):
    """Materialize raw reference data as a list of Reference objects.

    Args:
        value: None, or an iterable of mappings whose keys match the
            Reference fields. Each entry under "labels" is expanded
            into a ReferenceLabel.

    Returns:
        None when value is None, otherwise a list of Reference objects.

    Raises:
        ValueError: If value is falsy but not None (e.g. "" or []).
        TypeError: From the dataclass constructors when a mapping has
            missing or unexpected keys.
    """
    if value is None:
        return None
    if not value:
        raise ValueError(_("Reference datatype value cannot be empty"))

    def build_reference(raw):
        # Work on a shallow copy so the caller's mapping is never mutated.
        kwargs = {**raw}
        raw_labels = kwargs.get("labels")
        if raw_labels:
            kwargs["labels"] = [ReferenceLabel(**item) for item in raw_labels]
        elif raw_labels == []:
            # Drop an empty list so the Reference constructor reports
            # "labels" as a missing required value.
            del kwargs["labels"]
        return Reference(**kwargs)

    return [build_reference(raw) for raw in value]

def serialize(self, value):
    """Return a plain-data representation of a reference value.

    Args:
        value: Usually a list whose items are Reference objects or
            mappings; any other value is passed through untouched.

    Returns:
        For a list, a new list in which Reference instances are turned
        into dicts via dataclasses.asdict() and every other item is
        shallow-copied into a new dict. For anything else, value itself.
    """
    if not isinstance(value, list):
        return value

    plain = []
    for item in value:
        if isinstance(item, Reference):
            plain.append(asdict(item))
        else:
            plain.append({**item})
    return plain

def validate(
self,
value,
Expand All @@ -22,66 +67,60 @@ def validate(
strict=False,
**kwargs,
):
errors = []
title = _("Invalid Reference Datatype Value")
if value is None:
return errors

if type(value) == list and len(value):
for reference in value:
if "uri" in reference and len(reference["uri"]):
pass
else:
errors.append(
{
"type": "ERROR",
"message": _(
"Reference objects require a 'uri' property and corresponding value"
),
"title": title,
}
)
if "labels" in reference:
pref_label_languages = []
for label in reference["labels"]:
if not all(
key in label
for key in ("id", "value", "language_id", "valuetype_id")
):
errors.append(
{
"type": "ERROR",
"message": _(
"Reference labels require properties: id(uuid), value(string), language_id(e.g. 'en'), and valuetype_id(e.g. 'prefLabel')"
),
"title": title,
}
)
if label["valuetype_id"] == "prefLabel":
pref_label_languages.append(label["language_id"])

if len(set(pref_label_languages)) < len(pref_label_languages):
errors.append(
{
"type": "ERROR",
"message": _(
"A reference can have only one prefLabel per language"
),
"title": title,
}
)
else:
errors.append(
{
"type": "ERROR",
"message": _("Reference value must be a list of reference objects"),
"title": title,
}
)
return errors
try:
parsed = self.to_python(value)
self.validate_pref_labels(parsed)
self.validate_multivalue(parsed, node, nodeid)
except Exception as e:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should exceptions be caught/transformed at this level? Would it be a better pattern to do this at the call site?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For better or worse I think this is the pattern used across datatypes in Arches. If something goes wrong, validate returns a list of errors. That list gets passed on to importers so that they can report everything that's wrong with whatever data a user is trying to load.

return [self.transform_exception(e)]
return []

def validate_pref_labels(self, references: list[Reference]):
    """Ensure no reference carries two prefLabels in the same language.

    Args:
        references: Reference objects whose labels are inspected.

    Raises:
        ValueError: If any single reference has more than one label with
            valuetype_id "prefLabel" sharing the same language_id.
    """
    for reference in references:
        languages = []
        for label in reference.labels:
            if label.valuetype_id == "prefLabel":
                languages.append(label.language_id)
        # De-duplicating shrinks the collection only when a language repeats.
        if len(languages) != len(set(languages)):
            raise ValueError(
                _("A reference can have only one prefLabel per language")
            )

def validate_multivalue(self, parsed, node, nodeid):
    """Reject multiple references on a node not configured for them.

    Args:
        parsed: The sized collection of references being validated.
        node: The node whose config is consulted; may be falsy, in which
            case it is looked up by nodeid.
        nodeid: Identifier used to fetch the node when node is falsy.

    Raises:
        ValueError: Without a message when neither node nor nodeid is
            given; with a message when the node's "multiValue" config is
            falsy and parsed holds more than one reference.
    """
    if not node and not nodeid:
        raise ValueError
    if not node:
        try:
            node = Node.objects.get(nodeid=nodeid)
        except Node.DoesNotExist:
            # No node to check against; nothing is validated.
            return

    allows_many = node.config.get("multiValue")
    if not allows_many and len(parsed) > 1:
        raise ValueError(_("This node does not allow multiple references."))

@staticmethod
def transform_exception(e):
message = _("Unknown error")
if isinstance(e, TypeError) and e.args:
# Localize the error raised by the dataclass constructor.
if "__init__() missing" in e.args[0]:
message = _(
"Missing required value(s): {}".format(e.args[0].split(": ")[-1])
)
elif "unexpected keyword argument" in e.args[0]:
message = _(
"Unexpected value: {}".format(e.args[0].split("argument ")[-1])
)
elif isinstance(e, ValueError) and e.args:
message = e.args[0]
return {
"type": "ERROR",
"message": message,
"title": _("Invalid Reference Datatype Value"),
}

def transform_value_for_tile(self, value, **kwargs):
list_id = kwargs.get("controlledList")
value = self.serialize(value)
if (
isinstance(value, list)
and isinstance(value[0], dict)
Expand Down
8 changes: 4 additions & 4 deletions arches_references/media/js/viewmodels/reference-select.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ define([
const newItem = selection.map(uri => {
return {
"labels": NAME_LOOKUP[uri].labels,
"listid": NAME_LOOKUP[uri]["listid"],
"list_id": NAME_LOOKUP[uri]["list_id"],
"uri": uri
};
});
Expand Down Expand Up @@ -90,7 +90,7 @@ define([
processResults: function(data) {
const items = data.items;
items.forEach(item => {
item["listid"] = item.id;
item["list_id"] = item.id;
item.id = item.uri;
item.disabled = item.guide;
item.labels = item.values.filter(val => self.isLabel(val));
Expand All @@ -111,7 +111,7 @@ define([

if (item.uri) {
const text = self.getPrefLabel(item.labels) || arches.translations.searching + '...';
NAME_LOOKUP[item.uri] = {"prefLabel": text, "labels": item.labels, "listid": item.list_id};
NAME_LOOKUP[item.uri] = {"prefLabel": text, "labels": item.labels, "list_id": item.list_id};
return indentation + text;
}
},
Expand All @@ -132,7 +132,7 @@ define([
NAME_LOOKUP[value.uri] = {
"prefLabel": self.getPrefLabel(value.labels),
"labels": value.labels,
"listid": value.listid
"list_id": value.list_id,
};
});

Expand Down
2 changes: 1 addition & 1 deletion arches_references/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def build_tile_value(self):
tile_value = {
"uri": self.uri or self.generate_uri(),
"labels": [label.serialize() for label in self.list_item_values.labels()],
"listid": str(self.list_id),
"list_id": str(self.list_id),
}
return tile_value

Expand Down
90 changes: 69 additions & 21 deletions tests/reference_datatype_tests.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import uuid
from types import SimpleNamespace

from django.test import TestCase
from arches.app.datatypes.datatypes import DataTypeFactory
from arches.app.models.tile import Tile
from arches_references.models import List, ListItem, ListItemValue
from django.test import TestCase
from types import SimpleNamespace

from tests.test_views import ListTests

# these tests can be run from the command line via
# python manage.py test tests.reference_datatype_tests --settings="tests.test_settings"
Expand All @@ -12,22 +16,41 @@ class ReferenceDataTypeTests(TestCase):

@classmethod
def setUpTestData(cls):
from tests.test_views import ListTests

return ListTests.setUpTestData()

def test_validate(self):
reference = DataTypeFactory().get_instance("reference")

for value in [
"",
[],
[{}], # reference has no 'uri'
[{"uri": ""}], # reference uri is empty
mock_node = SimpleNamespace(config={"multiValue": False})

for value, message in [
("", "Reference datatype value cannot be empty"),
([], "Reference datatype value cannot be empty"),
([{}], "Missing required value(s): 'uri', 'labels', and 'list_id'"),
(
[
{
"uri": "",
"labels": [], # notice [] rather than None
"list_id": str(uuid.uuid4()),
}
],
"Missing required value(s): 'labels'",
),
(
[
{
"uri": "https://www.domain.com/123",
"labels": [],
"garbage_key": "garbage_value",
}
],
"Unexpected value: 'garbage_key'",
),
]:
with self.subTest(reference_value=value):
errors = reference.validate(value)
self.assertTrue(len(errors) > 0)
errors = reference.validate(value, node=mock_node)
self.assertEqual(len(errors), 1, errors)
self.assertEqual(errors[0]["message"], message)

data = {
"uri": "https://www.domain.com/label",
Expand All @@ -36,30 +59,45 @@ def test_validate(self):
"id": "23b4efbd-2e46-4b3f-8d75-2f3b2bb96af2",
"value": "label",
"language_id": "en",
"list_item_id": str(uuid.uuid4()),
"valuetype_id": "prefLabel",
},
{
"id": "e8676242-f0c7-4e3d-b031-fded4960cd86",
"language_id": "de",
"list_item_id": str(uuid.uuid4()),
"valuetype_id": "prefLabel",
},
],
"list_id": uuid.uuid4(),
}

errors = reference.validate(value=[data]) # label missing value property
self.assertIsNotNone(errors)
# Label missing value property
errors = reference.validate(value=[data], node=mock_node)
self.assertEqual(len(errors), 1, errors)

data["labels"][1]["value"] = "a label"
data["labels"][1]["language_id"] = "en"

errors = reference.validate(value=[data]) # too many prefLabels per language
self.assertIsNotNone(errors)
# Too many prefLabels per language
errors = reference.validate(value=[data], node=mock_node)
self.assertEqual(len(errors), 1, errors)

data["labels"][1]["value"] = "ein label"
data["labels"][1]["language_id"] = "de"
data["labels"][1]["list_item_id"] = str(uuid.uuid4())

# Valid
errors = reference.validate(value=[data], node=mock_node)
self.assertEqual(errors, [])

# Too many references
errors = reference.validate(value=[data, data], node=mock_node)
self.assertEqual(len(errors), 1, errors)

errors = reference.validate(value=[data]) # data should be valid
self.assertTrue(len(errors) == 0)
# User error (missing arguments)
errors = reference.validate(value=[data])
self.assertEqual(len(errors), 1, errors)

def test_tile_clean(self):
reference = DataTypeFactory().get_instance("reference")
Expand All @@ -74,9 +112,10 @@ def test_tile_clean(self):
"value": "label",
"language_id": "en",
"valuetype_id": "prefLabel",
"list_item_id": str(uuid.uuid4()),
},
],
"listid": "fd9508dc-2aab-4c46-85ae-dccce1200035",
"list_id": "fd9508dc-2aab-4c46-85ae-dccce1200035",
}
]

Expand All @@ -96,6 +135,15 @@ def test_tile_clean(self):
reference.clean(tile1, nodeid)
self.assertIsNone(tile1.data[nodeid])

def test_dataclass_roundtrip(self):
    """A tile value survives conversion to dataclasses and back unchanged."""
    datatype = DataTypeFactory().get_instance("reference")
    config = {"controlledList": str(List.objects.get(name="list1").pk)}

    tile_val = datatype.transform_value_for_tile("label1-pref", **config)
    # Parse into Reference objects, then feed those back through the
    # same transformation to confirm nothing is lost or altered.
    tile_val_reparsed = datatype.transform_value_for_tile(
        datatype.to_python(tile_val), **config
    )

    self.assertEqual(tile_val_reparsed, tile_val)

def test_transform_value_for_tile(self):
reference = DataTypeFactory().get_instance("reference")
list1_pk = str(List.objects.get(name="list1").pk)
Expand All @@ -105,7 +153,7 @@ def test_transform_value_for_tile(self):
self.assertTrue(isinstance(tile_value1, list))
self.assertTrue("uri" in tile_value1[0])
self.assertTrue("labels" in tile_value1[0])
self.assertTrue("listid" in tile_value1[0])
self.assertTrue("list_id" in tile_value1[0])

self.assertIsNone(reference.transform_value_for_tile(None, **config))

Expand Down Expand Up @@ -154,7 +202,7 @@ def test_get_display_value(self):
"valuetype_id": "prefLabel",
},
],
"listid": "a8da34eb-575b-498c-ada7-161ee745fd16",
"list_id": "a8da34eb-575b-498c-ada7-161ee745fd16",
}
]
},
Expand Down