Skip to content

Support xs:list inline lists, take 2 #38

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ repos:
- --multi-line=9
- --project=pydantic_xml
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.991
rev: v1.0.0
hooks:
- id: mypy
stages:
Expand Down
2 changes: 1 addition & 1 deletion examples/snippets/model_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Config:


class Vehicles(BaseXmlModel, tag='vehicles'):
items: List[Union[Car, Airplane]]
items: List[Union[Car, Airplane]] = element()
# [model-end]


Expand Down
2 changes: 1 addition & 1 deletion examples/snippets/union_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class MouseEvent(Event, tag='mouse'):


class Log(BaseXmlModel, tag='log'):
events: List[Union[KeyboardEvent, MouseEvent]]
events: List[Union[KeyboardEvent, MouseEvent]] = element()
# [model-end]


Expand Down
4 changes: 2 additions & 2 deletions examples/snippets/union_primitives.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime as dt
from typing import List, Optional, Union

from pydantic_xml import BaseXmlModel, attr
from pydantic_xml import BaseXmlModel, attr, element


# [model-start]
Expand All @@ -11,7 +11,7 @@ class Message(BaseXmlModel, tag='Message'):


class Messages(BaseXmlModel):
messages: List[Message]
messages: List[Message] = element()
# [model-end]


Expand Down
83 changes: 77 additions & 6 deletions pydantic_xml/serializers/factories/homogeneous.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import dataclasses as dc
from copy import deepcopy
from typing import Any, List, Optional, Type
from typing import Any, Collection, List, Optional, Type

import pydantic as pd

import pydantic_xml as pxml
from pydantic_xml import errors
from pydantic_xml.element import XmlElementReader, XmlElementWriter
from pydantic_xml.serializers.encoder import XmlEncoder
from pydantic_xml.serializers.serializer import Location, PydanticShapeType, Serializer
from pydantic_xml.serializers.serializer import Location, PydanticShapeType, Serializer, is_xml_model
from pydantic_xml.utils import QName, merge_nsmaps


Expand All @@ -17,6 +17,79 @@ class HomogeneousSerializerFactory:
Homogeneous collection type serializer factory.
"""

class TextSerializer(Serializer):
def __init__(
self, model: Type['pxml.BaseXmlModel'], model_field: pd.fields.ModelField, ctx: Serializer.Context,
):
assert model_field.sub_fields and len(model_field.sub_fields) == 1
if (
is_xml_model(model_field.type_) or
issubclass(model_field.type_, tuple)
):
raise errors.ModelFieldError(
model.__name__, model_field.name, "Inline list value should be of scalar type",
)

def serialize(
self, element: XmlElementWriter, value: Collection[Any], *, encoder: XmlEncoder,
skip_empty: bool = False,
) -> Optional[XmlElementWriter]:
if value is None or skip_empty and len(value) == 0:
return element

encoded = " ".join(encoder.encode(val) for val in value)
element.set_text(encoded)
return element

def deserialize(self, element: Optional[XmlElementReader]) -> Optional[List[Any]]:
if element is None:
return None

text = element.pop_text()

if text is None:
return None

return [value for value in text.split()]

class AttributeSerializer(Serializer):
def __init__(
self, model: Type['pxml.BaseXmlModel'], model_field: pd.fields.ModelField, ctx: Serializer.Context,
):
assert model_field.sub_fields and len(model_field.sub_fields) == 1
if issubclass(model_field.type_, pxml.BaseXmlModel):
raise errors.ModelFieldError(
model.__name__, model_field.name, "Inline list value should be of scalar type",
)

_, ns, nsmap = self._get_entity_info(model_field)

name = model_field.alias

self.attr_name = QName.from_alias(tag=name, ns=ns, nsmap=nsmap, is_attr=True).uri

def serialize(
self, element: XmlElementWriter, value: Collection[Any], *, encoder: XmlEncoder,
skip_empty: bool = False,
) -> Optional[XmlElementWriter]:
if value is None or skip_empty and len(value) == 0:
return element

encoded = " ".join(encoder.encode(val) for val in value)
element.set_attribute(self.attr_name, encoded)
return element

def deserialize(self, element: Optional[XmlElementReader]) -> Optional[List[Any]]:
if element is None:
return None

attribute = element.pop_attrib(self.attr_name)

if attribute is None:
return []

return [value for value in attribute.split()]

class ElementSerializer(Serializer):
def __init__(
self, model: Type['pxml.BaseXmlModel'], model_field: pd.fields.ModelField, ctx: Serializer.Context,
Expand Down Expand Up @@ -103,10 +176,8 @@ def build(
if field_location is Location.ELEMENT:
return cls.ElementSerializer(model, model_field, ctx)
elif field_location is Location.MISSING:
return cls.ElementSerializer(model, model_field, ctx)
return cls.TextSerializer(model, model_field, ctx)
elif field_location is Location.ATTRIBUTE:
raise errors.ModelFieldError(
model.__name__, model_field.name, "attributes of collection type are not supported",
)
return cls.AttributeSerializer(model, model_field, ctx)
else:
raise AssertionError("unreachable")
97 changes: 93 additions & 4 deletions tests/test_homogeneous_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,93 @@ class RootModel(BaseXmlModel, tag='model'):
assert_xml_equal(actual_xml, xml)


def test_homogeneous_definition_errors():
with pytest.raises(errors.ModelFieldError):
class TestModel(BaseXmlModel):
attr1: List[int] = attr()
def test_text_list_extraction():
class RootModel(BaseXmlModel, tag="model"):
values: List[int]

xml = '''
<model>1 2 70 -34</model>
'''

actual_obj = RootModel.from_xml(xml)
expected_obj = RootModel(
values = [1, 2, 70, -34],
)

assert actual_obj == expected_obj

actual_xml = actual_obj.to_xml()
assert_xml_equal(actual_xml, xml)


def test_text_tuple_extraction():
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like this test is redundant because it tests the same case as the previous one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment below.

class RootModel(BaseXmlModel, tag="model"):
values: Tuple[int, ...]

xml = '''
<model>1 2 70 -34</model>
'''

actual_obj = RootModel.from_xml(xml)
expected_obj = RootModel(
values=[1, 2, 70, -34],
)

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`assert actual_obj == expected_obj` is missing, I suppose.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, nice catch!
Yeah I must've gotten so happy changing the name of the test that I forgot to change the logic...

assert actual_obj == expected_obj

actual_xml = actual_obj.to_xml()
assert_xml_equal(actual_xml, xml)


def test_attr_list_extraction():
class RootModel(BaseXmlModel, tag="model"):
values: List[float] = attr()

xml = '''
<model values="3.14 -1.0 300.0"/>

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will there be support for custom delimiters for attribute values? The space char seems to be the default for now, but curious if other delimiters will be supported in the future. I can imagine values might be separated by other chars like | and , .

# comma delimiter
<model values="3.14,-1.0,300.0"/> 

# vertical bar delimiter.
<model values="3.14|-1.0|300.0"/>

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current PR implements support for XML Schema lists, https://www.w3.org/TR/xmlschema11-2/#atomic-vs-list , which, afaict, only support white-space delimited values. I have no plans to implement other delimiters as this covers my (or rather my project $work's) needs. For other kinds of delimiters, I think you're better off using elements, as they are more general.

However, if someone needs to consume lists with other delimiters, I'd gladly help with the implementation.

Copy link

@ciaranfinn ciaranfinn Mar 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @ajoino, thank you for your reply. Yes, the main issue I have is on the consumption side. At the moment my challenge is with the deserialization of the XML. I am hoping to be able to parse the pageReferences attribute and represent it as type List[int] instead of str. However, compared to your space-delimited examples in this PR, I am dealing with both | and , delimiters.

Example XML:

<?xml version="1.0" encoding="UTF-8"?>
<books>
    <book id="1" name="BookA" pageReferences="2,6,14"/>
    <book id="2" name="BookB" pageReferences="1,8,57"/>
</books>

Python Classes:

class Book(BaseXmlModel, tag="book"):
    id: str = attr(name="id")
    name: str = attr(name="name")
    page_references: str = attr(name="pageReferences")


class BookResponse(BaseXmlModel, tag="books"):
    books: Optional[List[Book]] = element()

I was mainly wondering if it would be possible to pass a custom delimiter param to attr(). This could act as an override to the default space delimiter whenever it is specified. Alternatively you may have a better idea how to accomplish this, but this is just my reasoning for now.

class Book(BaseXmlModel, tag="book"):
    ...
    page_references: List[int] = attr(name="pageReferences",  delimiter=",")

'''
# This will fail if scientific notation is used
# i.e. if 300 is replaced with 3e2 or 300, the deserializer
# will always use the standard notation with the added `.0`.
# While this behaviour fails the tests, it shouldn't
# matter in practice.

actual_obj = RootModel.from_xml(xml)
expected_obj = RootModel(
values=[3.14, -1.0, 3e2],
)

assert actual_obj == expected_obj

actual_xml = actual_obj.to_xml()
assert_xml_equal(actual_xml, xml)


def test_attr_tuple_extraction():
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is the same as the previous one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm testing both tuple and list to make sure that they both work. But yes, they are essentially the same and if this test is covered by other tests I'm willing to remove it.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean they are literally identical. Based on the test name I think the model should be like this:

class RootModel(BaseXmlModel, tag="model"):
    values: Tuple[float] = attr()

class RootModel(BaseXmlModel, tag="model"):
values: Tuple[float, ...] = attr()

xml = '''
<model values="3.14 -1.0 300.0"/>
'''
# This will fail if scientific notation is used
# i.e. if 300 is replaced with 3e2 or 300, the deserializer
# will always use the standard notation with the added `.0`.
# While this behaviour fails the tests, it shouldn't
# matter in practice.

actual_obj = RootModel.from_xml(xml)
expected_obj = RootModel(
values=(3.14, -1.0, 3e2),
)

assert actual_obj == expected_obj

actual_xml = actual_obj.to_xml()
assert_xml_equal(actual_xml, xml)


def test_homogeneous_definition_errors():
with pytest.raises(errors.ModelFieldError):
class TestModel(BaseXmlModel):
attr1: List[Tuple[int, ...]]
Expand Down Expand Up @@ -156,3 +238,10 @@ class TestSubModel(BaseXmlModel):

class TestModel(BaseXmlModel):
__root__: List[TestSubModel]

with pytest.raises(errors.ModelFieldError):
class TestSubModel(BaseXmlModel):
attr: int

class TestModel(BaseXmlModel):
text: List[TestSubModel]
2 changes: 1 addition & 1 deletion tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class TestSubModel(BaseXmlModel, tag='model'):

class TestModel(BaseXmlModel, tag='model'):
model: TestSubModel
list: List[TestSubModel] = []
list: List[TestSubModel] = element(default=[])
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason for that change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, that model would only have one possible text field, as List fields were implicitly elements. Now, without explicitly making a List type an element, it will be interpreted as the text. That would make that model have two Python fields that represent XML texts, model and list. Since the intention of the test was to have list represent an XML element, I changed it to explicitly be so.

This is one of the backwards-incompatible changes I mentioned in the old PR.

tuple: Optional[Tuple[TestSubModel, TestSubModel]] = None
attrs: Dict[str, str] = {}
wrapped: Optional[str] = wrapped('envelope')
Expand Down