Skip to content

Commit 674861c

Browse files
committed
fix multipart binary/urlencoded composed-schema matching
1 parent aef7c45 commit 674861c

5 files changed

Lines changed: 1221 additions & 63 deletions

File tree

openapi_core/deserializing/media_types/deserializers.py

Lines changed: 137 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
1+
from dataclasses import dataclass
12
from typing import TYPE_CHECKING
23
from typing import Any
4+
from typing import Iterator
35
from typing import Mapping
46
from typing import Optional
57
from xml.etree.ElementTree import ParseError
68

79
from jsonschema_path import SchemaPath
810

11+
from openapi_core.deserializing.exceptions import DeserializeError
912
from openapi_core.deserializing.media_types.datatypes import (
1013
DeserializerCallable,
1114
)
@@ -23,6 +26,7 @@
2326
from openapi_core.schema.protocols import SuportsGetAll
2427
from openapi_core.schema.protocols import SuportsGetList
2528
from openapi_core.schema.schemas import get_properties
29+
from openapi_core.validation.schemas.exceptions import ValidateError
2630
from openapi_core.validation.schemas.validators import SchemaValidator
2731

2832
if TYPE_CHECKING:
@@ -63,6 +67,12 @@ def get_deserializer_callable(
6367
return self.media_type_deserializers[mimetype]
6468

6569

70+
@dataclass(frozen=True)
class FormMediaSchemaMatch:
    """Pair a composed-schema branch with the properties decoded against it.

    Instances are immutable (``frozen=True``).
    """

    # The subschema (a ``oneOf`` / ``anyOf`` / ``allOf`` entry) that matched.
    schema: SchemaPath
    # Properties decoded from the form-media location using ``schema``.
    decoded_candidate: Mapping[str, Any]
74+
75+
6676
class MediaTypeDeserializer:
6777
def __init__(
6878
self,
@@ -97,7 +107,7 @@ def deserialize(self, value: bytes) -> Any:
97107
):
98108
return deserialized
99109

100-
# decode multipart request bodies if schema provided
110+
# Decode form-media bodies only when a schema is available.
101111
if self.schema is not None:
102112
return self.decode(deserialized)
103113

@@ -126,51 +136,50 @@ def evolve(
126136
schema=schema,
127137
schema_validator=schema_validator,
128138
schema_caster=schema_caster,
139+
encoding=self.encoding,
140+
**self.parameters,
129141
)
130142

131143
def decode(
132-
self, location: Mapping[str, Any], schema_only: bool = False
144+
self,
145+
location: Mapping[str, Any],
146+
schema_only: bool = False,
147+
use_defaults: bool = True,
133148
) -> Mapping[str, Any]:
134-
# schema is required for multipart
149+
# Form-media decoding always needs a schema to resolve properties.
135150
assert self.schema is not None
136151
properties: dict[str, Any] = {}
137152

138-
# For urlencoded/multipart, use caster for oneOf/anyOf detection if validator available
153+
# For form media, select composed branches from decoded candidates.
139154
if self.schema_validator is not None:
140-
one_of_schema = self.schema_validator.get_one_of_schema(
141-
location, caster=self.schema_caster
142-
)
143-
if one_of_schema is not None:
144-
one_of_properties = self.evolve(one_of_schema).decode(
145-
location, schema_only=True
155+
one_of_match = self.get_form_media_one_of_match(location)
156+
if one_of_match is not None:
157+
self.update_decoded_properties(
158+
properties,
159+
one_of_match.decoded_candidate,
146160
)
147-
properties.update(one_of_properties)
148161

149-
any_of_schemas = self.schema_validator.iter_any_of_schemas(
150-
location, caster=self.schema_caster
151-
)
152-
for any_of_schema in any_of_schemas:
153-
any_of_properties = self.evolve(any_of_schema).decode(
154-
location, schema_only=True
162+
any_of_matches = self.iter_form_media_any_of_matches(location)
163+
for any_of_match in any_of_matches:
164+
self.update_decoded_properties(
165+
properties,
166+
any_of_match.decoded_candidate,
155167
)
156-
properties.update(any_of_properties)
157168

158-
all_of_schemas = self.schema_validator.iter_all_of_schemas(
159-
location
160-
)
161-
for all_of_schema in all_of_schemas:
162-
all_of_properties = self.evolve(all_of_schema).decode(
163-
location, schema_only=True
169+
all_of_matches = self.iter_form_media_all_of_matches(location)
170+
for all_of_match in all_of_matches:
171+
self.update_decoded_properties(
172+
properties,
173+
all_of_match.decoded_candidate,
164174
)
165-
properties.update(all_of_properties)
166175

167176
for prop_name, prop_schema in get_properties(self.schema).items():
168177
try:
169178
properties[prop_name] = self.decode_property(
170179
prop_name, prop_schema, location
171180
)
172181
except KeyError:
173-
if "default" not in prop_schema:
182+
if not use_defaults or "default" not in prop_schema:
174183
continue
175184
properties[prop_name] = (prop_schema / "default").read_value()
176185

@@ -179,6 +188,108 @@ def decode(
179188

180189
return properties
181190

191+
def update_decoded_properties(
    self,
    properties: dict[str, Any],
    candidate: Mapping[str, Any],
) -> None:
    """Fold the entries of ``candidate`` into ``properties`` in place.

    A name that is not yet in ``properties`` is copied over unchanged.
    A name that is already present is resolved by
    ``merge_decoded_property_value``, called with the stored value as
    ``current`` and the candidate's value as ``new``.

    :param properties: accumulator that is mutated by this call
    :param candidate: decoded properties of one matched branch
    """
    for name, incoming in candidate.items():
        if name in properties:
            # Overlapping field: let the merge policy pick the winner.
            resolved = self.merge_decoded_property_value(
                properties[name],
                incoming,
            )
        else:
            resolved = incoming
        properties[name] = resolved
206+
def merge_decoded_property_value(self, current: Any, new: Any) -> Any:
    """Choose between two decoded values for the same property name.

    ``current`` is kept when both values compare equal, or when
    ``current`` is ``bytes`` and ``new`` is ``str``. In every other case
    ``new`` is returned, including the case where ``current`` is ``str``
    and ``new`` is ``bytes``.

    The net effect is that a lossless binary value is preferred over
    surrogate-decoded text when overlapping composed branches describe
    the same multipart field.
    """
    keep_current = current == new or (
        isinstance(current, bytes) and isinstance(new, str)
    )
    return current if keep_current else new
218+
219+
def get_form_media_one_of_match(
    self,
    location: Mapping[str, Any],
) -> Optional[FormMediaSchemaMatch]:
    """Return the first ``oneOf`` branch that matches ``location``.

    Branches are tried in the order they appear under ``oneOf``, using
    ``get_form_media_schema_match``. The search stops at the first
    branch that yields a match.

    :return: the match, or ``None`` when there is no schema, the schema
        has no ``oneOf`` keyword, or no branch matches
    """
    if self.schema is None or "oneOf" not in self.schema:
        return None

    # Lazy generator: later branches are not evaluated once one matches.
    attempts = (
        self.get_form_media_schema_match(branch, location)
        for branch in self.schema / "oneOf"
    )
    return next(
        (found for found in attempts if found is not None),
        None,
    )
232+
233+
def iter_form_media_any_of_matches(
    self,
    location: Mapping[str, Any],
) -> list[FormMediaSchemaMatch]:
    """Collect every ``anyOf`` branch that matches ``location``.

    :return: matches in branch order; an empty list when there is no
        schema or the schema has no ``anyOf`` keyword
    """
    has_any_of = self.schema is not None and "anyOf" in self.schema
    if not has_any_of:
        return []

    return [*self.iter_form_media_schema_matches("anyOf", location)]
241+
242+
def iter_form_media_all_of_matches(
    self,
    location: Mapping[str, Any],
) -> list[FormMediaSchemaMatch]:
    """Collect every ``allOf`` branch that matches ``location``.

    :return: matches in branch order; an empty list when there is no
        schema or the schema has no ``allOf`` keyword
    """
    matches: list[FormMediaSchemaMatch] = []
    if self.schema is not None and "allOf" in self.schema:
        matches.extend(
            self.iter_form_media_schema_matches("allOf", location)
        )
    return matches
250+
251+
def iter_form_media_schema_matches(
    self,
    keyword: str,
    location: Mapping[str, Any],
) -> Iterator[FormMediaSchemaMatch]:
    """Yield a match for each branch under ``keyword`` that accepts ``location``.

    Branches for which ``get_form_media_schema_match`` returns ``None``
    are skipped. A schema must be set on the deserializer; this is
    asserted once iteration starts.

    :param keyword: schema keyword whose subschemas are iterated
        (called with ``"anyOf"`` and ``"allOf"`` in this file)
    :param location: raw form-media mapping being decoded
    """
    assert self.schema is not None

    branches = self.schema / keyword
    for branch in branches:
        found = self.get_form_media_schema_match(branch, location)
        if found is None:
            continue
        yield found
262+
263+
def get_form_media_schema_match(
    self,
    subschema: SchemaPath,
    location: Mapping[str, Any],
) -> Optional[FormMediaSchemaMatch]:
    """Check whether ``location`` satisfies ``subschema`` and decode it.

    Steps:

    1. Decode ``location`` against ``subschema`` with
       ``use_defaults=False``. A ``DeserializeError`` means no match.
    2. Overlay the decoded values on a copy of ``location`` and validate
       the result with a validator evolved for ``subschema``. A
       ``ValidateError`` means no match.
    3. Decode again with the default ``use_defaults`` setting and return
       that result together with ``subschema``.

    A schema validator must be set on the deserializer (asserted).

    :return: the match, or ``None`` when decoding or validation fails
    """
    assert self.schema_validator is not None

    branch_deserializer = self.evolve(subschema)
    try:
        explicit_values = branch_deserializer.decode(
            location,
            schema_only=True,
            use_defaults=False,
        )
    except DeserializeError:
        return None

    branch_validator = self.schema_validator.evolve(subschema)
    # Validation sees the raw fields, with decoded values taking priority.
    probe = dict(location)
    probe.update(explicit_values)

    try:
        branch_validator.validate(probe)
    except ValidateError:
        return None

    return FormMediaSchemaMatch(
        schema=subschema,
        decoded_candidate=branch_deserializer.decode(
            location, schema_only=True
        ),
    )
292+
182293
def decode_property(
183294
self,
184295
prop_name: str,

openapi_core/unmarshalling/unmarshallers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def _get_param_or_header_and_schema(
117117
def _get_content_and_schema(
118118
self, raw: Any, content: SchemaPath, mimetype: Optional[str] = None
119119
) -> Tuple[Any, Optional[SchemaPath]]:
120-
casted, schema = super()._get_content_and_schema(
120+
casted, schema = self._get_content_schema_value_and_schema(
121121
raw, content, mimetype
122122
)
123123
if schema is None:

0 commit comments

Comments
 (0)