Skip to content

Commit 6323e9a

Browse files
committed
Skip resolving recursive references in resolve_all_refs
1 parent b50af13 commit 6323e9a

File tree

2 files changed

+167
-21
lines changed

2 files changed

+167
-21
lines changed

src/hypothesis_jsonschema/_canonicalise.py

+37-21
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import re
1919
from copy import deepcopy
2020
from typing import Any, Dict, List, NoReturn, Optional, Tuple, Union
21+
from urllib.parse import urljoin
2122

2223
import jsonschema
2324
from hypothesis.errors import InvalidArgument
@@ -559,16 +560,25 @@ def resolve_remote(self, uri: str) -> NoReturn:
559560
)
560561

561562

563+
def is_recursive_reference(reference: str, resolver: LocalResolver) -> bool:
564+
"""Detect if the given reference is recursive."""
565+
# Special case: a reference to the schema's root is always recursive
566+
if reference == "#":
567+
return True
568+
# During reference resolving the scope might go to external schemas. `hypothesis-jsonschema` does not support
569+
# schemas behind remote references, but the underlying `jsonschema` library includes meta schemas for
570+
# different JSON Schema drafts that are available transparently, and they count as external schemas in this context.
571+
# For this reason we need to check the reference relatively to the base uri.
572+
full_reference = urljoin(resolver.base_uri, reference)
573+
# If a fully-qualified reference is in the resolution stack, then we encounter it for the second time.
574+
# Therefore it is a recursive reference.
575+
return full_reference in resolver._scopes_stack
576+
577+
562578
def resolve_all_refs(
563579
schema: Union[bool, Schema], *, resolver: LocalResolver = None
564580
) -> Schema:
565-
"""
566-
Resolve all references in the given schema.
567-
568-
This handles nested definitions, but not recursive definitions.
569-
The latter require special handling to convert to strategies and are much
570-
less common, so we just ignore them (and error out) for now.
571-
"""
581+
"""Resolve all non-recursive references in the given schema."""
572582
if isinstance(schema, bool):
573583
return canonicalish(schema)
574584
assert isinstance(schema, dict), schema
@@ -580,35 +590,41 @@ def resolve_all_refs(
580590
)
581591

582592
if "$ref" in schema:
583-
s = dict(schema)
584-
ref = s.pop("$ref")
585-
with resolver.resolving(ref) as got:
586-
if s == {}:
587-
return resolve_all_refs(got, resolver=resolver)
588-
m = merged([s, got])
589-
if m is None: # pragma: no cover
590-
msg = f"$ref:{ref!r} had incompatible base schema {s!r}"
591-
raise HypothesisRefResolutionError(msg)
592-
return resolve_all_refs(m, resolver=resolver)
593-
assert "$ref" not in schema
593+
# Recursive references are skipped to avoid infinite recursion.
594+
if not is_recursive_reference(schema["$ref"], resolver):
595+
s = dict(schema)
596+
ref = s.pop("$ref")
597+
with resolver.resolving(ref) as got:
598+
if s == {}:
599+
return resolve_all_refs(deepcopy(got), resolver=resolver)
600+
m = merged([s, got])
601+
if m is None: # pragma: no cover
602+
msg = f"$ref:{ref!r} had incompatible base schema {s!r}"
603+
raise HypothesisRefResolutionError(msg)
604+
# `deepcopy` is not needed, because, the schemas are copied inside the `merged` call above
605+
return resolve_all_refs(m, resolver=resolver)
594606

595607
for key in SCHEMA_KEYS:
596608
val = schema.get(key, False)
597609
if isinstance(val, list):
598610
schema[key] = [
599-
resolve_all_refs(v, resolver=resolver) if isinstance(v, dict) else v
611+
resolve_all_refs(deepcopy(v), resolver=resolver)
612+
if isinstance(v, dict)
613+
else v
600614
for v in val
601615
]
602616
elif isinstance(val, dict):
603-
schema[key] = resolve_all_refs(val, resolver=resolver)
617+
schema[key] = resolve_all_refs(deepcopy(val), resolver=resolver)
604618
else:
605619
assert isinstance(val, bool)
606620
for key in SCHEMA_OBJECT_KEYS: # values are keys-to-schema-dicts, not schemas
607621
if key in schema:
608622
subschema = schema[key]
609623
assert isinstance(subschema, dict)
610624
schema[key] = {
611-
k: resolve_all_refs(v, resolver=resolver) if isinstance(v, dict) else v
625+
k: resolve_all_refs(deepcopy(v), resolver=resolver)
626+
if isinstance(v, dict)
627+
else v
612628
for k, v in subschema.items()
613629
}
614630
assert isinstance(schema, dict)

tests/test_canonicalise.py

+130
Original file line numberDiff line numberDiff line change
@@ -522,3 +522,133 @@ def test_validators_use_proper_draft():
522522
}
523523
cc = canonicalish(schema)
524524
jsonschema.validators.validator_for(cc).check_schema(cc)
525+
526+
527+
# Reference to itself
528+
ROOT_REFERENCE = {"$ref": "#"}
529+
# One extra nesting level
530+
NESTED = {"not": {"$ref": "#/not"}}
531+
# The same as above, but includes "$id".
532+
NESTED_WITH_ID = {
533+
"not": {"$ref": "#/not"},
534+
"$id": "http://json-schema.org/draft-07/schema#",
535+
}
536+
SELF_REFERENTIAL = {"foo": {"$ref": "#foo"}, "not": {"$ref": "#foo"}}
537+
538+
539+
@pytest.mark.parametrize(
540+
"schema, expected",
541+
(
542+
(ROOT_REFERENCE, ROOT_REFERENCE),
543+
(NESTED, NESTED),
544+
(NESTED_WITH_ID, NESTED_WITH_ID),
545+
# "foo" content should be inlined as is, because "#" is recursive (special case)
546+
(
547+
{"foo": {"$ref": "#"}, "not": {"$ref": "#foo"}},
548+
{"foo": {"$ref": "#"}, "not": {"$ref": "#"}},
549+
),
550+
# "foo" content should be inlined as is, because it points to itself
551+
(
552+
SELF_REFERENTIAL,
553+
SELF_REFERENTIAL,
554+
),
555+
# The same as above, but with one extra nesting level
556+
(
557+
{"foo": {"not": {"$ref": "#foo"}}, "not": {"$ref": "#foo"}},
558+
# 1. We start from resolving "$ref" in "not"
559+
# 2. at this point we don't know this path is recursive, so we follow to "foo"
560+
# 3. inside "foo" we found a reference to "foo", which means it is recursive
561+
{"foo": {"not": {"$ref": "#foo"}}, "not": {"not": {"$ref": "#foo"}}},
562+
),
563+
# Circular reference between two schemas
564+
(
565+
{"foo": {"$ref": "#bar"}, "bar": {"$ref": "#foo"}, "not": {"$ref": "#foo"}},
566+
# 1. We start in "not" and follow to "foo"
567+
# 2. In "foo" we follow to "bar"
568+
# 3. Here we see a reference to previously seen scope, which means it is a recursive path
569+
# We take the schema where we stop and inline it to the starting point (therefore it is `{"$ref": "#foo"}`)
570+
{"foo": {"$ref": "#bar"}, "bar": {"$ref": "#foo"}, "not": {"$ref": "#foo"}},
571+
),
572+
),
573+
)
574+
def test_skip_recursive_references_simple_schemas(schema, expected):
575+
# When there is a recursive reference, it should not be resolved
576+
assert resolve_all_refs(schema) == expected
577+
578+
579+
@pytest.mark.parametrize(
580+
"schema, resolved",
581+
(
582+
# NOTE. The `resolved` fixture does not include "definitions" to save visual space here, but it is extended
583+
# with it in the test body.
584+
# The reference target is behind two references, that share the same definition path. Not a recursive reference
585+
(
586+
{
587+
"definitions": {
588+
"properties": {
589+
"foo": {"type": "string"},
590+
"bar": {"$ref": "#/definitions/properties/foo"},
591+
},
592+
},
593+
"not": {"$ref": "#/definitions/properties/bar"},
594+
},
595+
{
596+
"not": {"type": "string"},
597+
},
598+
),
599+
# Here we need to resolve multiple references while being on the same resolution scope:
600+
# "#/definitions/foo" contains two references
601+
(
602+
{
603+
"definitions": {
604+
"foo": {
605+
"properties": {
606+
"bar": {"$ref": "#/definitions/spam"},
607+
"baz": {"$ref": "#/definitions/spam"},
608+
}
609+
},
610+
"spam": {"type": "string"},
611+
},
612+
"properties": {"foo": {"$ref": "#/definitions/foo"}},
613+
},
614+
{
615+
"properties": {
616+
"foo": {
617+
"properties": {
618+
"bar": {"type": "string"},
619+
"baz": {"type": "string"},
620+
}
621+
}
622+
},
623+
},
624+
),
625+
# Similar to the one above, but recursive
626+
(
627+
{
628+
"definitions": {
629+
"foo": {
630+
"properties": {
631+
"bar": {"$ref": "#/definitions/spam"},
632+
"baz": {"$ref": "#/definitions/spam"},
633+
}
634+
},
635+
"spam": {"$ref": "#/definitions/foo"},
636+
},
637+
"properties": {"foo": {"$ref": "#/definitions/foo"}},
638+
},
639+
{
640+
"properties": {
641+
"foo": {
642+
"properties": {
643+
"bar": {"$ref": "#/definitions/foo"},
644+
"baz": {"$ref": "#/definitions/foo"},
645+
}
646+
}
647+
},
648+
},
649+
),
650+
),
651+
)
652+
def test_skip_recursive_references_complex_schemas(schema, resolved):
653+
resolved["definitions"] = schema["definitions"]
654+
assert resolve_all_refs(schema) == resolved

0 commit comments

Comments
 (0)