Skip to content

Commit 60edab9

Browse files
authored
Merge pull request #466 from python-jsonschema/fix-rebuild-bug
Fix caching issues which render remote ref caching ineffective
2 parents 0b580cc + f9645ee commit 60edab9

File tree

5 files changed

+76
-13
lines changed

5 files changed

+76
-13
lines changed

.flake8

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,5 @@
11
[flake8]
2-
exclude = .git,.tox,__pycache__,.eggs,dist,.venv*,docs,build,_build
3-
# we enforce 80 char width with `black` "loosely", so flake8 should be set to
4-
# not fail on up to 90 chars of width
2+
exclude = .git,.tox,__pycache__,dist,.venv*,docs,build
53
max-line-length = 90
6-
7-
# based on the flake8 conf for `black` itself:
8-
# https://github.com/ambv/black/blob/master/.flake8
9-
#
10-
# W503/W504 conflict, black causes E203
11-
ignore = W503,W504,E203,
4+
# black related: W503/W504 conflict, black causes E203
5+
ignore = W503,W504,E203,B019

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ repos:
2626
rev: 7.1.0
2727
hooks:
2828
- id: flake8
29+
args: ['--config', '.flake8']
2930
additional_dependencies:
3031
- 'flake8-bugbear==24.1.17'
3132
- 'flake8-typing-as-t==0.0.3'

src/check_jsonschema/schema_loader/main.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import functools
34
import pathlib
45
import typing as t
56
import urllib.error
@@ -130,11 +131,21 @@ def get_validator(
130131
instance_doc: dict[str, t.Any],
131132
format_opts: FormatOptions,
132133
fill_defaults: bool,
134+
) -> jsonschema.protocols.Validator:
135+
return self._get_validator(format_opts, fill_defaults)
136+
137+
@functools.lru_cache
138+
def _get_validator(
139+
self,
140+
format_opts: FormatOptions,
141+
fill_defaults: bool,
133142
) -> jsonschema.protocols.Validator:
134143
retrieval_uri = self.get_schema_retrieval_uri()
135144
schema = self.get_schema()
136145

137146
schema_dialect = schema.get("$schema")
147+
if schema_dialect is not None and not isinstance(schema_dialect, str):
148+
schema_dialect = None
138149

139150
# format checker (which may be None)
140151
format_checker = make_format_checker(format_opts, schema_dialect)

src/check_jsonschema/schema_loader/resolver.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
7979
else:
8080
full_uri = uri
8181

82-
if full_uri in cache._cache:
83-
return cache[uri]
82+
if full_uri in cache:
83+
return cache[full_uri]
8484

8585
full_uri_scheme = urllib.parse.urlsplit(full_uri).scheme
8686
if full_uri_scheme in ("http", "https"):
@@ -100,8 +100,8 @@ def validation_callback(content: bytes) -> None:
100100
else:
101101
parsed_object = get_local_file(full_uri)
102102

103-
cache[uri] = parsed_object
104-
return cache[uri]
103+
cache[full_uri] = parsed_object
104+
return cache[full_uri]
105105

106106
return retrieve_reference
107107

tests/acceptance/test_remote_ref_resolution.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,3 +244,60 @@ def test_ref_resolution_with_custom_base_uri(run_line, tmp_path, check_passes):
244244
assert result.exit_code == 0, output
245245
else:
246246
assert result.exit_code == 1, output
247+
248+
249+
@pytest.mark.parametrize("num_instances", (1, 2, 10))
250+
@pytest.mark.parametrize("check_passes", (True, False))
251+
def test_remote_ref_resolution_callout_count_is_scale_free_in_instancefiles(
252+
run_line, tmp_path, num_instances, check_passes
253+
):
254+
"""
255+
Test that for any N > 1, validation of a schema with a ref against N instance files
256+
has exactly the same number of callouts as validation when N=1
257+
258+
This proves that the validator and caching are working correctly, and we aren't
259+
repeating callouts to rebuild state.
260+
"""
261+
schema_uri = "https://example.org/schemas/main.json"
262+
ref_uri = "https://example.org/schemas/title_schema.json"
263+
264+
main_schema = {
265+
"$id": schema_uri,
266+
"$schema": "http://json-schema.org/draft-07/schema",
267+
"properties": {
268+
"title": {"$ref": "./title_schema.json"},
269+
},
270+
"additionalProperties": False,
271+
}
272+
title_schema = {"type": "string"}
273+
responses.add("GET", schema_uri, json=main_schema)
274+
responses.add("GET", ref_uri, json=title_schema)
275+
276+
# write N documents
277+
instance_doc = {"title": "doc one" if check_passes else 2}
278+
instance_paths = []
279+
for i in range(num_instances):
280+
instance_path = tmp_path / f"instance{i}.json"
281+
instance_path.write_text(json.dumps(instance_doc))
282+
instance_paths.append(str(instance_path))
283+
284+
result = run_line(
285+
[
286+
"check-jsonschema",
287+
"--schemafile",
288+
schema_uri,
289+
]
290+
+ instance_paths
291+
)
292+
output = f"\nstdout:\n{result.stdout}\n\nstderr:\n{result.stderr}"
293+
if check_passes:
294+
assert result.exit_code == 0, output
295+
else:
296+
assert result.exit_code == 1, output
297+
298+
# this is the moment of the "real" test run here:
299+
# no matter how many instances there were, there should only have been two calls
300+
# (one for the schema and one for the $ref)
301+
assert len(responses.calls) == 2
302+
assert len([c for c in responses.calls if c.request.url == schema_uri]) == 1
303+
assert len([c for c in responses.calls if c.request.url == ref_uri]) == 1

0 commit comments

Comments
 (0)