Skip to content

Commit 037c2b2

Browse files
committed
Refactor cache-related fixtures
Add new fixtures which expose nicer interfaces for interacting with the download caches. These are split into two variants -- one for `refs/` and one for `downloads/` -- for the relevant functions. The new fixtures cover getting the relevant cache dir as a path, getting the expected cache location for a URI as a path (without creating it), and injecting some specific file contents into said path, including creation of the parent dir structure. These new fixtures are then applied in as many locations as possible for a small but valuable reduction in test complexity and tight coupling between specific tests and the implementation.
1 parent f60971f commit 037c2b2

File tree

4 files changed

+90
-51
lines changed

4 files changed

+90
-51
lines changed

tests/acceptance/test_nonjson_schema_handling.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import responses
55

66
from check_jsonschema.parsers.json5 import ENABLED as JSON5_ENABLED
7-
from check_jsonschema.schema_loader.resolver import ref_url_to_cache_filename
87

98
SIMPLE_SCHEMA = {
109
"$schema": "http://json-schema.org/draft-07/schema",
@@ -138,7 +137,9 @@ def test_can_load_remote_yaml_schema_ref(run_line, tmp_path, passing_data):
138137
assert result.exit_code == (0 if passing_data else 1)
139138

140139

141-
def test_can_load_remote_yaml_schema_ref_from_cache(run_line, cache_dir, tmp_path):
140+
def test_can_load_remote_yaml_schema_ref_from_cache(
141+
run_line, inject_cached_ref, tmp_path
142+
):
142143
retrieval_uri = "https://example.org/retrieval/schemas/main.yaml"
143144
responses.add(
144145
"GET",
@@ -155,10 +156,7 @@ def test_can_load_remote_yaml_schema_ref_from_cache(run_line, cache_dir, tmp_pat
155156
# populate a bad schema, but then "override" that with a good cache value
156157
# this can only pass (in the success case) if the cache loading really works
157158
responses.add("GET", ref_loc, body="false")
158-
ref_cache_dir = cache_dir / "check_jsonschema" / "refs"
159-
ref_cache_dir.mkdir(parents=True)
160-
ref_path = ref_cache_dir / ref_url_to_cache_filename(ref_loc)
161-
ref_path.write_text("type: string")
159+
inject_cached_ref(ref_loc, "type: string")
162160

163161
doc = tmp_path / "doc.json"
164162
doc.write_text(json.dumps(PASSING_DOCUMENT))

tests/acceptance/test_remote_ref_resolution.py

+5-14
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import pytest
44
import responses
55

6-
from check_jsonschema.schema_loader.resolver import ref_url_to_cache_filename
7-
86
CASES = {
97
"case1": {
108
"main_schema": {
@@ -68,7 +66,7 @@ def test_remote_ref_resolution_simple_case(run_line, check_passes, casename, tmp
6866
@pytest.mark.parametrize("casename", ("case1", "case2"))
6967
@pytest.mark.parametrize("disable_cache", (True, False))
7068
def test_remote_ref_resolution_cache_control(
71-
run_line, tmp_path, cache_dir, casename, disable_cache
69+
run_line, tmp_path, get_ref_cache_loc, casename, disable_cache
7270
):
7371
main_schema_loc = "https://example.com/main.json"
7472
responses.add("GET", main_schema_loc, json=CASES[casename]["main_schema"])
@@ -92,9 +90,7 @@ def test_remote_ref_resolution_cache_control(
9290

9391
cache_locs = []
9492
for ref_loc in ref_locs:
95-
cache_locs.append(
96-
cache_dir / "check_jsonschema" / "refs" / ref_url_to_cache_filename(ref_loc)
97-
)
93+
cache_locs.append(get_ref_cache_loc(ref_loc))
9894
assert cache_locs # sanity check
9995
if disable_cache:
10096
for loc in cache_locs:
@@ -107,15 +103,11 @@ def test_remote_ref_resolution_cache_control(
107103
@pytest.mark.parametrize("casename", ("case1", "case2"))
108104
@pytest.mark.parametrize("check_passes", (True, False))
109105
def test_remote_ref_resolution_loads_from_cache(
110-
run_line, tmp_path, cache_dir, casename, check_passes
106+
run_line, tmp_path, get_ref_cache_loc, inject_cached_ref, casename, check_passes
111107
):
112108
main_schema_loc = "https://example.com/main.json"
113109
responses.add("GET", main_schema_loc, json=CASES[casename]["main_schema"])
114110

115-
# ensure the ref cache dir exists
116-
ref_cache_dir = cache_loc = cache_dir / "check_jsonschema" / "refs"
117-
ref_cache_dir.mkdir(parents=True)
118-
119111
ref_locs = []
120112
cache_locs = []
121113
for name, subschema in CASES[casename]["other_schemas"].items():
@@ -125,9 +117,8 @@ def test_remote_ref_resolution_loads_from_cache(
125117
ref_locs.append(other_schema_loc)
126118

127119
# but populate the cache with "good data"
128-
cache_loc = ref_cache_dir / ref_url_to_cache_filename(other_schema_loc)
129-
cache_locs.append(cache_loc)
130-
cache_loc.write_text(json.dumps(subschema))
120+
inject_cached_ref(other_schema_loc, json.dumps(subschema))
121+
cache_locs.append(get_ref_cache_loc(other_schema_loc))
131122

132123
instance_path = tmp_path / "instance.json"
133124
instance_path.write_text(

tests/conftest.py

+50
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,53 @@ def patch_cache_dir(monkeypatch, cache_dir):
6060
"check_jsonschema.cachedownloader._base_cache_dir", lambda: str(cache_dir)
6161
)
6262
yield m
63+
64+
65+
@pytest.fixture
66+
def downloads_cache_dir(tmp_path):
67+
return tmp_path / ".cache" / "check_jsonschema" / "downloads"
68+
69+
70+
@pytest.fixture
71+
def get_download_cache_loc(downloads_cache_dir):
72+
def _get(uri):
73+
return downloads_cache_dir / uri.split("/")[-1]
74+
75+
return _get
76+
77+
78+
@pytest.fixture
79+
def inject_cached_download(downloads_cache_dir, get_download_cache_loc):
80+
def _write(uri, content):
81+
downloads_cache_dir.mkdir(parents=True)
82+
path = get_download_cache_loc(uri)
83+
if isinstance(content, str):
84+
path.write_text(content)
85+
else:
86+
path.write_bytes(content)
87+
88+
return _write
89+
90+
91+
@pytest.fixture
92+
def refs_cache_dir(tmp_path):
93+
return tmp_path / ".cache" / "check_jsonschema" / "refs"
94+
95+
96+
@pytest.fixture
97+
def get_ref_cache_loc(refs_cache_dir):
98+
from check_jsonschema.schema_loader.resolver import ref_url_to_cache_filename
99+
100+
def _get(uri):
101+
return refs_cache_dir / ref_url_to_cache_filename(uri)
102+
103+
return _get
104+
105+
106+
@pytest.fixture
107+
def inject_cached_ref(refs_cache_dir, get_ref_cache_loc):
108+
def _write(uri, content):
109+
refs_cache_dir.mkdir(parents=True)
110+
get_ref_cache_loc(uri).write_text(content)
111+
112+
return _write

tests/unit/test_cachedownloader.py

+31-31
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,9 @@ def test_cachedownloader_cached_file(tmp_path, monkeypatch, default_response):
125125

126126

127127
@pytest.mark.parametrize("disable_cache", (True, False))
128-
def test_cachedownloader_on_success(cache_dir, disable_cache):
128+
def test_cachedownloader_on_success(get_download_cache_loc, disable_cache):
129129
add_default_response()
130-
f = cache_dir / "check_jsonschema" / "downloads" / "schema1.json"
130+
f = get_download_cache_loc("schema1.json")
131131
cd = CacheDownloader(disable_cache=disable_cache).bind(
132132
"https://example.com/schema1.json"
133133
)
@@ -151,7 +151,9 @@ def test_cachedownloader_using_alternate_target_dir(cache_dir):
151151

152152
@pytest.mark.parametrize("disable_cache", (True, False))
153153
@pytest.mark.parametrize("failures", (1, 2, requests.ConnectionError))
154-
def test_cachedownloader_succeeds_after_few_errors(cache_dir, disable_cache, failures):
154+
def test_cachedownloader_succeeds_after_few_errors(
155+
get_download_cache_loc, disable_cache, failures
156+
):
155157
if isinstance(failures, int):
156158
for _i in range(failures):
157159
responses.add(
@@ -168,7 +170,7 @@ def test_cachedownloader_succeeds_after_few_errors(cache_dir, disable_cache, fai
168170
match_querystring=None,
169171
)
170172
add_default_response()
171-
f = cache_dir / "check_jsonschema" / "downloads" / "schema1.json"
173+
f = get_download_cache_loc("schema1.json")
172174
cd = CacheDownloader(disable_cache=disable_cache).bind(
173175
"https://example.com/schema1.json"
174176
)
@@ -184,7 +186,7 @@ def test_cachedownloader_succeeds_after_few_errors(cache_dir, disable_cache, fai
184186
@pytest.mark.parametrize("disable_cache", (True, False))
185187
@pytest.mark.parametrize("connection_error", (True, False))
186188
def test_cachedownloader_fails_after_many_errors(
187-
cache_dir, disable_cache, connection_error
189+
get_download_cache_loc, disable_cache, connection_error
188190
):
189191
for _i in range(10):
190192
if connection_error:
@@ -202,7 +204,7 @@ def test_cachedownloader_fails_after_many_errors(
202204
match_querystring=None,
203205
)
204206
add_default_response() # never reached, the 11th response
205-
f = cache_dir / "check_jsonschema" / "downloads" / "schema1.json"
207+
f = get_download_cache_loc("schema1.json")
206208
cd = CacheDownloader(disable_cache=disable_cache).bind(
207209
"https://example.com/schema1.json"
208210
)
@@ -213,7 +215,7 @@ def test_cachedownloader_fails_after_many_errors(
213215

214216

215217
@pytest.mark.parametrize("disable_cache", (True, False))
216-
def test_cachedownloader_retries_on_bad_data(cache_dir, disable_cache):
218+
def test_cachedownloader_retries_on_bad_data(get_download_cache_loc, disable_cache):
217219
responses.add(
218220
"GET",
219221
"https://example.com/schema1.json",
@@ -222,7 +224,7 @@ def test_cachedownloader_retries_on_bad_data(cache_dir, disable_cache):
222224
match_querystring=None,
223225
)
224226
add_default_response()
225-
f = cache_dir / "check_jsonschema" / "downloads" / "schema1.json"
227+
f = get_download_cache_loc("schema1.json")
226228
cd = CacheDownloader(
227229
disable_cache=disable_cache,
228230
).bind(
@@ -245,20 +247,19 @@ def test_cachedownloader_retries_on_bad_data(cache_dir, disable_cache):
245247
"failure_mode", ("header_missing", "header_malformed", "time_overflow")
246248
)
247249
def test_cachedownloader_handles_bad_lastmod_header(
248-
monkeypatch, cache_dir, file_exists, failure_mode
250+
monkeypatch,
251+
get_download_cache_loc,
252+
inject_cached_download,
253+
file_exists,
254+
failure_mode,
249255
):
256+
uri = "https://example.com/schema1.json"
250257
if failure_mode == "header_missing":
251-
responses.add(
252-
"GET",
253-
"https://example.com/schema1.json",
254-
headers={},
255-
json={},
256-
match_querystring=None,
257-
)
258+
responses.add("GET", uri, headers={}, json={}, match_querystring=None)
258259
elif failure_mode == "header_malformed":
259260
responses.add(
260261
"GET",
261-
"https://example.com/schema1.json",
262+
uri,
262263
headers={"Last-Modified": "Jan 2000 00:00:01"},
263264
json={},
264265
match_querystring=None,
@@ -274,47 +275,47 @@ def fake_mktime(*args):
274275
raise NotImplementedError
275276

276277
original_file_contents = b'{"foo": "bar"}'
277-
(cache_dir / "check_jsonschema" / "downloads").mkdir(parents=True)
278-
f = cache_dir / "check_jsonschema" / "downloads" / "schema1.json"
278+
file_path = get_download_cache_loc(uri)
279279

280+
assert not file_path.exists()
280281
if file_exists:
281-
f.write_bytes(original_file_contents)
282-
else:
283-
assert not f.exists()
282+
inject_cached_download(uri, original_file_contents)
284283

285-
cd = CacheDownloader().bind("https://example.com/schema1.json", filename=str(f))
284+
cd = CacheDownloader().bind(uri)
286285

287286
# if the file already existed, it will not be overwritten by the cachedownloader
288287
# so the returned value for both the downloader and a direct file read should be the
289288
# original contents
290289
if file_exists:
291290
with cd.open() as fp:
292291
assert fp.read() == original_file_contents
293-
assert f.read_bytes() == original_file_contents
292+
assert file_path.read_bytes() == original_file_contents
294293
# otherwise, the file will have been created with new content
295294
# both reads will show that new content
296295
else:
297296
with cd.open() as fp:
298297
assert fp.read() == b"{}"
299-
assert f.read_bytes() == b"{}"
298+
assert file_path.read_bytes() == b"{}"
300299

301300
# at the end, the file always exists on disk
302-
assert f.exists()
301+
assert file_path.exists()
303302

304303

305-
def test_cachedownloader_validation_is_not_invoked_on_hit(monkeypatch, cache_dir):
304+
def test_cachedownloader_validation_is_not_invoked_on_hit(
305+
monkeypatch, inject_cached_download
306+
):
306307
"""
307308
Regression test for https://github.com/python-jsonschema/check-jsonschema/issues/453
308309
309310
This was a bug in which the validation callback was invoked eagerly, even on a cache
310311
hit. As a result, cache hits did not demonstrate their expected performance gain.
311312
"""
313+
uri = "https://example.com/schema1.json"
314+
312315
# 1: construct some perfectly good data (it doesn't really matter what it is)
313316
add_default_response()
314317
# 2: put equivalent data on disk
315-
(cache_dir / "check_jsonschema" / "downloads").mkdir(parents=True)
316-
f = cache_dir / "check_jsonschema" / "downloads" / "schema1.json"
317-
f.write_text("{}")
318+
inject_cached_download(uri, "{}")
318319

319320
# 3: construct a validator which marks that it ran in a variable
320321
validator_ran = False
@@ -327,7 +328,6 @@ def dummy_validate_bytes(data):
327328
# and use the above validation method
328329
cd = CacheDownloader().bind(
329330
"https://example.com/schema1.json",
330-
filename=str(f),
331331
validation_callback=dummy_validate_bytes,
332332
)
333333

0 commit comments

Comments
 (0)