Skip to content

Commit 175213f

Browse files
committed
Handle invalid regex
1 parent f40063a commit 175213f

File tree

3 files changed

+47
-3
lines changed

3 files changed

+47
-3
lines changed

CHANGELOG.md

+2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
# Changelog
22

3+
- Correct handling of regex patterns which are invalid in Python (#75)
4+
35
#### 0.18.2 - 2020-11-22
46
- Remove internal caching due to hash collisions (#71)
57
- Improve performance for conditional keywords

src/hypothesis_jsonschema/_from_schema.py

+26-3
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import math
55
import operator
66
import re
7+
import warnings
78
from fractions import Fraction
89
from functools import partial
910
from typing import Any, Callable, Dict, List, NoReturn, Optional, Set, Union
@@ -388,6 +389,14 @@ def relative_json_pointers() -> st.SearchStrategy[str]:
388389
}
389390

390391

392+
def _warn_invalid_regex(pattern: str, err: re.error, kw: str = "pattern") -> None:
393+
warnings.warn(
394+
f"Got {kw}={pattern!r}, but this is not valid syntax for a Python regular "
395+
f"expression ({err}) so it will not be handled by the strategy. See https://"
396+
"json-schema.org/understanding-json-schema/reference/regular_expressions.html"
397+
)
398+
399+
391400
def string_schema(
392401
custom_formats: Dict[str, st.SearchStrategy[str]], schema: dict
393402
) -> st.SearchStrategy[str]:
@@ -402,14 +411,19 @@ def string_schema(
402411
# See https://json-schema.org/latest/json-schema-validation.html#format
403412
strategy = known_formats[schema["format"]]
404413
if "pattern" in schema:
405-
# This isn't really supported, but we'll do our best.
406-
strategy = strategy.filter(re.compile(schema["pattern"]).search)
414+
try:
415+
# This isn't really supported, but we'll do our best with a filter.
416+
strategy = strategy.filter(re.compile(schema["pattern"]).search)
417+
except re.error as err:
418+
_warn_invalid_regex(schema["pattern"], err)
419+
return st.nothing()
407420
elif "pattern" in schema:
408421
try:
409422
re.compile(schema["pattern"])
410423
strategy = st.from_regex(schema["pattern"])
411-
except re.error:
424+
except re.error as err:
412425
# Patterns that are invalid in Python, or just malformed
426+
_warn_invalid_regex(schema["pattern"], err)
413427
return st.nothing()
414428
# If we have size bounds but we're generating strings from a regex or pattern,
415429
# apply a filter to ensure our size bounds are respected.
@@ -524,6 +538,15 @@ def object_schema(
524538
additional = schema.get("additionalProperties", {})
525539
additional_allowed = additional != FALSEY
526540

541+
for key in list(patterns):
542+
try:
543+
re.compile(key)
544+
except re.error as err:
545+
_warn_invalid_regex(key, err, "patternProperties entry")
546+
if min_size == 0 and not required:
547+
return st.builds(dict)
548+
return st.nothing()
549+
527550
dependencies = schema.get("dependencies", {})
528551
dep_names = {k: v for k, v in dependencies.items() if isinstance(v, list)}
529552
dep_schemas = {k: v for k, v in dependencies.items() if k not in dep_names}

tests/test_from_schema.py

+19
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,10 @@
2727
)
2828
from hypothesis_jsonschema._from_schema import from_schema, rfc3339
2929

30+
# We use this as a placeholder for all schemas which resolve to nothing()
31+
# but do not canonicalise to FALSEY
32+
INVALID_REGEX_SCHEMA = {"type": "string", "pattern": "["}
33+
3034

3135
@settings(
3236
suppress_health_check=[HealthCheck.too_slow, HealthCheck.filter_too_much],
@@ -73,6 +77,20 @@ def test_invalid_schemas_raise(schema):
7377
from_schema(schema).example()
7478

7579

80+
@pytest.mark.parametrize(
81+
"schema",
82+
[
83+
INVALID_REGEX_SCHEMA,
84+
{"type": "string", "pattern": "[", "format": "color"},
85+
{"type": "object", "patternProperties": {"[": False}},
86+
{"type": "object", "patternProperties": {"[": False}, "required": ["a"]},
87+
],
88+
)
89+
def test_invalid_regex_emit_warning(schema):
90+
with pytest.warns(UserWarning):
91+
from_schema(schema).validate()
92+
93+
7694
INVALID_SCHEMAS = {
7795
# Empty list for `required`, which is invalid
7896
"Release Drafter configuration file",
@@ -97,6 +115,7 @@ def test_invalid_schemas_raise(schema):
97115
# Technically valid, but using regex patterns not supported by Python
98116
"draft7/ECMA 262 regex escapes control codes with \\c and lower letter",
99117
"draft7/ECMA 262 regex escapes control codes with \\c and upper letter",
118+
"JSON Schema for mime type collections",
100119
}
101120
FLAKY_SCHEMAS = {
102121
# The following schemas refer to an `$id` rather than a JSON pointer.

0 commit comments

Comments
 (0)