Skip to content

Commit 5f4c7c5

Browse files
committed
wip: email format options
1 parent 8a0cfee commit 5f4c7c5

File tree

7 files changed

+180
-2
lines changed

7 files changed

+180
-2
lines changed

docs/usage.rst

+17
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,23 @@ follows:
198198
* - python
199199
- Require the regex to be valid in Python regex syntax.
200200

201+
``--format-email``
202+
~~~~~~~~~~~~~~~~~~
203+
204+
Set a mode for handling of the ``"email"`` and ``"idn-email"`` values for ``"format"``. The modes are as
205+
follows:
206+
207+
.. list-table:: Email Options
208+
:widths: 15 30
209+
:header-rows: 1
210+
211+
* - mode
212+
- description
213+
* - default
214+
- Require the email address to pass a basic sanity check (the string must contain ``@``)
215+
* - full
216+
- Require the email to match RFC 5321 for ``"email"`` or RFC 6531 for ``"idn-email"``
217+
201218
Other Options
202219
--------------
203220

src/check_jsonschema/cli/main_command.py

+17
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str:
7474
date, date-time, email, ipv4, ipv6, regex, uuid
7575
7676
\b
77+
For the "email" and "idn-email" formats, there are multiple modes which can be specified with
78+
'--format-email':
79+
default | only check that the string contains "@"
80+
full | check the string against RFC 5321 (email) or RFC 6531 (idn-email)
81+
7782
For the "regex" format, there are multiple modes which can be specified with
7883
'--format-regex':
7984
default | check that the string is a valid ECMAScript regex
@@ -155,6 +160,16 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str:
155160
default=RegexVariantName.default.value,
156161
type=click.Choice([x.value for x in RegexVariantName], case_sensitive=False),
157162
)
163+
@click.option(
164+
"--format-email",
165+
help=(
166+
"Set the mode of format validation for email addresses. "
167+
"If `--disable-formats email` or `--disable-formats idn-email` is "
168+
"used, this option has no effect on the disabled format."
169+
),
170+
default=EmailVariantName.default.value,
171+
type=click.Choice([x.value for x in EmailVariantName], case_sensitive=False),
172+
)
158173
@click.option(
159174
"--default-filetype",
160175
help="A default filetype to assume when a file's type is not detected",
@@ -240,6 +255,7 @@ def main(
240255
no_cache: bool,
241256
cache_filename: str | None,
242257
disable_formats: tuple[list[str], ...],
258+
format_email: Literal["full", "default"],
243259
format_regex: Literal["python", "default"],
244260
default_filetype: Literal["json", "yaml", "toml", "json5"],
245261
traceback_mode: Literal["full", "short"],
@@ -267,6 +283,7 @@ def main(
267283
else:
268284
args.disable_formats = normalized_disable_formats
269285

286+
# Convert the --format-email choice with the email enum. RegexVariantName models
# the --format-regex choices, so it is the wrong type for this option.
# NOTE(review): the import of EmailVariantName is not visible in this hunk;
# confirm it is imported alongside RegexVariantName.
args.format_email = EmailVariantName(format_email)
270287
args.format_regex = RegexVariantName(format_regex)
271288
args.disable_cache = no_cache
272289
args.default_filetype = default_filetype

src/check_jsonschema/cli/parse_result.py

+1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def set_validator(
8383
def format_opts(self) -> FormatOptions:
    """Build the FormatOptions describing how "format" keywords are checked.

    Format checking is enabled unless ``disable_all_formats`` is set. The
    email and regex variants and the disabled formats are passed through
    from this object's attributes.
    """
    # NOTE(review): ``format_email`` is assigned by the CLI entry point; no
    # default for it is visible here. Confirm it is initialised for callers
    # that build this object without going through the CLI.
    return FormatOptions(
        enabled=not self.disable_all_formats,
        email_variant=self.format_email,
        regex_variant=self.format_regex,
        disabled_formats=self.disable_formats,
    )

src/check_jsonschema/formats/__init__.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import jsonschema.validators
1010
import regress
1111

12-
from .implementations import validate_rfc3339, validate_time
12+
from .implementations import validate_rfc3339, validate_rfc5321, validate_rfc6531, validate_time
1313

1414
# all known format strings except for a selection from draft3 which have either
1515
# been renamed or removed:
@@ -39,6 +39,32 @@
3939
)
4040

4141

42+
class EmailVariantName(enum.Enum):
    """Modes for checking the "email" and "idn-email" formats."""

    # Only check that the string contains "@".
    default = "default"
    # Check against RFC 5321 ("email") or RFC 6531 ("idn-email").
    full = "full"
45+
46+
47+
class EmailImplementation:
    """Format checks for "email" and "idn-email", selected by variant.

    In the ``default`` variant a string passes when it contains "@". In any
    other variant the string is passed to the RFC 5321 validator for "email"
    or the RFC 6531 validator for "idn-email". Non-string instances always
    pass, because "format" only constrains strings.
    """

    def __init__(self, variant: EmailVariantName) -> None:
        self.variant = variant

    def check_format_email(self, instance: t.Any) -> bool:
        """Check *instance* against the "email" format."""
        return self._check(instance, validate_rfc5321)

    def check_format_idn_email(self, instance: t.Any) -> bool:
        """Check *instance* against the "idn-email" format."""
        return self._check(instance, validate_rfc6531)

    def _check(
        self, instance: t.Any, full_validator: t.Callable[[str], bool]
    ) -> bool:
        """Apply the variant's rule, using *full_validator* for strict mode."""
        if not isinstance(instance, str):
            return True
        if self.variant == EmailVariantName.default:
            return "@" in instance
        return full_validator(instance)
66+
67+
4268
class RegexVariantName(enum.Enum):
4369
default = "default"
4470
python = "python"
@@ -70,10 +96,12 @@ def __init__(
7096
self,
7197
*,
7298
enabled: bool = True,
99+
email_variant: EmailVariantName = EmailVariantName.default,
73100
regex_variant: RegexVariantName = RegexVariantName.default,
74101
disabled_formats: tuple[str, ...] = (),
75102
) -> None:
76103
self.enabled = enabled
104+
self.email_variant = email_variant
77105
self.regex_variant = regex_variant
78106
self.disabled_formats = disabled_formats
79107

@@ -101,7 +129,10 @@ def make_format_checker(
101129

102130
# replace the regex check
103131
del checker.checkers["regex"]
132+
email_impl = EmailImplementation(opts.email_variant)
104133
regex_impl = RegexImplementation(opts.regex_variant)
134+
checker.checks("email")(email_impl.check_format_email)
135+
checker.checks("idn-email")(email_impl.check_format_idn_email)
105136
checker.checks("regex")(regex_impl.check_format)
106137
checker.checks("date-time")(validate_rfc3339)
107138
checker.checks("time")(validate_time)
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from .iso8601_time import validate as validate_time
22
from .rfc3339 import validate as validate_rfc3339
3+
from .rfc5321 import validate as validate_rfc5321
4+
from .rfc6531 import validate as validate_rfc6531
35

4-
__all__ = ("validate_rfc3339", "validate_time")
6+
__all__ = ("validate_rfc3339", "validate_rfc5321", "validate_rfc6531", "validate_time")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import re
2+
3+
# Mailbox syntax: a dot-atom or quoted-string local part, "@", then a dot-atom
# domain or a bracketed address literal. The pattern is anchored with \Z rather
# than $ because $ also matches just before a trailing newline.
RFC5321_REGEX = re.compile(
    r"""
    ^
    (
        [!#-'*+/-9=?A-Z^-~-]+(\.[!#-'*+/-9=?A-Z^-~-]+)*
        |
        "([]!#-[^-~ \t]|(\\[\t -~]))+"
    )
    @
    (
        [!#-'*+/-9=?A-Z^-~-]+(\.[!#-'*+/-9=?A-Z^-~-]+)*
        |
        \[[\t -Z^-~]*]
    )
    \Z
    """,
    re.VERBOSE | re.ASCII,
)


def validate(email_str: object) -> bool:
    """Validate a string as a RFC5321 email address.

    Returns False for non-string input. The whole string must match, so
    trailing characters (including a trailing newline) are rejected.
    """
    if not isinstance(email_str, str):
        return False
    return RFC5321_REGEX.fullmatch(email_str) is not None
28+
29+
30+
if __name__ == "__main__":
    # Micro-benchmark: report the best per-call time of validate() for each
    # sample input.
    import timeit

    N = 100_000
    tests = (
        # NOTE(review): the extracted source showed the redaction placeholder
        # "[email protected]" here; a simple valid address is used instead.
        # Confirm against the repository.
        ("basic", "user@example.com"),
    )

    print("benchmarking")
    for name, val in tests:
        all_times = timeit.repeat(
            f"validate({val!r})", globals=globals(), repeat=3, number=N
        )
        print(f"{name} (valid={validate(val)}): {int(min(all_times) / N * 10**9)}ns")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import re
2+
3+
# Building blocks for the RFC 6531 mailbox grammar, which is the RFC 5321
# grammar with UTF8-non-ascii (written here as U+0080..U+10FFFF) added to
# atext, qtext and domain labels. Python's ``re`` needs \uXXXX / \UXXXXXXXX
# escapes; the ECMAScript-style \u{...} form is not accepted.
_ATEXT = r"[0-9A-Za-z!#$%&'*+\-/=?^_`{|}~\u0080-\U0010FFFF]"
_QTEXT = r"[\x20\x21\x23-\x5B\x5D-\x7E\u0080-\U0010FFFF]"
_LET_DIG = r"[0-9A-Za-z\u0080-\U0010FFFF]"
_LDH = r"[0-9A-Za-z\-\u0080-\U0010FFFF]"
_HEX = r"[0-9A-Fa-f]{1,4}"
_IPV4 = r"\d{1,3}(?:\.\d{1,3}){3}"
_IPV6_TAG = r"[Ii][Pp][Vv]6:"

# Local part: dot-string or quoted-string.
_DOT_STRING = _ATEXT + r"+(?:\." + _ATEXT + r"+)*"
_QUOTED_STRING = r'"(?:' + _QTEXT + r"|\\[\x20-\x7E])*" + '"'

# Domain: labels that start and end with a letter/digit (or non-ASCII char).
_SUB_DOMAIN = _LET_DIG + "(?:" + _LDH + "*" + _LET_DIG + ")?"
_DOMAIN = _SUB_DOMAIN + r"(?:\." + _SUB_DOMAIN + ")*"

# Address literals: the four IPv6 forms of RFC 5321 section 4.1.3.
_IPV6_FULL = _HEX + "(?::" + _HEX + "){7}"
_IPV6_COMP = (
    "(?:" + _HEX + "(?::" + _HEX + "){0,5})?"
    + "::"
    + "(?:" + _HEX + "(?::" + _HEX + "){0,5})?"
)
_IPV6V4_FULL = _HEX + "(?::" + _HEX + "){5}:" + _IPV4
_IPV6V4_COMP = (
    "(?:" + _HEX + "(?::" + _HEX + "){0,3})?"
    + "::"
    + "(?:" + _HEX + "(?::" + _HEX + "){0,3}:)?"
    + _IPV4
)
_IPV6 = _IPV6_TAG + "(?:" + "|".join(
    (_IPV6_FULL, _IPV6_COMP, _IPV6V4_FULL, _IPV6V4_COMP)
) + ")"
# General literal "tag:content". The look-ahead stops a malformed IPv6 literal
# from being accepted through this catch-all rule.
_GENERAL = (
    "(?!" + _IPV6_TAG + ")"
    + r"[0-9A-Za-z\-]*[0-9A-Za-z]:[\x21-\x5A\x5E-\x7E]+"
)
_ADDRESS_LITERAL = r"\[(?:" + "|".join((_IPV4, _IPV6, _GENERAL)) + r")\]"

# Length limits from RFC 5321 section 4.5.3.1. They are enforced in validate()
# because Python's ``re`` does not support variable-width look-behind.
# NOTE(review): the RFC states these limits in octets; code points are counted
# here, as the original pattern did. Confirm which is wanted.
_MAX_LOCAL_PART_LENGTH = 64
_MAX_DOMAIN_LENGTH = 255

# re.ASCII keeps \d restricted to ASCII digits in the IPv4 parts.
RFC6531_REGEX = re.compile(
    "(?P<local>" + _DOT_STRING + "|" + _QUOTED_STRING + ")"
    + "@"
    + "(?P<domain>" + _ADDRESS_LITERAL + "|" + _DOMAIN + ")",
    re.ASCII,
)


def validate(email_str: object) -> bool:
    """Validate a string as a RFC6531 email address.

    Returns False for non-string input. The whole string must match the
    mailbox grammar, the local part may be at most 64 characters, and a
    domain name (not an address literal) may be at most 255 characters.
    """
    if not isinstance(email_str, str):
        return False
    match = RFC6531_REGEX.fullmatch(email_str)
    if match is None:
        return False
    if len(match["local"]) > _MAX_LOCAL_PART_LENGTH:
        return False
    domain = match["domain"]
    # The length limit applies to domain names only, not to "[...]" literals.
    if not domain.startswith("[") and len(domain) > _MAX_DOMAIN_LENGTH:
        return False
    return True
52+
53+
54+
if __name__ == "__main__":
    # Micro-benchmark: report the best per-call time of validate() for each
    # sample input.
    import timeit

    N = 100_000
    tests = (
        # NOTE(review): the extracted source showed the redaction placeholder
        # "[email protected]" here; a simple valid address is used instead.
        # Confirm against the repository.
        ("basic", "user@example.com"),
    )

    print("benchmarking")
    for name, val in tests:
        all_times = timeit.repeat(
            f"validate({val!r})", globals=globals(), repeat=3, number=N
        )
        print(f"{name} (valid={validate(val)}): {int(min(all_times) / N * 10**9)}ns")

0 commit comments

Comments
 (0)