python-jsonschema · trzejos · Jul 10, 2024 · Jul 10, 2024 · Jul 10, 2024 · Jul 10, 2024
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -9,6 +9,7 @@ Unreleased
 ----------
 
 .. vendor-insert-here
+- Add ``--format-email`` option to allow full validation of email/idn-email formats
 
 0.29.0
 ------

diff --git a/src/check_jsonschema/formats/__init__.py b/src/check_jsonschema/formats/__init__.py
@@ -9,7 +9,12 @@
 import jsonschema.validators
 import regress
 
-from .implementations import validate_rfc3339, validate_time
+from .implementations import (
+    validate_rfc3339,
+    validate_rfc5321,
+    validate_rfc6531,
+    validate_time,
+)
 
 # all known format strings except for a selection from draft3 which have either
 # been renamed or removed:
@@ -39,6 +44,21 @@
 )
 
 
+class EmailImplementation:
+    """Format checks for ``email`` and ``idn-email`` instances.
+
+    Non-string instances are always reported as passing; strings are handed to
+    ``validate_rfc5321`` (``email``) or ``validate_rfc6531`` (``idn-email``).
+    """
+
+    def check_format_email(self, instance: t.Any) -> bool:
+        """Return the ``validate_rfc5321`` verdict for a str, True otherwise."""
+        return validate_rfc5321(instance) if isinstance(instance, str) else True
+
+    def check_format_idn_email(self, instance: t.Any) -> bool:
+        """Return the ``validate_rfc6531`` verdict for a str, True otherwise."""
+        return validate_rfc6531(instance) if isinstance(instance, str) else True
+
+
 class RegexVariantName(enum.Enum):
     default = "default"
     python = "python"
@@ -101,7 +121,10 @@ def make_format_checker(
 
     # replace the regex check
     del checker.checkers["regex"]
+    email_impl = EmailImplementation()
     regex_impl = RegexImplementation(opts.regex_variant)
+    checker.checks("email")(email_impl.check_format_email)
+    checker.checks("idn-email")(email_impl.check_format_idn_email)
     checker.checks("regex")(regex_impl.check_format)
     checker.checks("date-time")(validate_rfc3339)
     checker.checks("time")(validate_time)

diff --git a/src/check_jsonschema/formats/implementations/__init__.py b/src/check_jsonschema/formats/implementations/__init__.py
@@ -1,4 +1,6 @@
 from .iso8601_time import validate as validate_time
 from .rfc3339 import validate as validate_rfc3339
+from .rfc5321 import validate as validate_rfc5321
+from .rfc6531 import validate as validate_rfc6531
 
-__all__ = ("validate_rfc3339", "validate_time")
+__all__ = ("validate_rfc3339", "validate_rfc5321", "validate_rfc6531", "validate_time")
diff --git a/src/check_jsonschema/formats/implementations/rfc5321.py b/src/check_jsonschema/formats/implementations/rfc5321.py
@@ -0,0 +1,63 @@
+import re
+
+# Compact equivalent of the pattern compiled below (local part, "@", domain):
+# ([!#-'*+/-9=?A-Z^-~-]+(\.[!#-'*+/-9=?A-Z^-~-]+)*|"([]!#-[^-~ \t]|(\\[\t -~]))+")
+# @
+# ([A-Za-z0-9]([A-Za-z0-9-]*[A-Za-z0-9])?(\.[A-Za-z0-9]([A-Za-z0-9-]*[A-Za-z0-9])?)*|\[[\t -Z^-~]*])
+#
+# [a-zA-Z0-9!#$%&'*+/=?^_`{|}~-] == Alphanumeric characters and most special characters except [ (),.:;<>@\[\]\t]
+# [a-zA-Z0-9 !#$%&'()*+,./:;<=>?@\[\]^_`{|}~\t-] == All printable characters except for " and \
+# [\t -~] == All printable characters
+# [a-zA-Z0-9 !"#$%&'()*+,./:;<=>?@^_`{|}~\t-] == All printable characters except for the following characters []\
+# [a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])? == One domain label: letters and digits, hyphens only in the middle
+#
+# The domain side is deliberately stricter than the local part: it is either a
+# dot-separated list of labels or a bracketed address literal, so characters
+# such as "_" or "!" are rejected there. The pattern ends in \Z rather than $
+# because $ would also match just before a trailing newline.
+RFC5321_REGEX = re.compile(
+    r"""
+    ^
+    (?P<local>
+    [a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)*
+    |
+    "(?:[a-zA-Z0-9 !#$%&'()*+,./:;<=>?@\[\]^_`{|}~\t-]|\\[\t -~])+"
+    )
+    @
+    (?P<domain>
+    [a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)*
+    |
+    \[[a-zA-Z0-9 !"#$%&'()*+,./:;<=>?@^_`{|}~\t-]*\]
+    )
+    \Z
+    """,
+    re.VERBOSE | re.ASCII,
+)
+
+
+def validate(email_str: object) -> bool:
+    """Validate a string as a RFC5321 email address.
+
+    Returns False for non-str input, for text that RFC5321_REGEX rejects, and
+    for addresses whose local part, domain, or any dot-separated domain piece
+    exceeds its length limit; returns True otherwise.
+    """
+    if not isinstance(email_str, str):
+        return False
+    # fullmatch rather than match: "$" on its own also matches just before a
+    # trailing newline, which would let an address followed by "\n" through.
+    match = RFC5321_REGEX.fullmatch(email_str)
+    if match is None:
+        return False
+    local, domain = match.group("local", "domain")
+    # Local part of email address is limited to 64 octets,
+    # domain names are limited to 253 octets
+    if len(local) > 64 or len(domain) > 253:
+        return False
+    # DNS Labels are limited to 63 octets
+    return all(len(label) <= 63 for label in domain.split("."))
+
+
+if __name__ == "__main__":
+    # Ad-hoc micro-benchmark: prints the best per-call time in nanoseconds.
+    import timeit
+
+    N = 100_000
+    cases = (("basic", "[email protected]"),)
+    print("benchmarking")
+    for name, val in cases:
+        stmt = f"validate({val!r})"
+        # three timing runs of N calls each; the fastest run is reported
+        all_times = timeit.repeat(stmt, globals=globals(), repeat=3, number=N)
+        print(f"{name} (valid={validate(val)}): {int(min(all_times) / N * 10**9)}ns")
diff --git a/src/check_jsonschema/formats/implementations/rfc6531.py b/src/check_jsonschema/formats/implementations/rfc6531.py
@@ -0,0 +1,61 @@
+import re
+
+RFC6531_REGEX = re.compile(
+    r"""
+    ^
+    # local part: atoms joined by single dots (the hyphen is escaped so that
+    # the class does not contain a range that would admit "," and ".")
+    (
+    ([0-9a-z!#$%&'*+\-\/=?^_`\{|\}~\u0080-\U0010FFFF]+(\.[0-9a-z!#$%&'*+\-\/=?^_`\{|\}~\u0080-\U0010FFFF]+)*)
+    |
+    # quoted string
+    "([\x20-\x21\x23-\x5B\x5D-\x7E\u0080-\U0010FFFF]|\\[\x20-\x7E])*"
+    )
+    @
+    # Domain/address
+    (
+    # Address literal
+    (\[(
+    # IPv4
+    (\d{1,3}(\.\d{1,3}){3})
+    |
+    # IPv6
+    (IPv6:[0-9a-f]{1,4}(:[0-9a-f]{1,4}){7})
+    |
+    (IPv6:([0-9a-f]{1,4}(:[0-9a-f]{1,4}){0,5})?::([0-9a-f]{1,4}(:[0-9a-f]{1,4}){0,5})?)
+    |
+    (IPv6:[0-9a-f]{1,4}(:[0-9a-f]{1,4}){5}:\d{1,3}(\.\d{1,3}){3})
+    |
+    (IPv6:([0-9a-f]{1,4}(:[0-9a-f]{1,4}){0,3})?::([0-9a-f]{1,4}(:[0-9a-f]{1,4}){0,3}:)?\d{1,3}(\.\d{1,3}){3})
+    |
+    # General address
+    ([a-z0-9-]*[a-z0-9]:[\x21-\x5A\x5E-\x7E]+)
+    )\])
+    |
+    # Domain
+    ((?!.{256,})(([0-9a-z\u0080-\U0010FFFF]([0-9a-z-\u0080-\U0010FFFF]*[0-9a-z\u0080-\U0010FFFF])?))(\.([0-9a-z\u0080-\U0010FFFF]([0-9a-z-\u0080-\U0010FFFF]*[0-9a-z\u0080-\U0010FFFF])?))*)
+    )
+    # end of string (unlike "$", this does not match before a trailing newline)
+    \Z
+    """,
+    # IGNORECASE: the classes above only spell out lower-case ASCII letters
+    re.VERBOSE | re.IGNORECASE | re.UNICODE,
+)
+
+
+def validate(email_str: object) -> bool:
+    """Validate a string as a RFC6531 email address.
+
+    Returns False for non-str input; otherwise True exactly when the whole
+    string matches RFC6531_REGEX.
+    """
+    if not isinstance(email_str, str):
+        return False
+    # Return a real bool instead of a Match object / None, and use fullmatch
+    # so that "$" cannot accept an address followed by a trailing newline.
+    return RFC6531_REGEX.fullmatch(email_str) is not None
+
+
+if __name__ == "__main__":
+    # Ad-hoc micro-benchmark: prints the best per-call time in nanoseconds.
+    import timeit
+
+    N = 100_000
+    cases = (("basic", "[email protected]"),)
+    print("benchmarking")
+    for name, val in cases:
+        stmt = f"validate({val!r})"
+        # three timing runs of N calls each; the fastest run is reported
+        all_times = timeit.repeat(stmt, globals=globals(), repeat=3, number=N)
+        print(f"{name} (valid={validate(val)}): {int(min(all_times) / N * 10**9)}ns")
diff --git a/tests/unit/formats/test_rfc5321.py b/tests/unit/formats/test_rfc5321.py
@@ -0,0 +1,57 @@
+import pytest
+
+from check_jsonschema.formats.implementations.rfc5321 import validate
+
+
+# Addresses that the rfc5321 ``validate`` is expected to accept.
+@pytest.mark.parametrize(
+    "emailstr",
+    (
+        r"[email protected]",
+        r"[email protected]",
+        r"[email protected]",
+        r"[email protected]",
+        r"[email protected]",
+        r"[email protected]",
+        r"name/[email protected]",
+        r"admin@example",
+        r"[email protected]",
+        r'" "@example.org',
+        r'"john..doe"@example.org',
+        r"[email protected]",
+        r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com',
+        r"user%[email protected]",
+        r"[email protected]",
+        r"postmaster@[123.123.123.123]",
+        r"postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]",
+        r"_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]",
+    ),
+)
+def test_simple_positive_cases(emailstr):
+    """Each parametrized address must be reported as valid."""
+    assert validate(emailstr)
+
+
+# Addresses that the rfc5321 ``validate`` is expected to reject; the list
+# includes addresses containing non-ASCII characters.
+@pytest.mark.parametrize(
+    "emailstr",
+    (
+        r"I❤️[email protected]",
+        r"用户@例子.广告",
+        r"ಬೆಂಬಲ@ಡೇಟಾಮೇಲ್.ಭಾರತ",
+        r"अजय@डाटा.भारत",
+        r"квіточка@пошта.укр",
+        r"χρήστης@παράδειγμα.ελ",
+        r"Dörte@Sörensen.example.com",
+        r"коля@пример.рф",
+        r"abc.example.com",
+        r"a@b@[email protected]",
+        r'a"b(c)d,e:f;g<h>i[j\k][email protected]',
+        r'just"not"[email protected]',
+        r'this is"not\[email protected]',
+        r"this\ still\"not\\[email protected]",
+        r"1234567890123456789012345678901234567890123456789012345678901234+x@example.com",
+        r"i.like.underscores@but_they_are_not_allowed_in_this_part",
+        r"trythis@123456789012345678901234567890123456789012345678901234567890123456.com",
+        r"another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com",
+    ),
+)
+def test_simple_negative_case(emailstr):
+    """Each parametrized address must be reported as invalid."""
+    assert not validate(emailstr)
diff --git a/tests/unit/formats/test_rfc6531.py b/tests/unit/formats/test_rfc6531.py
@@ -0,0 +1,59 @@
+import pytest
+
+from check_jsonschema.formats.implementations.rfc6531 import validate
+
+
+# Addresses that the rfc6531 ``validate`` is expected to accept, both plain
+# ASCII ones and ones containing non-ASCII characters. Every case is its own
+# comma-terminated element so that no two literals are concatenated.
+@pytest.mark.parametrize(
+    "emailstr",
+    (
+        r"[email protected]",
+        r"[email protected]",
+        r"[email protected]",
+        r"[email protected]",
+        r"[email protected]",
+        r"[email protected]",
+        r"name/[email protected]",
+        r"admin@example",
+        r"[email protected]",
+        r'" "@example.org',
+        r'"john..doe"@example.org',
+        r"[email protected]",
+        r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com',
+        r"user%[email protected]",
+        r"[email protected]",
+        r"postmaster@[123.123.123.123]",
+        r"postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]",
+        r"_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]",
+        r"I❤️[email protected]",
+        r"用户@例子.广告",
+        r"ಬೆಂಬಲ@ಡೇಟಾಮೇಲ್.ಭಾರತ",
+        r"अजय@डाटा.भारत",
+        r"квіточка@пошта.укр",
+        r"χρήστης@παράδειγμα.ελ",
+        r"Dörte@Sörensen.example.com",
+        r"коля@пример.рф",
+    ),
+)
+def test_simple_positive_cases(emailstr):
+    """Each parametrized address must be reported as valid."""
+    assert validate(emailstr)
+
+
+# Addresses that the rfc6531 ``validate`` is expected to reject.
+@pytest.mark.parametrize(
+    "emailstr",
+    (
+        r"abc.example.com",
+        r"a@b@[email protected]",
+        r'a"b(c)d,e:f;g<h>i[j\k][email protected]',
+        r'just"not"[email protected]',
+        r'this is"not\[email protected]',
+        r"this\ still\"not\\[email protected]",
+        r"1234567890123456789012345678901234567890123456789012345678901234+x@example.com",
+        r"i.like.underscores@but_they_are_not_allowed_in_this_part",
+        r"trythis@123456789012345678901234567890123456789012345678901234567890123456.com",
+        r"another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com",
+    ),
+)
+def test_simple_negative_case(emailstr):
+    """Each parametrized address must be reported as invalid."""
+    assert not validate(emailstr)
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,6 +9,7 @@ Unreleased @@
     ----------
     .. vendor-insert-here
+    - Add ``--format-email`` option to allow full validation of email/idn-email formats
     0.29.0
     ------
@@ Expand Down @@