diff --git a/CHANGELOG.md b/CHANGELOG.md index 94900bcb31..eab041079d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## Unreleased + +### Added + +* Support IDNA2003 hostnames as a fallback when IDNA2008 encoding fails. Pass `strict_idna=True` to `httpx.URL` to accept IDNA2008 hostnames only. (#3229) + ## 0.27.1 (27th August, 2024) -## Added +### Added * Support for `zstd` content decoding using the python `zstandard` package is added. Installable using `httpx[zstd]`. (#3139) diff --git a/docs/api.md b/docs/api.md index d01cc649ba..0c1ae84704 100644 --- a/docs/api.md +++ b/docs/api.md @@ -126,6 +126,7 @@ what gets sent over the wire.* * `.is_ssl` - **bool** * `.is_absolute_url` - **bool** * `.is_relative_url` - **bool** +* `.strict_idna` - **bool** * `def .copy_with([scheme], [authority], [path], [query], [fragment])` - **URL** ## `Headers` diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 479c2ef8a1..17abb58005 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -199,6 +199,8 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: if ":" in host and not (host.startswith("[") and host.endswith("]")): kwargs["host"] = f"[{host}]" + strict_idna = bool(kwargs.pop("strict_idna", False)) + # If any keyword arguments are provided, ensure they are valid. # ------------------------------------------------------------- @@ -256,7 +258,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: # with components that are plain ASCII bytestrings. 
parsed_scheme: str = scheme.lower() parsed_userinfo: str = quote(userinfo, safe=SUB_DELIMS + ":") - parsed_host: str = encode_host(host) + parsed_host: str = encode_host(host, strict_idna=strict_idna) parsed_port: int | None = normalize_port(port, scheme) has_scheme = parsed_scheme != "" @@ -300,7 +302,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: ) -def encode_host(host: str) -> str: +def encode_host(host: str, strict_idna: bool = False) -> str: if not host: return "" @@ -342,9 +344,15 @@ def encode_host(host: str) -> str: # IDNA hostnames try: - return idna.encode(host.lower()).decode("ascii") + encoded = idna.encode(host.lower()) except idna.IDNAError: - raise InvalidURL(f"Invalid IDNA hostname: {host!r}") + if strict_idna: + raise InvalidURL(f"Invalid IDNA hostname: {host!r}") + try: + encoded = host.lower().encode("idna") + except UnicodeError: + raise InvalidURL(f"Invalid hostname: {host!r}") + return encoded.decode("ascii") def normalize_port(port: str | int | None, scheme: str) -> int | None: diff --git a/httpx/_urls.py b/httpx/_urls.py index ec4ea6b399..3230761755 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -5,6 +5,7 @@ import idna +from ._exceptions import InvalidURL from ._types import QueryParamTypes, RawURL from ._urlparse import urlencode, urlparse from ._utils import primitive_value_to_str @@ -75,6 +76,7 @@ class URL: """ def __init__(self, url: URL | str = "", **kwargs: typing.Any) -> None: + self._strict_idna = kwargs.pop("strict_idna", False) if kwargs: allowed = { "scheme": str, @@ -114,7 +116,7 @@ def __init__(self, url: URL | str = "", **kwargs: typing.Any) -> None: kwargs["query"] = None if not params else str(QueryParams(params)) if isinstance(url, str): - self._uri_reference = urlparse(url, **kwargs) + self._uri_reference = urlparse(url, strict_idna=self._strict_idna, **kwargs) elif isinstance(url, URL): self._uri_reference = url._uri_reference.copy_with(**kwargs) else: @@ -186,9 +188,12 @@ def host(self) -> 
str: assert url.host == "::ffff:192.168.0.1" """ host: str = self._uri_reference.host - if host.startswith("xn--"): - host = idna.decode(host) + try: + host = idna.decode(host) + except (idna.IDNAError, idna.core.InvalidCodepoint): + if self._strict_idna: + raise InvalidURL(f"Invalid IDNA host: {host!r}") return host diff --git a/tests/client/test_redirects.py b/tests/client/test_redirects.py index f65827134c..a0b46d4fea 100644 --- a/tests/client/test_redirects.py +++ b/tests/client/test_redirects.py @@ -37,7 +37,7 @@ def redirects(request: httpx.Request) -> httpx.Response: elif request.url.path == "/invalid_redirect": status_code = httpx.codes.SEE_OTHER - raw_headers = [(b"location", "https://😇/".encode("utf-8"))] + raw_headers = [(b"location", "https://�/".encode("utf-8"))] return httpx.Response(status_code, headers=raw_headers) elif request.url.path == "/no_scheme_redirect": diff --git a/tests/models/test_url.py b/tests/models/test_url.py index 523a89bf65..b9e6f0f1d5 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -349,7 +349,16 @@ def test_url_invalid_hostname(): Ensure that invalid URLs raise an `httpx.InvalidURL` exception. 
""" with pytest.raises(httpx.InvalidURL): - httpx.URL("https://😇/") + httpx.URL("https://😇/", strict_idna=True) + with pytest.raises(httpx.InvalidURL): + assert httpx.URL("https://xn--n3h.com/", strict_idna=True).host + + +def test_url_with_emoji(): + assert str(httpx.URL("https://😇/")) == "https://xn--l28h/" + assert httpx.URL("https://😇/") == httpx.URL("https://xn--l28h/") + assert httpx.URL("https://😇/", strict_idna=False) == httpx.URL("https://xn--l28h/") + assert str(httpx.URL("https://☃.com/")) == "https://xn--n3h.com/" def test_url_excessively_long_url(): @@ -802,7 +811,7 @@ def test_url_escaped_idna_host(): def test_url_invalid_idna_host(): with pytest.raises(httpx.InvalidURL) as exc: - httpx.URL("https://☃.com/") + httpx.URL("https://☃.com/", strict_idna=True) assert str(exc.value) == "Invalid IDNA hostname: '☃.com'"