Skip to content

Commit 0388188

Browse files
committed
Improve IDN conversion
1 parent e47fccb commit 0388188

File tree

5 files changed

+55
-37
lines changed

5 files changed

+55
-37
lines changed

src/Domain.php

+6-1
Original file line number | Diff line number | Diff line change
@@ -259,7 +259,12 @@ public function toAscii(): self
259259
return $this;
260260
}
261261

262-
return new self($this->idnToAscii($this->domain), $this->publicSuffix->toAscii());
262+
$newDomain = $this->idnToAscii($this->domain);
263+
if ($newDomain === $this->domain) {
264+
return $this;
265+
}
266+
267+
return new self($newDomain, $this->publicSuffix->toAscii());
263268
}
264269

265270
/**

src/IDNAConverterTrait.php

+25-32
Original file line number | Diff line number | Diff line change
@@ -18,28 +18,6 @@
1818
*/
1919
trait IDNAConverterTrait
2020
{
21-
/**
22-
* IDNA errors
23-
*
24-
* @see http://icu-project.org/apiref/icu4j/com/ibm/icu/text/IDNA.Error.html
25-
* @var array
26-
*/
27-
private static $idn_errors = [
28-
IDNA_ERROR_EMPTY_LABEL => 'a non-final domain name label (or the whole domain name) is empty',
29-
IDNA_ERROR_LABEL_TOO_LONG => 'a domain name label is longer than 63 bytes',
30-
IDNA_ERROR_DOMAIN_NAME_TOO_LONG => 'a domain name is longer than 255 bytes in its storage form',
31-
IDNA_ERROR_LEADING_HYPHEN => 'a label starts with a hyphen-minus ("-")',
32-
IDNA_ERROR_TRAILING_HYPHEN => 'a label ends with a hyphen-minus ("-")',
33-
IDNA_ERROR_HYPHEN_3_4 => 'a label contains hyphen-minus ("-") in the third and fourth positions',
34-
IDNA_ERROR_LEADING_COMBINING_MARK => 'a label starts with a combining mark',
35-
IDNA_ERROR_DISALLOWED => 'a label or domain name contains disallowed characters',
36-
IDNA_ERROR_PUNYCODE => 'a label starts with "xn--" but does not contain valid Punycode',
37-
IDNA_ERROR_LABEL_HAS_DOT => 'a label contains a dot=full stop',
38-
IDNA_ERROR_INVALID_ACE_LABEL => 'An ACE label does not contain a valid label string',
39-
IDNA_ERROR_BIDI => 'a label does not meet the IDNA BiDi requirements (for right-to-left characters)',
40-
IDNA_ERROR_CONTEXTJ => 'a label does not meet the IDNA CONTEXTJ requirements',
41-
];
42-
4321
/**
4422
* Get and format IDN conversion error message
4523
*
@@ -49,8 +27,29 @@ trait IDNAConverterTrait
4927
*/
5028
private static function getIdnErrors(int $error_bit): string
5129
{
30+
/**
31+
* IDNA errors
32+
*
33+
* @see http://icu-project.org/apiref/icu4j/com/ibm/icu/text/IDNA.Error.html
34+
*/
35+
static $idn_errors = [
36+
IDNA_ERROR_EMPTY_LABEL => 'a non-final domain name label (or the whole domain name) is empty',
37+
IDNA_ERROR_LABEL_TOO_LONG => 'a domain name label is longer than 63 bytes',
38+
IDNA_ERROR_DOMAIN_NAME_TOO_LONG => 'a domain name is longer than 255 bytes in its storage form',
39+
IDNA_ERROR_LEADING_HYPHEN => 'a label starts with a hyphen-minus ("-")',
40+
IDNA_ERROR_TRAILING_HYPHEN => 'a label ends with a hyphen-minus ("-")',
41+
IDNA_ERROR_HYPHEN_3_4 => 'a label contains hyphen-minus ("-") in the third and fourth positions',
42+
IDNA_ERROR_LEADING_COMBINING_MARK => 'a label starts with a combining mark',
43+
IDNA_ERROR_DISALLOWED => 'a label or domain name contains disallowed characters',
44+
IDNA_ERROR_PUNYCODE => 'a label starts with "xn--" but does not contain valid Punycode',
45+
IDNA_ERROR_LABEL_HAS_DOT => 'a label contains a dot=full stop',
46+
IDNA_ERROR_INVALID_ACE_LABEL => 'An ACE label does not contain a valid label string',
47+
IDNA_ERROR_BIDI => 'a label does not meet the IDNA BiDi requirements (for right-to-left characters)',
48+
IDNA_ERROR_CONTEXTJ => 'a label does not meet the IDNA CONTEXTJ requirements',
49+
];
50+
5251
$res = [];
53-
foreach (self::$idn_errors as $error => $reason) {
52+
foreach ($idn_errors as $error => $reason) {
5453
if ($error_bit & $error) {
5554
$res[] = $reason;
5655
}
@@ -75,7 +74,9 @@ private function idnToAscii(string $host): string
7574
$host = rawurldecode($host);
7675
}
7776

78-
if (!preg_match('/[\pL]+/u', $host)) {
77+
$host = strtolower($host);
78+
static $pattern = '/[\pL]+/u';
79+
if (!preg_match($pattern, $host)) {
7980
return $host;
8081
}
8182

@@ -99,14 +100,6 @@ private function idnToAscii(string $host): string
99100
*/
100101
private function idnToUnicode(string $host): string
101102
{
102-
if (false !== strpos($host, '%')) {
103-
$host = $this->idnToAscii($host);
104-
}
105-
106-
if (false === strpos($host, 'xn--')) {
107-
return $host;
108-
}
109-
110103
$output = idn_to_utf8($host, 0, INTL_IDNA_VARIANT_UTS46, $arr);
111104
if (!$arr['errors']) {
112105
return $output;

src/PublicSuffix.php

+16-3
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,14 @@ public static function __set_state(array $properties): self
5757
*/
5858
public function __construct(string $publicSuffix = null, string $section = '')
5959
{
60+
if (false !== strpos((string) $publicSuffix, '%')) {
61+
$publicSuffix = rawurldecode($publicSuffix);
62+
}
63+
64+
if (null !== $publicSuffix) {
65+
$publicSuffix = strtolower($publicSuffix);
66+
}
67+
6068
$this->publicSuffix = $publicSuffix;
6169
$this->section = $section;
6270
}
@@ -146,7 +154,7 @@ public function isPrivate(): bool
146154
*/
147155
public function toUnicode(): self
148156
{
149-
if (null === $this->publicSuffix) {
157+
if (null === $this->publicSuffix || false === strpos($this->publicSuffix, 'xn--')) {
150158
return $this;
151159
}
152160

@@ -165,10 +173,15 @@ public function toUnicode(): self
165173
*/
166174
public function toAscii(): self
167175
{
168-
if (null === $this->publicSuffix) {
176+
if (null === $this->publicSuffix || false !== strpos($this->publicSuffix, 'xn--')) {
177+
return $this;
178+
}
179+
180+
$newPublicSuffix = $this->idnToAscii($this->publicSuffix);
181+
if ($newPublicSuffix === $this->publicSuffix) {
169182
return $this;
170183
}
171184

172-
return new self($this->idnToAscii($this->publicSuffix), $this->section);
185+
return new self($newPublicSuffix, $this->section);
173186
}
174187
}

src/Rules.php

+1-1
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,7 @@ private function findPublicSuffix(string $domain, string $section): PublicSuffix
210210
private function normalizeDomain(string $domain): string
211211
{
212212
try {
213-
return strtolower($this->idnToAscii($domain));
213+
return $this->idnToAscii($domain);
214214
} catch (Exception $e) {
215215
return '';
216216
}

tests/PublicSuffixTest.php

+7
Original file line number | Diff line number | Diff line change
@@ -28,4 +28,11 @@ public function testPSToAsciiThrowsException()
2828
$this->expectException(Exception::class);
2929
(new PublicSuffix('_b%C3%A9bé.be-'))->toAscii();
3030
}
31+
32+
public function testConversionReturnsTheSameInstance()
33+
{
34+
$instance = new PublicSuffix('ac.be', Rules::ICANN_DOMAINS);
35+
$this->assertSame($instance->toUnicode(), $instance);
36+
$this->assertSame($instance->toAscii(), $instance);
37+
}
3138
}

0 commit comments

Comments
 (0)