Skip to content

Commit c1cbf66

Browse files
committed
Introduce DomainInterface
- Domain and PublicSuffix implement DomainInterface - Improve internal code for the Domain, PublicSuffix and Rules classes - Domain validation is now supported on instantiation
1 parent bbd49b8 commit c1cbf66

15 files changed

+822
-263
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@ All Notable changes to `PHP Domain Parser` **5.x** series will be documented in
66

77
### Added
88

9+
- `Pdp\DomainInterface` interface implemented by `Pdp\Domain` and `Pdp\PublicSuffix`
910
- `Pdp\Domain::getContent` returns the domain name value; it replaces `Pdp\Domain::getDomain`
1011
- `Pdp\Domain` implements the `Countable` interface.
1112

1213
### Fixed
1314

1415
- `Pdp\Domain` domain part computation (public suffix, registrable domain and sub domain)
16+
- `Pdp\Domain` and `Pdp\PublicSuffix` host validation compliance with the RFCs improved
1517

1618
### Deprecated
1719

data/pdp-PSL_FULL_5a3cc7f81795bb2e48e848af42d287b4.cache

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/Converter.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ private function addRule(array $list, array $rule_parts): array
104104
// "The domain and all rules must be canonicalized in the normal way
105105
// for hostnames - lower-case, Punycode (RFC 3492)."
106106

107-
$part = $this->idnToAscii($part);
107+
$part = $this->idnToAscii(strtolower($part));
108108
$isDomain = true;
109109
if (0 === strpos($part, '!')) {
110110
$part = substr($part, 1);

src/Domain.php

Lines changed: 88 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
namespace Pdp;
1313

14-
use Countable;
1514
use JsonSerializable;
1615

1716
/**
@@ -28,7 +27,7 @@
2827
* @author Jeremy Kendall <[email protected]>
2928
* @author Ignace Nyamagana Butera <[email protected]>
3029
*/
31-
final class Domain implements Countable, JsonSerializable
30+
final class Domain implements DomainInterface, JsonSerializable
3231
{
3332
use IDNAConverterTrait;
3433

@@ -37,6 +36,11 @@ final class Domain implements Countable, JsonSerializable
3736
*/
3837
private $domain;
3938

39+
/**
40+
* @var string[]
41+
*/
42+
private $labels;
43+
4044
/**
4145
* @var PublicSuffix
4246
*/
@@ -66,59 +70,51 @@ public static function __set_state(array $properties): self
6670
* @param string|null $domain
6771
* @param PublicSuffix $publicSuffix
6872
*/
69-
public function __construct($domain = null, PublicSuffix $publicSuffix = null)
73+
public function __construct(string $domain = null, PublicSuffix $publicSuffix = null)
7074
{
71-
$this->domain = $this->setDomain($domain);
75+
list($this->domain, $this->labels) = $this->setDomain($domain);
7276
$this->publicSuffix = $this->setPublicSuffix($publicSuffix);
77+
$this->assertValidState();
7378
$this->registrableDomain = $this->setRegistrableDomain();
7479
$this->subDomain = $this->setSubDomain();
7580
}
7681

7782
/**
78-
* Normalize the given domain.
83+
* Sets the public suffix domain part.
7984
*
80-
* @param string|null $domain
85+
* @param PublicSuffix|null $publicSuffix
8186
*
82-
* @return string|null
87+
* @return PublicSuffix
8388
*/
84-
private function setDomain(string $domain = null)
89+
private function setPublicSuffix(PublicSuffix $publicSuffix = null): PublicSuffix
8590
{
86-
if (null === $domain) {
87-
return null;
88-
}
89-
90-
if (false !== strpos($domain, '%')) {
91-
$domain = rawurldecode($domain);
91+
if (null === $publicSuffix
92+
|| null === $this->domain
93+
|| false === strpos($this->domain, '.')
94+
|| count($this->labels) === count($publicSuffix)
95+
) {
96+
return new PublicSuffix();
9297
}
9398

94-
return strtolower($domain);
99+
return $publicSuffix;
95100
}
96101

97102
/**
98-
* Sets the public suffix domain part.
99-
*
100-
* @param PublicSuffix|null $publicSuffix
103+
* Asserts that the domain internal state is valid.
101104
*
102-
* @return PublicSuffix
105+
* @throws Exception if the public suffix does not match the domain
103106
*/
104-
private function setPublicSuffix(PublicSuffix $publicSuffix = null): PublicSuffix
107+
protected function assertValidState()
105108
{
106-
$publicSuffix = $publicSuffix ?? new PublicSuffix();
107-
if (null === $publicSuffix->getContent()) {
108-
return $publicSuffix;
109-
}
110-
111-
if (null === $this->domain || false === strpos($this->domain, '.')) {
112-
return new PublicSuffix();
109+
foreach ($this->publicSuffix as $offset => $label) {
110+
if ($label !== $this->labels[$offset]) {
111+
throw new Exception(sprintf('The submitted public suffix `%s` is invalid for the given domain `%s`', $this->publicSuffix->getContent(), $this->domain));
112+
}
113113
}
114-
115-
return $publicSuffix;
116114
}
117115

118116
/**
119117
* Computes the registrable domain part.
120-
*
121-
* @return string|null
122118
*/
123119
private function setRegistrableDomain()
124120
{
@@ -129,9 +125,6 @@ private function setRegistrableDomain()
129125
$labels = explode('.', $this->domain);
130126
$countLabels = count($labels);
131127
$countPublicSuffixLabels = count($this->publicSuffix);
132-
if ($countLabels === $countPublicSuffixLabels) {
133-
return null;
134-
}
135128

136129
return implode('.', array_slice($labels, $countLabels - $countPublicSuffixLabels - 1));
137130
}
@@ -157,16 +150,30 @@ private function setSubDomain()
157150
return implode('.', array_slice($labels, 0, $countLabels - $countLabelsToRemove));
158151
}
159152

153+
/**
154+
* {@inheritdoc}
155+
*/
156+
public function getIterator()
157+
{
158+
foreach ($this->labels as $offset => $label) {
159+
yield $label;
160+
}
161+
}
162+
160163
/**
161164
* {@inheritdoc}
162165
*/
163166
public function jsonSerialize()
164167
{
165-
return array_merge([
168+
return [
166169
'domain' => $this->domain,
167170
'registrableDomain' => $this->registrableDomain,
168171
'subDomain' => $this->subDomain,
169-
], $this->publicSuffix->jsonSerialize());
172+
'publicSuffix' => $this->publicSuffix->getContent(),
173+
'isKnown' => $this->isKnown(),
174+
'isICANN' => $this->isICANN(),
175+
'isPrivate' => $this->isPrivate(),
176+
];
170177
}
171178

172179
/**
@@ -182,19 +189,11 @@ public function __debugInfo()
182189
*/
183190
public function count()
184191
{
185-
if (null === $this->domain) {
186-
return 0;
187-
}
188-
189-
return count(explode('.', $this->domain));
192+
return count($this->labels);
190193
}
191194

192195
/**
193-
* Returns the domain content.
194-
*
195-
* This method should return null on seriously malformed domain name
196-
*
197-
* @return string|null
196+
* {@inheritdoc}
198197
*/
199198
public function getContent()
200199
{
@@ -206,7 +205,7 @@ public function getContent()
206205
*
207206
* DEPRECATION WARNING! This method will be removed in the next major point release
208207
*
209-
* @deprecated deprecated since version 5.3
208+
* @deprecated 5.3 deprecated
210209
* @see Domain::getContent
211210
*
212211
* This method should return null on seriously malformed domain name
@@ -218,6 +217,26 @@ public function getDomain()
218217
return $this->getContent();
219218
}
220219

220+
/**
221+
* {@inheritdoc}
222+
*/
223+
public function getLabel(int $offset)
224+
{
225+
if ($offset < 0) {
226+
$offset += count($this->labels);
227+
}
228+
229+
return $this->labels[$offset] ?? null;
230+
}
231+
232+
/**
233+
* {@inheritdoc}
234+
*/
235+
public function keys(string $label): array
236+
{
237+
return array_keys($this->labels, $label, true);
238+
}
239+
221240
/**
222241
* Returns the registrable domain.
223242
*
@@ -258,7 +277,7 @@ public function getPublicSuffix()
258277
}
259278

260279
/**
261-
* Tells whether the public suffix has been matching rule in a Public Suffix List.
280+
* Tells whether the public suffix has a matching rule in a Public Suffix List.
262281
*
263282
* @return bool
264283
*/
@@ -288,45 +307,41 @@ public function isPrivate(): bool
288307
}
289308

290309
/**
291-
* Converts the domain to its IDNA ASCII form.
292-
*
293-
* This method MUST retain the state of the current instance, and return
294-
* an instance with is content converted to its IDNA ASCII form
295-
*
296-
* @throws Exception if the domain can not be converted to ASCII using IDN UTS46 algorithm
297-
*
298-
* @return self
310+
* {@inheritdoc}
299311
*/
300-
public function toAscii(): self
312+
public function toAscii()
301313
{
302-
if (null === $this->domain || false !== strpos($this->domain, 'xn--')) {
314+
static $pattern = '/[^\x20-\x7f]/';
315+
if (null === $this->domain || !preg_match($pattern, $this->domain)) {
303316
return $this;
304317
}
305318

306-
$newDomain = $this->idnToAscii($this->domain);
307-
if ($newDomain === $this->domain) {
308-
return $this;
309-
}
319+
$clone = clone $this;
320+
$clone->domain = $this->idnToAscii($this->domain);
321+
$clone->labels = array_reverse(explode('.', $clone->domain));
322+
$clone->publicSuffix = $this->publicSuffix->toAscii();
323+
$clone->registrableDomain = $clone->setRegistrableDomain();
324+
$clone->subDomain = $clone->setSubDomain();
310325

311-
return new self($newDomain, $this->publicSuffix->toAscii());
326+
return $clone;
312327
}
313328

314329
/**
315-
* Converts the domain to its IDNA UTF8 form.
316-
*
317-
* This method MUST retain the state of the current instance, and return
318-
* an instance with is content converted to its IDNA UTF8 form
319-
*
320-
* @throws Exception if the domain can not be converted to Unicode using IDN UTS46 algorithm
321-
*
322-
* @return self
330+
* {@inheritdoc}
323331
*/
324-
public function toUnicode(): self
332+
public function toUnicode()
325333
{
326334
if (null === $this->domain || false === strpos($this->domain, 'xn--')) {
327335
return $this;
328336
}
329337

330-
return new self($this->idnToUnicode($this->domain), $this->publicSuffix->toUnicode());
338+
$clone = clone $this;
339+
$clone->domain = $this->idnToUnicode($this->domain);
340+
$clone->labels = array_reverse(explode('.', $clone->domain));
341+
$clone->publicSuffix = $this->publicSuffix->toUnicode();
342+
$clone->registrableDomain = $clone->setRegistrableDomain();
343+
$clone->subDomain = $clone->setSubDomain();
344+
345+
return $clone;
331346
}
332347
}

src/DomainInterface.php

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
<?php
2+
/**
3+
* PHP Domain Parser: Public Suffix List based URL parsing.
4+
*
5+
* @see http://github.com/jeremykendall/php-domain-parser for the canonical source repository
6+
*
7+
* @copyright Copyright (c) 2017 Jeremy Kendall (http://jeremykendall.net)
8+
* @license http://github.com/jeremykendall/php-domain-parser/blob/master/LICENSE MIT License
9+
*/
10+
declare(strict_types=1);
11+
12+
namespace Pdp;
13+
14+
use Countable;
15+
use IteratorAggregate;
16+
17+
/**
18+
* Domain Interface
19+
*
20+
* @see https://tools.ietf.org/html/rfc1034#section-3.5
21+
* @see https://tools.ietf.org/html/rfc1123#section-2.1
22+
* @see https://tools.ietf.org/html/rfc5890
23+
*
24+
* @author Ignace Nyamagana Butera <[email protected]>
25+
*/
26+
interface DomainInterface extends Countable, IteratorAggregate
27+
{
28+
/**
29+
* Returns the domain content.
30+
*
31+
* @return string|null
32+
*/
33+
public function getContent();
34+
35+
/**
36+
* Retrieves a single domain label.
37+
*
38+
* If $offset is non-negative, the returned value will be the label at $offset position.
39+
* If $offset is negative, the returned value will be the label at $offset position from the end.
40+
*
41+
* If no label is found at the submitted $offset the returned value will be null.
42+
*
43+
* @param int $offset the label offset
44+
*
45+
* @return string|null
46+
*/
47+
public function getLabel(int $offset);
48+
49+
/**
50+
* Returns the associated key for each label.
51+
*
52+
* Only the keys associated with
53+
* the given label will be returned
54+
*
55+
* @param string $label the label whose keys are to be returned
56+
*
57+
* @return int[]
58+
*/
59+
public function keys(string $label): array;
60+
61+
/**
62+
* Converts the domain to its IDNA ASCII form.
63+
*
64+
* This method MUST retain the state of the current instance, and return
65+
* an instance with its content converted to its IDNA ASCII form
66+
*
67+
* @throws Exception if the domain can not be converted to ASCII using IDN UTS46 algorithm
68+
*
69+
* @return static
70+
*/
71+
public function toAscii();
72+
73+
/**
74+
* Converts the domain to its IDNA UTF8 form.
75+
*
76+
* This method MUST retain the state of the current instance, and return
77+
* an instance with its content converted to its IDNA UTF8 form
78+
*
79+
* @throws Exception if the domain can not be converted to Unicode using IDN UTS46 algorithm
80+
*
81+
* @return static
82+
*/
83+
public function toUnicode();
84+
}

0 commit comments

Comments
 (0)