23
23
*/
24
24
class PublicSuffixListManager
25
25
{
26
- const PUBLIC_SUFFIX_LIST_URL = 'https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat ' ;
27
-
28
- /**
29
- * @var string Public Suffix List Source URL
30
- */
31
- private $ sourceUrl ;
26
+ const PSL_URL = 'https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat ' ;
32
27
33
28
/**
34
29
* @var CacheInterface PSR-16 cache adapter
@@ -45,16 +40,11 @@ class PublicSuffixListManager
45
40
*
46
41
* @param CacheInterface $cacheAdapter
47
42
* @param HttpAdapter $httpAdapter
48
- * @param string $sourceUrl
49
43
*/
50
- public function __construct (
51
- CacheInterface $ cacheAdapter ,
52
- HttpAdapter $ httpAdapter ,
53
- string $ sourceUrl = self ::PUBLIC_SUFFIX_LIST_URL
54
- ) {
44
+ public function __construct (CacheInterface $ cacheAdapter , HttpAdapter $ httpAdapter )
45
+ {
55
46
$ this ->cacheAdapter = $ cacheAdapter ;
56
47
$ this ->httpAdapter = $ httpAdapter ;
57
- $ this ->sourceUrl = $ sourceUrl ;
58
48
}
59
49
60
50
/**
@@ -64,18 +54,18 @@ public function __construct(
64
54
*
65
55
* @return PublicSuffixList
66
56
*/
67
- public function getList ($ type = PublicSuffixList::ALL_DOMAINS ): PublicSuffixList
57
+ public function getList (string $ type = PublicSuffixList::ALL_DOMAINS , string $ sourceUrl = self :: PSL_URL ): PublicSuffixList
68
58
{
69
- static $ type_lists = [
59
+ static $ availableTypes = [
70
60
PublicSuffixList::ALL_DOMAINS => PublicSuffixList::ALL_DOMAINS ,
71
61
PublicSuffixList::ICANN_DOMAINS => PublicSuffixList::ICANN_DOMAINS ,
72
62
PublicSuffixList::PRIVATE_DOMAINS => PublicSuffixList::PRIVATE_DOMAINS ,
73
63
];
74
64
75
- $ type = $ type_lists [$ type ] ?? PublicSuffixList::ALL_DOMAINS ;
65
+ $ type = $ availableTypes [$ type ] ?? PublicSuffixList::ALL_DOMAINS ;
76
66
$ list = $ this ->cacheAdapter ->get ($ type );
77
67
if ($ list === null ) {
78
- $ this ->refreshPublicSuffixList ();
68
+ $ this ->refreshPublicSuffixList ($ sourceUrl );
79
69
$ list = $ this ->cacheAdapter ->get ($ type );
80
70
}
81
71
@@ -90,64 +80,64 @@ public function getList($type = PublicSuffixList::ALL_DOMAINS): PublicSuffixList
90
80
*
91
81
* @return bool
92
82
*/
93
- public function refreshPublicSuffixList (): bool
83
+ public function refreshPublicSuffixList (string $ sourceUrl = self :: PSL_URL ): bool
94
84
{
95
- $ publicSuffixList = $ this ->httpAdapter ->getContent ($ this -> sourceUrl );
96
- $ publicSuffixListTypes = $ this ->convertListToArray ( $ publicSuffixList );
85
+ $ content = $ this ->httpAdapter ->getContent ($ sourceUrl );
86
+ $ list = $ this ->parse ( $ content );
97
87
98
- return $ this ->cacheAdapter ->setMultiple (array_map ('json_encode ' , $ publicSuffixListTypes ));
88
+ return $ this ->cacheAdapter ->setMultiple (array_map ('json_encode ' , $ list ));
99
89
}
100
90
101
91
/**
102
92
* Parses text representation of list to associative, multidimensional array.
103
93
*
104
- * @param string $publicSuffixList
94
+ * @param string $content the Public Suffix List content as a string
105
95
*
106
96
* @return array Associative, multidimensional array representation of the
107
97
* public suffix list
108
98
*/
109
- private function convertListToArray (string $ publicSuffixList ): array
99
+ private function parse (string $ content ): array
110
100
{
111
- $ addDomain = [
101
+ $ sectionList = [
102
+ PublicSuffixList::ALL_DOMAINS => true ,
112
103
PublicSuffixList::ICANN_DOMAINS => false ,
113
104
PublicSuffixList::PRIVATE_DOMAINS => false ,
114
105
];
115
106
116
- $ publicSuffixListTypes = [
107
+ $ lists = [
117
108
PublicSuffixList::ALL_DOMAINS => [],
118
109
PublicSuffixList::ICANN_DOMAINS => [],
119
110
PublicSuffixList::PRIVATE_DOMAINS => [],
120
111
];
121
112
122
- $ data = new SplTempFileObject ();
123
- $ data ->fwrite ($ publicSuffixList );
124
- $ data ->setFlags (SplTempFileObject::DROP_NEW_LINE | SplTempFileObject::READ_AHEAD | SplTempFileObject::SKIP_EMPTY );
125
- foreach ($ data as $ line ) {
126
- $ addDomain = $ this ->validateDomainAddition ($ line , $ addDomain );
127
- if (strstr ($ line , '// ' ) ! == false ) {
128
- continue ;
113
+ $ fileObj = new SplTempFileObject ();
114
+ $ fileObj ->fwrite ($ content );
115
+ $ fileObj ->setFlags (SplTempFileObject::DROP_NEW_LINE | SplTempFileObject::READ_AHEAD | SplTempFileObject::SKIP_EMPTY );
116
+ foreach ($ fileObj as $ line ) {
117
+ $ sectionList = $ this ->validateAddingSection ($ line , $ sectionList );
118
+ if (strpos ($ line , '// ' ) = == false ) {
119
+ $ lists = $ this -> convertLine ( $ line , $ lists , $ sectionList ) ;
129
120
}
130
- $ publicSuffixListTypes = $ this ->convertLineToArray ($ line , $ publicSuffixListTypes , $ addDomain );
131
121
}
132
122
133
- return $ publicSuffixListTypes ;
123
+ return $ lists ;
134
124
}
135
125
136
126
/**
137
127
* Update the addition status for a given line against the domain list (ICANN and PRIVATE).
138
128
*
139
- * @param string $line the current file line
140
- * @param array $addDomain the domain addition status
129
+ * @param string $line the current file line
130
+ * @param array $sectionList the domain addition status
141
131
*
142
132
* @return array
143
133
*/
144
- private function validateDomainAddition (string $ line , array $ addDomain ): array
134
+ private function validateAddingSection (string $ line , array $ sectionList ): array
145
135
{
146
- foreach ($ addDomain as $ section => $ status ) {
147
- $ addDomain [$ section ] = $ this ->isValidSection ($ status , $ line , $ section );
136
+ foreach ($ sectionList as $ section => $ status ) {
137
+ $ sectionList [$ section ] = $ this ->isValidSection ($ status , $ line , $ section );
148
138
}
149
139
150
- return $ addDomain ;
140
+ return $ sectionList ;
151
141
}
152
142
153
143
/**
@@ -175,24 +165,23 @@ private function isValidSection(bool $previousStatus, string $line, string $sect
175
165
/**
176
166
* Convert a line from the Public Suffix list.
177
167
*
178
- * @param string $rule Public Suffix List text line
179
- * @param array $publicSuffixListTypes Associative, multidimensional array representation of the
180
- * public suffx list
181
- * @param array $validTypes Tell which section should be converted
168
+ * @param string $line Public Suffix List text line
169
+ * @param array $lists Associative, multidimensional array representation of the
170
+ * public suffix list
171
+ * @param array $validTypes Tell which section should be converted
182
172
*
183
173
* @return array Associative, multidimensional array representation of the
184
174
* public suffix list
185
175
*/
186
- private function convertLineToArray (string $ line , array $ publicSuffixListTypes , array $ validTypes ): array
176
+ private function convertLine (string $ line , array $ lists , array $ validTypes ): array
187
177
{
188
178
$ ruleParts = explode ('. ' , $ line );
189
179
$ validTypes = array_keys (array_filter ($ validTypes ));
190
- $ validTypes [] = PublicSuffixList::ALL_DOMAINS ;
191
180
foreach ($ validTypes as $ type ) {
192
- $ publicSuffixListTypes [$ type ] = $ this ->buildArray ( $ publicSuffixListTypes [$ type ], $ ruleParts );
181
+ $ lists [$ type ] = $ this ->addRule ( $ lists [$ type ], $ ruleParts );
193
182
}
194
183
195
- return $ publicSuffixListTypes ;
184
+ return $ lists ;
196
185
}
197
186
198
187
/**
@@ -204,15 +193,15 @@ private function convertLineToArray(string $line, array $publicSuffixListTypes,
204
193
* A copy of the Apache License, Version 2.0, is provided with this
205
194
* distribution
206
195
*
207
- * @param array $publicSuffixList Initially an empty array, this eventually
208
- * becomes the array representation of the Public Suffix List
209
- * @param array $ruleParts One line (rule) from the Public Suffix List
210
- * exploded on '.', or the remaining portion of that array during recursion
196
+ * @param array $list Initially an empty array, this eventually
197
+ * becomes the array representation of the Public Suffix List
198
+ * @param array $ruleParts One line (rule) from the Public Suffix List
199
+ * exploded on '.', or the remaining portion of that array during recursion
200
+ *
201
+ * @return array
211
202
*/
212
- private function buildArray (array $ publicSuffixList , array $ ruleParts ): array
203
+ private function addRule (array $ list , array $ ruleParts ): array
213
204
{
214
- $ isDomain = true ;
215
-
216
205
$ part = array_pop ($ ruleParts );
217
206
218
207
// Adheres to canonicalization rule from the "Formal Algorithm" section
@@ -221,17 +210,20 @@ private function buildArray(array $publicSuffixList, array $ruleParts): array
221
210
// for hostnames - lower-case, Punycode (RFC 3492)."
222
211
223
212
$ part = idn_to_ascii ($ part , 0 , INTL_IDNA_VARIANT_UTS46 );
213
+ $ isDomain = true ;
224
214
if (strpos ($ part , '! ' ) === 0 ) {
225
215
$ part = substr ($ part , 1 );
226
216
$ isDomain = false ;
227
217
}
228
218
229
- $ publicSuffixList [$ part ] = $ publicSuffixList [$ part ] ?? ($ isDomain ? [] : ['! ' => '' ]);
219
+ if (!isset ($ list [$ part ])) {
220
+ $ list [$ part ] = $ isDomain ? [] : ['! ' => '' ];
221
+ }
230
222
231
223
if ($ isDomain && !empty ($ ruleParts )) {
232
- $ publicSuffixList [$ part ] = $ this ->buildArray ( $ publicSuffixList [$ part ], $ ruleParts );
224
+ $ list [$ part ] = $ this ->addRule ( $ list [$ part ], $ ruleParts );
233
225
}
234
226
235
- return $ publicSuffixList ;
227
+ return $ list ;
236
228
}
237
229
}
0 commit comments