Skip to content

Commit 40601f6

Browse files
committed
Add idn-hostname validity and contextual rule tests
See: https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2
1 parent fc05651 commit 40601f6

File tree

2 files changed

+372
-0
lines changed

2 files changed

+372
-0
lines changed

tests/draft2019-09/optional/format/idn-hostname.json

+186
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,192 @@
8282
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.2",
8383
"data": "\u0488hello",
8484
"valid": false
85+
},
86+
{
87+
"description": "Exceptions that are PVALID, left-to-right chars",
88+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.2 https://tools.ietf.org/html/rfc5892#section-2.6",
89+
"data": "\u00df\u03c2\u0f0b\u3007",
90+
"valid": true
91+
},
92+
{
93+
"description": "Exceptions that are PVALID, right-to-left chars",
94+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.2 https://tools.ietf.org/html/rfc5892#section-2.6",
95+
"data": "\u06fd\u06fe",
96+
"valid": true
97+
},
98+
{
99+
"description": "Exceptions that are DISALLOWED, right-to-left chars",
100+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.2 https://tools.ietf.org/html/rfc5892#section-2.6",
101+
"data": "\u0640\u07fa",
102+
"valid": false
103+
},
104+
{
105+
"description": "Exceptions that are DISALLOWED, left-to-right chars",
106+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.2 https://tools.ietf.org/html/rfc5892#section-2.6 Note: The two combining marks (U+302E and U+302F) are in the middle and not at the start",
107+
"data": "\u3031\u3032\u3033\u3034\u3035\u302e\u302f\u303b",
108+
"valid": false
109+
},
110+
{
111+
"description": "MIDDLE DOT with no preceding 'l'",
112+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.3",
113+
"data": "a\u00b7l",
114+
"valid": false
115+
},
116+
{
117+
"description": "MIDDLE DOT with nothing preceding",
118+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.3",
119+
"data": "\u00b7l",
120+
"valid": false
121+
},
122+
{
123+
"description": "MIDDLE DOT with no following 'l'",
124+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.3",
125+
"data": "l\u00b7a",
126+
"valid": false
127+
},
128+
{
129+
"description": "MIDDLE DOT with nothing following",
130+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.3",
131+
"data": "l\u00b7",
132+
"valid": false
133+
},
134+
{
135+
"description": "MIDDLE DOT with surrounding 'l's",
136+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.3",
137+
"data": "l\u00b7l",
138+
"valid": true
139+
},
140+
{
141+
"description": "Greek KERAIA not followed by Greek",
142+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.4",
143+
"data": "\u03b1\u0375S",
144+
"valid": false
145+
},
146+
{
147+
"description": "Greek KERAIA not followed by anything",
148+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.4",
149+
"data": "\u03b1\u0375",
150+
"valid": false
151+
},
152+
{
153+
"description": "Greek KERAIA followed by Greek",
154+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.4",
155+
"data": "\u03b1\u0375\u03b2",
156+
"valid": true
157+
},
158+
{
159+
"description": "Hebrew GERESH not preceded by Hebrew",
160+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.5",
161+
"data": "A\u05f3\u05d1",
162+
"valid": false
163+
},
164+
{
165+
"description": "Hebrew GERESH not preceded by anything",
166+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.5",
167+
"data": "\u05f3\u05d1",
168+
"valid": false
169+
},
170+
{
171+
"description": "Hebrew GERESH preceded by Hebrew",
172+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.5",
173+
"data": "\u05d0\u05f3\u05d1",
174+
"valid": true
175+
},
176+
{
177+
"description": "Hebrew GERSHAYIM not preceded by Hebrew",
178+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.6",
179+
"data": "A\u05f4\u05d1",
180+
"valid": false
181+
},
182+
{
183+
"description": "Hebrew GERSHAYIM not preceded by anything",
184+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.6",
185+
"data": "\u05f4\u05d1",
186+
"valid": false
187+
},
188+
{
189+
"description": "Hebrew GERSHAYIM preceded by Hebrew",
190+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.6",
191+
"data": "\u05d0\u05f4\u05d1",
192+
"valid": true
193+
},
194+
{
195+
"description": "KATAKANA MIDDLE DOT with no Hiragana, Katakana, or Han",
196+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7",
197+
"data": "def\u30fbabc",
198+
"valid": false
199+
},
200+
{
201+
"description": "KATAKANA MIDDLE DOT with no other characters",
202+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7",
203+
"data": "\u30fb",
204+
"valid": false
205+
},
206+
{
207+
"description": "KATAKANA MIDDLE DOT with Hiragana",
208+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7",
209+
"data": "\u30fb\u3041",
210+
"valid": true
211+
},
212+
{
213+
"description": "KATAKANA MIDDLE DOT with Katakana",
214+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7",
215+
"data": "\u30fb\u30a1",
216+
"valid": true
217+
},
218+
{
219+
"description": "KATAKANA MIDDLE DOT with Han",
220+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7",
221+
"data": "\u30fb\u4e08",
222+
"valid": true
223+
},
224+
{
225+
"description": "Arabic-Indic digits mixed with Extended Arabic-Indic digits",
226+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.8",
227+
"data": "\u0660\u06f0",
228+
"valid": false
229+
},
230+
{
231+
"description": "Arabic-Indic digits not mixed with Extended Arabic-Indic digits",
232+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.8",
233+
"data": "\u0628\u0660\u0628",
234+
"valid": true
235+
},
236+
{
237+
"description": "Extended Arabic-Indic digits not mixed with Arabic-Indic digits",
238+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.9",
239+
"data": "\u06f00",
240+
"valid": true
241+
},
242+
{
243+
"description": "ZERO WIDTH JOINER not preceded by Virama",
244+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.2 https://www.unicode.org/review/pr-37.pdf",
245+
"data": "\u0915\u200d\u0937",
246+
"valid": false
247+
},
248+
{
249+
"description": "ZERO WIDTH JOINER not preceded by anything",
250+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.2 https://www.unicode.org/review/pr-37.pdf",
251+
"data": "\u200d\u0937",
252+
"valid": false
253+
},
254+
{
255+
"description": "ZERO WIDTH JOINER preceded by Virama",
256+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.2 https://www.unicode.org/review/pr-37.pdf",
257+
"data": "\u0915\u094d\u200d\u0937",
258+
"valid": true
259+
},
260+
{
261+
"description": "ZERO WIDTH NON-JOINER preceded by Virama",
262+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.1",
263+
"data": "\u0915\u094d\u200c\u0937",
264+
"valid": true
265+
},
266+
{
267+
"description": "ZERO WIDTH NON-JOINER not preceded by Virama but matches the joining-type context regexp",
268+
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.1 https://www.w3.org/TR/alreq/#h_disjoining_enforcement",
269+
"data": "\u0628\u064a\u200c\u0628\u064a",
270+
"valid": true
85271
}
86272
]
87273
}

0 commit comments

Comments
 (0)