|
82 | 82 | "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.2",
|
83 | 83 | "data": "\u0488hello",
|
84 | 84 | "valid": false
|
| 85 | + }, |
| 86 | + { |
| 87 | + "description": "Exceptions that are PVALID, left-to-right chars", |
| 88 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.2 https://tools.ietf.org/html/rfc5892#section-2.6", |
| 89 | + "data": "\u00df\u03c2\u0f0b\u3007", |
| 90 | + "valid": true |
| 91 | + }, |
| 92 | + { |
| 93 | + "description": "Exceptions that are PVALID, right-to-left chars", |
| 94 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.2 https://tools.ietf.org/html/rfc5892#section-2.6", |
| 95 | + "data": "\u06fd\u06fe", |
| 96 | + "valid": true |
| 97 | + }, |
| 98 | + { |
| 99 | + "description": "Exceptions that are DISALLOWED, right-to-left chars", |
| 100 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.2 https://tools.ietf.org/html/rfc5892#section-2.6", |
| 101 | + "data": "\u0640\u07fa", |
| 102 | + "valid": false |
| 103 | + }, |
| 104 | + { |
| 105 | + "description": "Exceptions that are DISALLOWED, left-to-right chars", |
| 106 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.2 https://tools.ietf.org/html/rfc5892#section-2.6 Note: The two combining marks (U+302E and U+302F) are in the middle and not at the start", |
| 107 | + "data": "\u3031\u3032\u3033\u3034\u3035\u302e\u302f\u303b", |
| 108 | + "valid": false |
| 109 | + }, |
| 110 | + { |
| 111 | + "description": "MIDDLE DOT with no preceding 'l'", |
| 112 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.3", |
| 113 | + "data": "a\u00b7l", |
| 114 | + "valid": false |
| 115 | + }, |
| 116 | + { |
| 117 | + "description": "MIDDLE DOT with nothing preceding", |
| 118 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.3", |
| 119 | + "data": "\u00b7l", |
| 120 | + "valid": false |
| 121 | + }, |
| 122 | + { |
| 123 | + "description": "MIDDLE DOT with no following 'l'", |
| 124 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.3", |
| 125 | + "data": "l\u00b7a", |
| 126 | + "valid": false |
| 127 | + }, |
| 128 | + { |
| 129 | + "description": "MIDDLE DOT with nothing following", |
| 130 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.3", |
| 131 | + "data": "l\u00b7", |
| 132 | + "valid": false |
| 133 | + }, |
| 134 | + { |
| 135 | + "description": "MIDDLE DOT with surrounding 'l's", |
| 136 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.3", |
| 137 | + "data": "l\u00b7l", |
| 138 | + "valid": true |
| 139 | + }, |
| 140 | + { |
| 141 | + "description": "Greek KERAIA not followed by Greek", |
| 142 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.4", |
| 143 | + "data": "\u03b1\u0375S", |
| 144 | + "valid": false |
| 145 | + }, |
| 146 | + { |
| 147 | + "description": "Greek KERAIA not followed by anything", |
| 148 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.4", |
| 149 | + "data": "\u03b1\u0375", |
| 150 | + "valid": false |
| 151 | + }, |
| 152 | + { |
| 153 | + "description": "Greek KERAIA followed by Greek", |
| 154 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.4", |
| 155 | + "data": "\u03b1\u0375\u03b2", |
| 156 | + "valid": true |
| 157 | + }, |
| 158 | + { |
| 159 | + "description": "Hebrew GERESH not preceded by Hebrew", |
| 160 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.5", |
| 161 | + "data": "A\u05f3\u05d1", |
| 162 | + "valid": false |
| 163 | + }, |
| 164 | + { |
| 165 | + "description": "Hebrew GERESH not preceded by anything", |
| 166 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.5", |
| 167 | + "data": "\u05f3\u05d1", |
| 168 | + "valid": false |
| 169 | + }, |
| 170 | + { |
| 171 | + "description": "Hebrew GERESH preceded by Hebrew", |
| 172 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.5", |
| 173 | + "data": "\u05d0\u05f3\u05d1", |
| 174 | + "valid": true |
| 175 | + }, |
| 176 | + { |
| 177 | + "description": "Hebrew GERSHAYIM not preceded by Hebrew", |
| 178 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.6", |
| 179 | + "data": "A\u05f4\u05d1", |
| 180 | + "valid": false |
| 181 | + }, |
| 182 | + { |
| 183 | + "description": "Hebrew GERSHAYIM not preceded by anything", |
| 184 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.6", |
| 185 | + "data": "\u05f4\u05d1", |
| 186 | + "valid": false |
| 187 | + }, |
| 188 | + { |
| 189 | + "description": "Hebrew GERSHAYIM preceded by Hebrew", |
| 190 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.6", |
| 191 | + "data": "\u05d0\u05f4\u05d1", |
| 192 | + "valid": true |
| 193 | + }, |
| 194 | + { |
| 195 | + "description": "KATAKANA MIDDLE DOT with no Hiragana, Katakana, or Han", |
| 196 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7", |
| 197 | + "data": "def\u30fbabc", |
| 198 | + "valid": false |
| 199 | + }, |
| 200 | + { |
| 201 | + "description": "KATAKANA MIDDLE DOT with no other characters", |
| 202 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7", |
| 203 | + "data": "\u30fb", |
| 204 | + "valid": false |
| 205 | + }, |
| 206 | + { |
| 207 | + "description": "KATAKANA MIDDLE DOT with Hiragana", |
| 208 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7", |
| 209 | + "data": "\u30fb\u3041", |
| 210 | + "valid": true |
| 211 | + }, |
| 212 | + { |
| 213 | + "description": "KATAKANA MIDDLE DOT with Katakana", |
| 214 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7", |
| 215 | + "data": "\u30fb\u30a1", |
| 216 | + "valid": true |
| 217 | + }, |
| 218 | + { |
| 219 | + "description": "KATAKANA MIDDLE DOT with Han", |
| 220 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7", |
| 221 | + "data": "\u30fb\u4e08", |
| 222 | + "valid": true |
| 223 | + }, |
| 224 | + { |
| 225 | + "description": "Arabic-Indic digits mixed with Extended Arabic-Indic digits", |
| 226 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.8", |
| 227 | + "data": "\u0660\u06f0", |
| 228 | + "valid": false |
| 229 | + }, |
| 230 | + { |
| 231 | + "description": "Arabic-Indic digits not mixed with Extended Arabic-Indic digits", |
| 232 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.8", |
| 233 | + "data": "\u0628\u0660\u0628", |
| 234 | + "valid": true |
| 235 | + }, |
| 236 | + { |
| 237 | + "description": "Extended Arabic-Indic digits not mixed with Arabic-Indic digits", |
| 238 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.9", |
| 239 | + "data": "\u06f00", |
| 240 | + "valid": true |
| 241 | + }, |
| 242 | + { |
| 243 | + "description": "ZERO WIDTH JOINER not preceded by Virama", |
| 244 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.2 https://www.unicode.org/review/pr-37.pdf", |
| 245 | + "data": "\u0915\u200d\u0937", |
| 246 | + "valid": false |
| 247 | + }, |
| 248 | + { |
| 249 | + "description": "ZERO WIDTH JOINER not preceded by anything", |
| 250 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.2 https://www.unicode.org/review/pr-37.pdf", |
| 251 | + "data": "\u200d\u0937", |
| 252 | + "valid": false |
| 253 | + }, |
| 254 | + { |
| 255 | + "description": "ZERO WIDTH JOINER preceded by Virama", |
| 256 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.2 https://www.unicode.org/review/pr-37.pdf", |
| 257 | + "data": "\u0915\u094d\u200d\u0937", |
| 258 | + "valid": true |
| 259 | + }, |
| 260 | + { |
| 261 | + "description": "ZERO WIDTH NON-JOINER preceded by Virama", |
| 262 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.1", |
| 263 | + "data": "\u0915\u094d\u200c\u0937", |
| 264 | + "valid": true |
| 265 | + }, |
| 266 | + { |
| 267 | + "description": "ZERO WIDTH NON-JOINER not preceded by Virama but matches regexp", |
| 268 | + "comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.1 https://www.w3.org/TR/alreq/#h_disjoining_enforcement", |
| 269 | + "data": "\u0628\u064a\u200c\u0628\u064a", |
| 270 | + "valid": true |
85 | 271 | }
|
86 | 272 | ]
|
87 | 273 | }
|
|
0 commit comments