Skip to content

Commit a54a27e

Browse files
committed
Add remaining Unicode properties
1 parent cc5ec01 commit a54a27e

5 files changed

+67
-3
lines changed

.eslintrc.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ module.exports = {
1313
},
1414
overrides: [
1515
{
16-
files: ["acorn/src/bin/*.js", "bin/generate-identifier-regex.js"],
16+
files: ["acorn/src/bin/*.js", "bin/generate-identifier-regex.js", "bin/generate-unicode-script-values.js"],
1717
rules: {
1818
"no-console": "off"
1919
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
// This file was generated by "bin/generate-unicode-script-values.js". Do not modify manually!
2+
export default "Gara Garay Gukh Gurung_Khema Hrkt Katakana_Or_Hiragana Kawi Kirat_Rai Krai Nag_Mundari Nagm Ol_Onal Onao Sunu Sunuwar Todhri Todr Tulu_Tigalari Tutg Unknown Zzzz"

acorn/src/unicode-property-data.js

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import {wordsRegexp} from "./util.js"
2+
import scriptValuesAddedInUnicode from "./generated/scriptValuesAddedInUnicode.js"
23

34
// This file contains Unicode properties extracted from the ECMAScript specification.
45
// The lists are extracted like so:
@@ -42,7 +43,7 @@ const ecma10ScriptValues = ecma9ScriptValues + " Dogra Dogr Gunjala_Gondi Gong H
4243
const ecma11ScriptValues = ecma10ScriptValues + " Elymaic Elym Nandinagari Nand Nyiakeng_Puachue_Hmong Hmnp Wancho Wcho"
4344
const ecma12ScriptValues = ecma11ScriptValues + " Chorasmian Chrs Diak Dives_Akuru Khitan_Small_Script Kits Yezi Yezidi"
4445
const ecma13ScriptValues = ecma12ScriptValues + " Cypro_Minoan Cpmn Old_Uyghur Ougr Tangsa Tnsa Toto Vithkuqi Vith"
45-
const ecma14ScriptValues = ecma13ScriptValues + " Hrkt Katakana_Or_Hiragana Kawi Nag_Mundari Nagm Unknown Zzzz"
46+
const ecma14ScriptValues = ecma13ScriptValues + " " + scriptValuesAddedInUnicode
4647

4748
const unicodeScriptValues = {
4849
9: ecma9ScriptValues,

bin/generate-unicode-script-values.js

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"use strict"
2+
3+
const fs = require("fs")
4+
const path = require("path")
5+
6+
// Entry point: regenerates "acorn/src/generated/scriptValuesAddedInUnicode.js".
//
// Up to and including ES2022 the Unicode script values were listed in the
// ECMAScript specification itself; from ES2023 on, the specification defers to
// the Unicode standard. The generated module therefore contains every script
// value from the latest Unicode data that is not already matched by the
// ES2022 (ecmaVersion 13) list in "unicode-property-data.js".
import("../acorn/src/unicode-property-data.js")
  .then(async(m) => {
    // Matcher for the script values known as of ecmaVersion 13 (ES2022).
    const reScriptValuesAddedInES = m.default[13].nonBinary.Script

    // A Set removes duplicates among the yielded names and aliases.
    const scriptValues = new Set()
    for await (const value of getLatestUnicodeScriptValues()) {
      scriptValues.add(value)
    }

    const scriptValuesAddedInUnicode = "export default " +
      JSON.stringify(
        [...scriptValues]
          // Keep only the values that the ES2022 list does not already cover.
          .filter(value => !reScriptValuesAddedInES.test(value))
          // Sort so that the generated file is stable between runs.
          .sort()
          .join(" ")
      )

    writeGeneratedFile("scriptValuesAddedInUnicode", scriptValuesAddedInUnicode)

    console.log("Done. The generated files must be committed.")
  })
  .catch((error) => {
    // Report the failure explicitly and make the npm script fail instead of
    // relying on the runtime's unhandled-rejection behavior.
    console.error(error)
    process.exitCode = 1
  })
30+
31+
/**
 * Writes a generated module into "acorn/src/generated", prefixed with a
 * banner comment naming this script as the generator.
 *
 * @param {string} filename Base name of the module to write, without ".js".
 * @param {string} content Source text placed after the banner comment.
 */
function writeGeneratedFile(filename, content) {
  const comment = "// This file was generated by \"bin/" + path.basename(__filename) + "\". Do not modify manually!"
  // Resolve relative to this script rather than the current working directory,
  // so the output lands in the repository no matter where the script is run from.
  const target = path.resolve(__dirname, "../acorn/src/generated", filename + ".js")
  fs.writeFileSync(target, comment + "\n" + content + "\n", "utf8")
}
35+
36+
/**
 * Gets all the Unicode script values from the latest PropertyValueAliases.txt.
 *
 * Downloads the file from unicode.org and, for every `sc` entry, yields the
 * third field, then the second field, then any further fields on the line.
 * Values may repeat; callers that need uniqueness must deduplicate.
 *
 * @returns {AsyncGenerator<string>} The script value names and aliases.
 * @throws {Error} If the server does not answer with a successful status.
 */
async function * getLatestUnicodeScriptValues() {
  const url = "https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt"
  const response = await fetch(url)
  if (!response.ok) {
    // Without this check an error page would be parsed like the data file and
    // the generator would silently produce an empty or wrong list.
    throw new Error("Failed to fetch " + url + ": " + response.status + " " + response.statusText)
  }
  const lines = (await response.text()).split("\n")
  for (const line of lines) {
    if (!line || line.startsWith("#")) {
      continue
    }
    const [propertyAlias, alias, canonical, ...remaining] = line
      .split("#")[0] // strip comments
      .split(";") // split by semicolon
      .map((x) => x.trim()) // trim

    // Only entries of the Script property ("sc") are of interest.
    if (propertyAlias !== "sc") {
      continue
    }

    yield canonical
    yield alias
    yield * remaining
  }
}

package.json

+3-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525
"build:loose": "rollup -c acorn-loose/rollup.config.mjs",
2626
"build:main": "rollup -c acorn/rollup.config.mjs",
2727
"build:walk": "rollup -c acorn-walk/rollup.config.mjs",
28-
"generate": "node bin/generate-identifier-regex.js",
28+
"generate": "npm run generate:identifier-regex && npm run generate:unicode-script-values",
29+
"generate:identifier-regex": "node bin/generate-identifier-regex.js",
30+
"generate:unicode-script-values": "node bin/generate-unicode-script-values.js",
2931
"lint": "eslint .",
3032
"prepare": "npm run test",
3133
"pretest": "npm run build:main && npm run build:loose",

0 commit comments

Comments
 (0)