Skip to content

Commit adb73f6

Browse files
committed
Extract CDDL definitions
Needed for w3c/webref#1353. With this update, Reffy now looks for and extracts CDDL content defined in `<pre class="cddl">` blocks. The logic is largely similar to the logic used for IDL. Shared code was factored out accordingly. Something specific about CDDL: on top of generating text extracts, the goal is also to create one extract per CDDL module that the spec defines. To associate a `<pre>` block with one or more CDDL modules, the code looks for a possible `data-cddl-module` attribute, or for module names in the `class` attribute (prefixed by `cddl-` or suffixed by `-cddl`). The former isn't used by any spec but is the envisioned mechanism in Bikeshed to define the association; the latter is the convention currently used in the WebDriver BiDi specification. When a spec defines modules, CDDL defined in a `<pre>` block with no explicit module annotation is considered to be defined for all modules (not doing so would essentially mean the CDDL would not be defined for any module, which seems weird). When there is CDDL, the extraction produces: 1. an extract that contains all CDDL definitions: `cddl/[shortname].cddl` 2. one extract per CDDL module: `cddl/[shortname]-[modulename].cddl` (I'm going to assume that no one is ever going to define a module name that would make `[shortname]-[modulename]` collide with the shortname of another spec). Note: some specs that define CDDL do not flag the `<pre>` blocks in any way (Open Screen Protocol, WebAuthn). Extraction won't work for them for now. Also, there are a couple of places in the WebDriver BiDi spec that use a `<pre class="cddl">` block to *reference* a CDDL construct defined elsewhere. Extraction will happily include these references as well, leading to CDDL extracts that contain invalid CDDL. These need fixing in the specs.
1 parent 0ab2937 commit adb73f6

9 files changed

+435
-49
lines changed

src/browserlib/extract-cddl.mjs

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import getCodeElements from './get-code-elements.mjs';
2+
import trimSpaces from './trim-spaces.mjs';
3+
4+
/**
 * Extract the list of CDDL definitions in the current spec.
 *
 * A spec may define more than one CDDL module. For example, the WebDriver BiDi
 * spec has CDDL definitions that apply to either or both of the local end and
 * the remote end. The function returns an array that lists all CDDL modules.
 *
 * Each CDDL module is represented as an object with the following keys whose
 * values are strings:
 * - name: the CDDL module name, as referenced in the spec markup. The name is
 *   "" for the "all" module described below.
 * - cddl: A dump of the CDDL definitions.
 *
 * When the spec contains CDDL, the first item in the array is always the
 * "all" module that contains a dump of all CDDL definitions, regardless of the
 * module they are actually defined for (the assumption is that looking at the
 * union of all CDDL modules defined in a spec will always make sense, and that
 * a spec will never reuse the same rule name with a different definition for
 * different CDDL modules). It is followed by one item per named module, in
 * the order in which module names first appear in the spec.
 *
 * @function
 * @public
 * @return {Array<{name: string, cddl: string}>} A dump of the CDDL definitions
 *   per CDDL module, or an empty array if the spec does not contain any CDDL.
 */
export default function extractCddl() {
  // Specs with CDDL are either recent enough that they all use the same
  // `<pre class="cddl">` convention, or they don't flag CDDL blocks in any
  // way, making it impossible to extract them.
  const cddlSelector = 'pre.cddl:not(.exclude):not(.extract)';
  const indexSelector = '#cddl-index';

  // Retrieve all elements that contain CDDL content, and compute the text and
  // the module names of each of them only once.
  const blocks = getCodeElements([cddlSelector], [indexSelector])
    .map(el => ({
      cddl: trimSpaces(el.textContent),
      names: getModules(el)
    }));

  // By convention, CDDL defined without specifying a module is defined
  // for all modules (that CDDL would essentially be lost otherwise, there's
  // no reason for a spec to define CDDL for no module if it uses modules).
  // Start by assembling the list of modules. A Map is used rather than a
  // plain object because module names come from the spec markup: a Map keeps
  // names in document order and cannot be confused by names such as
  // "__proto__".
  const modules = new Map();
  for (const { names } of blocks) {
    for (const name of names) {
      modules.set(name, []);
    }
  }

  // Assemble the CDDL per module
  const mergedCddl = [];
  for (const { cddl, names } of blocks) {
    if (!cddl) {
      continue;
    }
    mergedCddl.push(cddl);
    // No module means the CDDL is defined for all modules. Names are
    // de-duplicated so that a block never gets added twice to a module.
    const targets = names.length > 0 ? new Set(names) : modules.keys();
    for (const name of targets) {
      modules.get(name).push(cddl);
    }
  }

  if (mergedCddl.length === 0) {
    return [];
  }

  // Remove trailing spaces and use spaces throughout
  const normalize = cddl => cddl
    .replace(/\s+$/gm, '\n')
    .replace(/\t/g, ' ')
    .trim();

  const res = [{ name: "", cddl: normalize(mergedCddl.join('\n\n')) }];
  for (const [name, parts] of modules) {
    res.push({ name, cddl: normalize(parts.join('\n\n')) });
  }
  return res;
}
88+
89+
90+
/**
 * Retrieve the list of CDDL module names that the element references.
 *
 * This list of modules is either specified in a `data-cddl-module` attribute
 * (comma-separated list of names) or directly within the class attribute,
 * through class names prefixed by `cddl-` or suffixed by `-cddl`. The
 * `data-cddl-module` attribute takes precedence when it is set.
 *
 * Empty names are ignored: the empty name is reserved for the extract that
 * contains all CDDL definitions, so it cannot designate an actual module.
 *
 * @param {Element} el The element that contains the CDDL definitions
 * @return {Array<string>} The list of module names, without duplicates, in
 *   the order in which they appear. The list is empty when the element does
 *   not reference any module.
 */
function getModules(el) {
  const moduleAttr = el.getAttribute('data-cddl-module');
  if (moduleAttr) {
    const names = moduleAttr
      .split(',')
      .map(str => str.trim())
      .filter(name => name !== '');
    return [...new Set(names)];
  }

  const names = new Set();
  for (const className of el.classList.values()) {
    // `.+` (and not `.*`) so that bare `cddl-` and `-cddl` class names do not
    // produce an empty module name
    const match = className.match(/^(.+)-cddl$|^cddl-(.+)$/);
    if (match) {
      names.add(match[1] ?? match[2]);
    }
  }
  return [...names];
}

src/browserlib/extract-webidl.mjs

+13-48
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import getGenerator from './get-generator.mjs';
2-
import informativeSelector from './informative-selector.mjs';
3-
import cloneAndClean from './clone-and-clean.mjs';
2+
import getCodeElements from './get-code-elements.mjs';
3+
import trimSpaces from './trim-spaces.mjs';
44

55
/**
66
* Extract the list of WebIDL definitions in the current spec
@@ -70,56 +70,21 @@ function extractBikeshedIdl() {
7070
* sure that it only extracts elements once.
7171
*/
7272
function extractRespecIdl() {
73-
// Helper function that trims individual lines in an IDL block,
74-
// removing as much space as possible from the beginning of the page
75-
// while preserving indentation. Rules followed:
76-
// - Always trim the first line
77-
// - Remove whitespaces from the end of each line
78-
// - Replace lines that contain spaces with empty lines
79-
// - Drop same number of leading whitespaces from all other lines
80-
const trimIdlSpaces = idl => {
81-
const lines = idl.trim().split('\n');
82-
const toRemove = lines
83-
.slice(1)
84-
.filter(line => line.search(/\S/) > -1)
85-
.reduce(
86-
(min, line) => Math.min(min, line.search(/\S/)),
87-
Number.MAX_VALUE);
88-
return lines
89-
.map(line => {
90-
let firstRealChat = line.search(/\S/);
91-
if (firstRealChat === -1) {
92-
return '';
93-
}
94-
else if (firstRealChat === 0) {
95-
return line.replace(/\s+$/, '');
96-
}
97-
else {
98-
return line.substring(toRemove).replace(/\s+$/, '');
99-
}
100-
})
101-
.join('\n');
102-
};
103-
104-
// Detect the IDL index appendix if there's one (to exclude it)
105-
const idlEl = document.querySelector('#idl-index pre') ||
106-
document.querySelector('.chapter-idl pre'); // SVG 2 draft
107-
108-
let idl = [
73+
const idlSelectors = [
10974
'pre.idl:not(.exclude):not(.extract):not(#actual-idl-index)',
11075
'pre:not(.exclude):not(.extract) > code.idl-code:not(.exclude):not(.extract)',
11176
'pre:not(.exclude):not(.extract) > code.idl:not(.exclude):not(.extract)',
11277
'div.idl-code:not(.exclude):not(.extract) > pre:not(.exclude):not(.extract)',
11378
'pre.widl:not(.exclude):not(.extract)'
114-
]
115-
.map(sel => [...document.querySelectorAll(sel)])
116-
.reduce((res, elements) => res.concat(elements), [])
117-
.filter(el => el !== idlEl)
118-
.filter((el, idx, self) => self.indexOf(el) === idx)
119-
.filter(el => !el.closest(informativeSelector))
120-
.map(cloneAndClean)
121-
.map(el => trimIdlSpaces(el.textContent))
122-
.join('\n\n');
79+
];
12380

124-
return idl;
81+
const indexSelectors = [
82+
'#idl-index',
83+
'.chapter-idl'
84+
];
85+
86+
const idlElements = getCodeElements(idlSelectors, indexSelectors);
87+
return idlElements
88+
.map(el => trimSpaces(el.textContent))
89+
.join('\n\n');
12590
}

src/browserlib/get-code-elements.mjs

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import informativeSelector from './informative-selector.mjs';
2+
import cloneAndClean from './clone-and-clean.mjs';
3+
4+
/**
 * Helper function that returns a set of code elements in document order based
 * on a given set of selectors, excluding elements that are within an index.
 *
 * The function excludes elements defined in informative sections.
 *
 * The code elements are cloned and cleaned before they are returned to strip
 * annotations and other asides.
 *
 * @param {Array<string>} codeSelectors CSS selectors that match the code
 *   elements to return. An empty list yields an empty result.
 * @param {Array<string>} [excludeSelectors] CSS selectors that match the
 *   containers (typically the index at the end of the specification) whose
 *   code elements must be skipped. Nothing is excluded when the list is
 *   missing or empty.
 * @return {Array} The cloned and cleaned code elements
 */
export default function getCodeElements(codeSelectors, excludeSelectors) {
  // An empty string is not a valid selector: `querySelectorAll('')` and
  // `closest('')` both throw a SyntaxError. Empty lists are handled upfront.
  const codeSelector = codeSelectors.join(', ');
  if (!codeSelector) {
    return [];
  }
  const excludeSelector = (excludeSelectors ?? []).join(', ');

  return [...document.querySelectorAll(codeSelector)]
    // Only keep the elements that are not within the index at the end of
    // the specification and that are defined in a normative section.
    .filter(el => !excludeSelector || !el.closest(excludeSelector))
    .filter(el => !el.closest(informativeSelector))

    // Clone and clean the elements
    .map(cloneAndClean);
}

src/browserlib/reffy.json

+4
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,9 @@
6262
"href": "./extract-ids.mjs",
6363
"property": "ids",
6464
"needsIdToHeadingMap": true
65+
},
66+
{
67+
"href": "./extract-cddl.mjs",
68+
"property": "cddl"
6569
}
6670
]

src/browserlib/trim-spaces.mjs

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/**
 * Helper function that trims individual lines in a code block, removing as
 * much space as possible from the beginning of each line while preserving
 * relative indentation.
 *
 * Typically useful for CDDL and IDL extracts
 *
 * Rules followed:
 * - Trim the code block as a whole (the first line thus starts at column 0)
 * - Remove whitespaces from the end of each line
 * - Replace lines that only contain whitespaces with empty lines
 * - Drop the same number of leading characters from all other indented
 *   lines, namely the smallest indentation found after the first line
 *
 * @param {string} code The code block to process
 * @return {string} The code block with trimmed lines, joined with "\n"
 */
export default function trimSpaces(code) {
  const lines = code.trim().split('\n');

  // Position of the first non-whitespace character per line, -1 for lines
  // that only contain whitespaces
  const indents = lines.map(line => line.search(/\S/));

  // Smallest indentation of non-blank lines, first line excluded since it
  // always starts at column 0 once the block has been trimmed
  const toRemove = indents
    .slice(1)
    .filter(indent => indent > -1)
    .reduce((min, indent) => Math.min(min, indent), Number.MAX_VALUE);

  return lines
    .map((line, idx) => {
      const indent = indents[idx];
      if (indent === -1) {
        return '';
      }
      const text = (indent === 0) ? line : line.substring(toRemove);
      return text.trimEnd();
    })
    .join('\n');
}

src/lib/specs-crawler.js

+29-1
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,29 @@ async function saveSpecResults(spec, settings) {
251251
return `css/${spec.shortname}.json`;
252252
};
253253

254+
// Save the CDDL extracts of the given spec as text files in the CDDL folder,
// one file per entry in `spec.cddl`: `[shortname].cddl` for the entry whose
// name is empty, `[shortname]-[name].cddl` for the others. Each file starts
// with a "generated content" header followed by an empty line, and ends with
// a newline. Returns the list of `{ name, file }` objects that link module
// names to the relative path of the file that was written.
async function saveCddl(spec) {
  let cddlHeader = `
    ; GENERATED CONTENT - DO NOT EDIT
    ; Content was automatically extracted by Reffy into webref
    ; (https://github.com/w3c/webref)
    ; Source: ${spec.title} (${spec.crawled})`;
  // Drop the indentation (and empty lines) of the template above so that the
  // header ends at the first empty line of the generated file
  cddlHeader = cddlHeader.replace(/^\s+/gm, '').trim() + '\n\n';

  const res = [];
  for (const cddlModule of spec.cddl) {
    const cddl = cddlHeader + cddlModule.cddl + '\n';
    const suffix = cddlModule.name ? `-${cddlModule.name}` : '';
    const filename = `${spec.shortname}${suffix}.cddl`;
    await fs.promises.writeFile(
      path.join(folders.cddl, filename), cddl);
    res.push({
      name: cddlModule.name,
      file: `cddl/${filename}`
    });
  }
  return res;
};
276+
254277
// Save IDL dumps
255278
if (spec.idl) {
256279
spec.idl = await saveIdl(spec);
@@ -283,9 +306,14 @@ async function saveSpecResults(spec, settings) {
283306
(typeof thing == 'object') && (Object.keys(thing).length === 0);
284307
}
285308

309+
// Save CDDL extracts (text files, multiple modules possible)
310+
if (!isEmpty(spec.cddl)) {
311+
spec.cddl = await saveCddl(spec);
312+
}
313+
286314
// Save all other extracts from crawling modules
287315
const remainingModules = modules.filter(mod =>
288-
!mod.metadata && mod.property !== 'css' && mod.property !== 'idl');
316+
!mod.metadata && !['cddl', 'css', 'idl'].includes(mod.property));
289317
for (const mod of remainingModules) {
290318
await saveExtract(spec, mod.property, spec => !isEmpty(spec[mod.property]));
291319
}

src/lib/util.js

+30
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,36 @@ async function expandSpecResult(spec, baseFolder, properties) {
796796
return;
797797
}
798798

799+
// Treat CDDL extracts separately, one spec may have multiple CDDL
800+
// extracts (actual treatment is similar to IDL extracts otherwise)
801+
if (property === 'cddl') {
802+
if (!spec[property]) {
803+
return;
804+
}
805+
for (const cddlModule of spec[property]) {
806+
if (!cddlModule.file) {
807+
continue;
808+
}
809+
if (baseFolder.startsWith('https:')) {
810+
const url = (new URL(cddlModule.file, baseFolder)).toString();
811+
const response = await fetch(url, { nolog: true });
812+
contents = await response.text();
813+
}
814+
else {
815+
const filename = path.join(baseFolder, cddlModule.file);
816+
contents = await fs.readFile(filename, 'utf8');
817+
}
818+
if (contents.startsWith('; GENERATED CONTENT - DO NOT EDIT')) {
819+
// Normalize newlines to avoid off-by-one slices when we remove
820+
// the trailing newline that was added by saveCddl
821+
contents = contents.replace(/\r/g, '');
822+
const endOfHeader = contents.indexOf('\n\n');
823+
contents = contents.substring(endOfHeader + 2).slice(0, -1);
824+
}
825+
cddlModule.cddl = contents;
826+
}
827+
}
828+
799829
// Only consider properties that link to an extract, i.e. an IDL
800830
// or JSON file in subfolder.
801831
if (!spec[property] ||

tests/crawl-test.json

+3
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
},
2525
"title": "WOFF2",
2626
"algorithms": [],
27+
"cddl": [],
2728
"css": {
2829
"atrules": [],
2930
"properties": [],
@@ -99,6 +100,7 @@
99100
"title": "No Title",
100101
"generator": "respec",
101102
"algorithms": [],
103+
"cddl": [],
102104
"css": {
103105
"atrules": [],
104106
"properties": [],
@@ -224,6 +226,7 @@
224226
},
225227
"title": "[No title found for https://w3c.github.io/accelerometer/]",
226228
"algorithms": [],
229+
"cddl": [],
227230
"css": {
228231
"atrules": [],
229232
"properties": [],

0 commit comments

Comments
 (0)