Skip to content

Commit 0320bd4

Browse files
Lint: Add additional checks and improve HTML parsing
New checks: - Validate that constraint table links from steps are reasonable. - Validate that constraint tables use links not styles for params. Improved checks: - Make duplicate word detection case insensitive and match at start/end of lines. Internal changes: - Massage HTML to allow node-html-parser to handle tables. - Improve output when matching at line boundaries. - Comment checks with "Generic" or "WebNN".
1 parent 6e19654 commit 0320bd4

File tree

1 file changed

+89
-34
lines changed

1 file changed

+89
-34
lines changed

Diff for: tools/lint.mjs

+89-34
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,20 @@ log(`loading generated HTML "${options.html}"...`);
6363
let file = await fs.readFile(options.html, 'utf8');
6464

6565
log('massaging HTML...');
66-
// node-html-parser doesn't understand that DT and DD are mutually self-closing;
66+
// node-html-parser doesn't understand that some elements are mutually self-closing;
6767
// tweak the source using regex magic.
68-
file = file.replaceAll(
69-
/(<(dt|dd)\b[^>]*>)(.*?)(?=<(:?dt|dd|\/dl)\b)/sg,
70-
(_, opener, tag, content) => `${opener}${content}</${tag}>`);
68+
[{tags: ['dt', 'dd'], containers: ['dl']},
69+
{tags: ['thead', 'tbody', 'tfoot'], containers: ['table']},
70+
{tags: ['tr'], containers: ['thead', 'tbody', 'tfoot', 'table']},
71+
].forEach(({tags, containers}) => {
72+
const re = new RegExp(
73+
'(<(' + tags.join('|') + ')\\b[^>]*>)' +
74+
'(.*?)' +
75+
'(?=<(:?' + tags.join('|') + '|/(' + containers.join('|') + '))\\b)',
76+
'sg');
77+
file = file.replaceAll(
78+
re, (_, opener, tag, content) => `${opener}${content}</${tag}>`);
79+
});
7180

7281
log('parsing HTML...');
7382
const root = parse(file, {
@@ -102,13 +111,25 @@ function error(message) {
102111

103112
function format(match) {
104113
const CONTEXT = 20;
105-
const prefix = match.input.substring(match.index - CONTEXT, match.index)
114+
115+
let prefix = match.input.substring(match.index - CONTEXT, match.index)
106116
.split(/\n/)
107117
.pop();
108-
const suffix = match.input.substr(match.index + match[0].length, CONTEXT)
118+
let suffix = match.input.substr(match.index + match[0].length, CONTEXT)
109119
.split(/\n/)
110120
.shift();
111-
return (prefix.length === CONTEXT ? '...' : '') + prefix + match[0] + suffix +
121+
let infix = match[0];
122+
123+
if (infix.startsWith('\n')) {
124+
prefix = '';
125+
infix = infix.slice(1);
126+
}
127+
if (infix.endsWith('\n')) {
128+
suffix = '';
129+
infix = infix.slice(0, -1);
130+
}
131+
132+
return (prefix.length === CONTEXT ? '...' : '') + prefix + infix + suffix +
112133
(suffix.length === CONTEXT ? '...' : '');
113134
}
114135

@@ -128,22 +149,26 @@ const ALGORITHM_STEP_SELECTOR = '.algorithm li > p:not(.issue)';
128149
// * `text` - rendered text content
129150
// * `root.querySelectorAll()` - operate on DOM-like nodes
130151

131-
// Look for merge markers
152+
// Checks are marked with one of these tags:
153+
// * [Generic] - could apply to any spec
154+
// * [WebNN] - very specific to the WebNN spec
155+
156+
// [Generic] Look for merge markers
132157
for (const match of source.matchAll(/<{7}|>{7}|^={7}$/mg)) {
133158
error(`Merge conflict marker: ${format(match)}`);
134159
}
135160

136-
// Look for residue of unterminated auto-links in rendered text
161+
// [Generic] Look for residue of unterminated auto-links in rendered text
137162
for (const match of text.matchAll(/({{|}}|\[=|=\])/g)) {
138163
error(`Unterminated autolink: ${format(match)}`);
139164
}
140165

141-
// Look for duplicate words (in source, since [=realm=] |realm| is okay)
142-
for (const match of html.matchAll(/ (\w+) \1 /g)) {
166+
// [Generic] Look for duplicate words (in source, since [=realm=] |realm| is okay)
167+
for (const match of html.matchAll(/(?:^|\s)(\w+) \1(?:$|\s)/ig)) {
143168
error(`Duplicate word: ${format(match)}`);
144169
}
145170

146-
// Verify IDL lines wrap to avoid horizontal scrollbars
171+
// [Generic] Verify IDL lines wrap to avoid horizontal scrollbars
147172
const MAX_IDL_WIDTH = 88;
148173
for (const idl of root.querySelectorAll('pre.idl')) {
149174
idl.innerText.split(/\n/).forEach(line => {
@@ -154,19 +179,19 @@ for (const idl of root.querySelectorAll('pre.idl')) {
154179
});
155180
}
156181

157-
// Look for undesired punctuation
182+
// [WebNN] Look for undesired punctuation
158183
for (const match of text.matchAll(/(::|×|÷||)/g)) {
159184
error(`Bad punctuation: ${format(match)}`);
160185
}
161186

162-
// Look for undesired entity usage
187+
// [WebNN] Look for undesired entity usage
163188
for (const match of source.matchAll(/&(\w+);/g)) {
164189
if (!['amp', 'lt', 'gt', 'quot'].includes(match[1])) {
165190
error(`Avoid entities: ${format(match)}`);
166191
}
167192
}
168193

169-
// Look for undesired phrasing
194+
// [WebNN] Look for undesired phrasing
170195
for (const match of source.matchAll(/the (\[=.*?=\]) of (\|.*?\|)[^,]/g)) {
171196
error(`Prefer "x's y" to "y of x": ${format(match)}`);
172197
}
@@ -180,17 +205,17 @@ for (const match of text.matchAll(/\bthe \S+ argument\b/g)) {
180205
error(`Drop 'the' and 'argument': ${format(match)}`);
181206
}
182207

183-
// Look for incorrect use of shape for an MLOperandDescriptor
208+
// [WebNN] Look for incorrect use of shape for an MLOperandDescriptor
184209
for (const match of source.matchAll(/(\|\w*desc\w*\|)'s \[=MLOperand\/shape=\]/ig)) {
185210
error(`Use ${match[1]}.{{MLOperandDescriptor/dimensions}} not shape: ${format(match)}`);
186211
}
187212

188-
// Look for missing dict-member dfns
213+
// [Generic] Look for missing dict-member dfns
189214
for (const element of root.querySelectorAll('.idl dfn[data-dfn-type=dict-member]')) {
190215
error(`Dictionary member missing dfn: ${element.innerText}`);
191216
}
192217

193-
// Look for suspicious stuff in algorithm steps
218+
// [WebNN] Look for suspicious stuff in algorithm steps
194219
for (const element of root.querySelectorAll(ALGORITHM_STEP_SELECTOR)) {
195220
// [] used for anything but indexing, slots, and refs
196221
// Exclude \w[ for indexing (e.g. shape[n])
@@ -205,7 +230,7 @@ for (const element of root.querySelectorAll(ALGORITHM_STEP_SELECTOR)) {
205230
}
206231
}
207232

208-
// Ensure vars are method/algorithm arguments, or initialized correctly
233+
// [Generic] Ensure vars are method/algorithm arguments, or initialized correctly
209234
for (const algorithm of root.querySelectorAll('.algorithm')) {
210235
const vars = algorithm.querySelectorAll('var');
211236
const seen = new Set();
@@ -253,17 +278,17 @@ for (const algorithm of root.querySelectorAll('.algorithm')) {
253278
}
254279
}
255280

256-
// Eschew vars outside of algorithms.
281+
// [Generic] Eschew vars outside of algorithms.
257282
const algorithmVars = new Set(root.querySelectorAll('.algorithm var'));
258283
for (const v of root.querySelectorAll('var').filter(v => !algorithmVars.has(v))) {
259284
error(`Variable outside of algorithm: ${v.innerText}`);
260285
}
261286

262-
263-
// Prevent accidental normative references to other specs. This reports an error
264-
// if there is a normative reference to any spec *other* than these ones. This
265-
// helps avoid an autolink like [=object=] adding an unexpected reference to
266-
// [FILEAPI]. Add to this list if a new normative reference is intended.
287+
// [WebNN] Prevent accidental normative references to other specs. This reports
288+
// an error if there is a normative reference to any spec *other* than these
289+
// ones. This helps avoid an autolink like [=object=] adding an unexpected
290+
// reference to [FILEAPI]. Add to this list if a new normative reference is
291+
// intended.
267292
const NORMATIVE_REFERENCES = new Set([
268293
'[ECMASCRIPT]',
269294
'[HTML]',
@@ -282,7 +307,7 @@ for (const term of root.querySelectorAll('#normative + dl > dt')) {
282307
}
283308
}
284309

285-
// Detect syntax errors in JS.
310+
// [Generic] Detect syntax errors in JS.
286311
for (const pre of root.querySelectorAll('pre.highlight:not(.idl)')) {
287312
const script = pre.innerText.replaceAll(/&amp;/g, '&')
288313
.replaceAll(/&lt;/g, '<')
@@ -294,20 +319,20 @@ for (const pre of root.querySelectorAll('pre.highlight:not(.idl)')) {
294319
}
295320
}
296321

297-
// Ensure algorithm steps end in '.' or ':'.
322+
// [Generic] Ensure algorithm steps end in '.' or ':'.
298323
for (const p of root.querySelectorAll(ALGORITHM_STEP_SELECTOR)) {
299324
const match = p.innerText.match(/[^.:]$/);
300325
if (match) {
301326
error(`Algorithm steps should end with '.' or ':': ${format(match)}`);
302327
}
303328
}
304329

305-
// Avoid incorrect links to list/empty.
330+
// [Generic] Avoid incorrect links to list/empty.
306331
for (const match of source.matchAll(/is( not)? \[=(list\/|stack\/|queue\/|)empty=\]/g)) {
307332
error(`Link to 'is empty' (adjective) not 'empty' (verb): ${format(match)}`);
308333
}
309334

310-
// Ensure every method dfn is correctly associated with an interface.
335+
// [Generic] Ensure every method dfn is correctly associated with an interface.
311336
const interfaces = new Set(
312337
root.querySelectorAll('dfn[data-dfn-type=interface]').map(e => e.innerText));
313338
for (const dfn of root.querySelectorAll('dfn[data-dfn-type=method]')) {
@@ -317,13 +342,13 @@ for (const dfn of root.querySelectorAll('dfn[data-dfn-type=method]')) {
317342
}
318343
}
319344

320-
// Ensure every IDL argument is linked to a definition.
345+
// [Generic] Ensure every IDL argument is linked to a definition.
321346
for (const dfn of root.querySelectorAll('pre.idl dfn[data-dfn-type=argument]')) {
322347
const dfnFor = dfn.getAttribute('data-dfn-for');
323348
error(`Missing <dfn argument for="${dfnFor}">${dfn.innerText}</dfn> (or equivalent)`);
324349
}
325350

326-
// Ensure every argument dfn is correctly associated with a method.
351+
// [Generic] Ensure every argument dfn is correctly associated with a method.
327352
// This tries to catch extraneous definitions, e.g. after an arg is removed.
328353
for (const dfn of root.querySelectorAll('dfn[data-dfn-type=argument]')) {
329354
const dfnFor = dfn.getAttribute('data-dfn-for');
@@ -332,8 +357,9 @@ for (const dfn of root.querySelectorAll('dfn[data-dfn-type=argument]')) {
332357
}
333358
}
334359

335-
// Try to catch type mismatches like |tensor|.{{MLGraph/...}}. Note that the
336-
// test is keyed on the variable name; variables listed here are not validated.
360+
// [WebNN] Try to catch type mismatches like |tensor|.{{MLGraph/...}}. Note that
361+
// the test is keyed on the variable name; variables listed here are not
362+
// validated.
337363
for (const match of source.matchAll(/\|(\w+)\|\.{{(\w+)\/.*?}}/g)) {
338364
const [_, v, i] = match;
339365
[['MLTensor', ['tensor']],
@@ -351,13 +377,42 @@ for (const match of source.matchAll(/\|(\w+)\|\.{{(\w+)\/.*?}}/g)) {
351377
});
352378
}
353379

380+
// [WebNN] Verify that linked constraints table IDs are reasonable. Bikeshed
381+
// will flag any broken links; this just tries to ensure that links within the
382+
// algorithm go to that algorithm's associated table.
383+
for (const algorithm of root.querySelectorAll(
384+
'.algorithm[data-algorithm-for=MLGraphBuilder]')) {
385+
const name = algorithm.getAttribute('data-algorithm');
386+
if (name.match(/^(\w+)\(/)) {
387+
const method = RegExp.$1;
388+
for (const href of algorithm.querySelectorAll('a')
389+
.map(a => a.getAttribute('href'))
390+
.filter(href => href.match(/#constraints-/))) {
391+
// Allow either exact case or lowercase match for table ID.
392+
if (
393+
href !== '#constraints-' + method &&
394+
href !== '#constraints-' + method.toLowerCase()) {
395+
error(`Steps for ${method}() link to ${href}`);
396+
}
397+
}
398+
}
399+
}
400+
401+
// [WebNN] Ensure constraints tables use linking not styling
402+
for (const table of root.querySelectorAll('table.data').filter(e => e.id.startsWith('constraints-'))) {
403+
for (const match of table.innerHTML.matchAll(/<em>(?!output)(\w+)<\/em>/ig)) {
404+
error(`Constraints table should link not style args: ${format(match)}`);
405+
}
406+
}
407+
354408
// TODO: Generate this from the IDL itself.
355409
const dictionaryTypes = ['MLOperandDescriptor', 'MLContextLostInfo'];
356410

357-
// Ensure JS objects are created with explicit realm
411+
// [Generic] Ensure JS objects are created with explicit realm
358412
for (const match of text.matchAll(/ a new promise\b(?! in realm)/g)) {
359413
error(`Promise creation must specify realm: ${format(match)}`);
360414
}
415+
// [Generic] Ensure JS objects are created with explicit realm
361416
for (const match of text.matchAll(/ be a new ([A-Z]\w+)\b(?! in realm)/g)) {
362417
const type = match[1];
363418
// Dictionaries are just maps, so they don't need a realm.

0 commit comments

Comments
 (0)