Skip to content

Commit 9f97276

Browse files
authored
Extract idl without html5lib (#3760)
This is easier than setting up pip/venv just to satisfy dependencies such as html5lib's `six` requirement.
1 parent 023bccf commit 9f97276

File tree

1 file changed

+61
-28
lines changed

1 file changed

+61
-28
lines changed

specs/extract-idl.py

Lines changed: 61 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
from datetime import date
44
from string import Template
55
import sys
6-
import html5lib
76

87
LICENSE = """
98
// Copyright (c) $YEAR The Khronos Group Inc.
@@ -30,35 +29,69 @@
3029
"""
3130

3231
htmlfilename = sys.argv[1]
33-
htmlfile = open(htmlfilename)
34-
try:
35-
doc = html5lib.parse(htmlfile, treebuilder="dom")
36-
finally:
37-
htmlfile.close()
3832

39-
def elementHasClass(el, classArg):
40-
"""
41-
Return true if and only if classArg is one of the classes of el
42-
"""
43-
classes = [ c for c in el.getAttribute("class").split(" ") if c != "" ]
44-
return classArg in classes
33+
USE_HTML5LIB = False
34+
if not USE_HTML5LIB:
35+
import re
36+
RE_IDL = re.compile(r'<pre class="idl">\n?(.*?)</pre>', re.DOTALL)
4537

46-
def elementTextContent(el):
47-
"""
48-
Implementation of DOM Core's .textContent
49-
"""
50-
textContent = ""
51-
for child in el.childNodes:
52-
if child.nodeType == 3: # Node.TEXT_NODE
53-
textContent += child.data
54-
elif child.nodeType == 1: # Node.ELEMENT_NODE
55-
textContent += elementTextContent(child)
56-
else:
57-
# Other nodes are ignored
58-
pass
59-
return textContent
38+
IDL_EXAMPLE = '''
39+
<pre class="idl">
40+
[Exposed=(Window,Worker)]
41+
interface <dfn data-dfn-type="interface" id="WebGLContextEvent">WebGLContextEvent</dfn> : <a href="http://www.w3.org/TR/domcore/#event">Event</a> {
42+
constructor(DOMString type, optional WebGLContextEventInit eventInit = {});
43+
readonly attribute DOMString statusMessage;
44+
};
6045
61-
preList = doc.getElementsByTagName("pre")
62-
idlList = [elementTextContent(p) for p in preList if elementHasClass(p, "idl") ]
46+
// EventInit is defined in the DOM4 specification.
47+
dictionary WebGLContextEventInit : <a href="http://www.w3.org/TR/domcore/#eventinit">EventInit</a> {
48+
DOMString statusMessage = "";
49+
};</pre>
50+
'''
51+
assert RE_IDL.search(IDL_EXAMPLE)
52+
53+
54+
from pathlib import Path
55+
SRC_PATH = Path(htmlfilename)
56+
matches = RE_IDL.finditer(SRC_PATH.read_text())
57+
idlList = [m[1] for m in matches]
58+
59+
RE_TAG = re.compile(r'<(.*?)>')
60+
idlList = [re.sub(RE_TAG, '', s) for s in idlList]
61+
62+
import html
63+
idlList = [html.unescape(s) for s in idlList]
64+
else:
65+
import html5lib
66+
htmlfile = open(htmlfilename)
67+
try:
68+
doc = html5lib.parse(htmlfile, treebuilder="dom")
69+
finally:
70+
htmlfile.close()
71+
72+
def elementHasClass(el, classArg):
73+
"""
74+
Return true if and only if classArg is one of the classes of el
75+
"""
76+
classes = [ c for c in el.getAttribute("class").split(" ") if c != "" ]
77+
return classArg in classes
78+
79+
def elementTextContent(el):
80+
"""
81+
Implementation of DOM Core's .textContent
82+
"""
83+
textContent = ""
84+
for child in el.childNodes:
85+
if child.nodeType == 3: # Node.TEXT_NODE
86+
textContent += child.data
87+
elif child.nodeType == 1: # Node.ELEMENT_NODE
88+
textContent += elementTextContent(child)
89+
else:
90+
# Other nodes are ignored
91+
pass
92+
return textContent
93+
94+
preList = doc.getElementsByTagName("pre")
95+
idlList = [elementTextContent(p) for p in preList if elementHasClass(p, "idl") ]
6396
licenseTemplate = Template(LICENSE)
6497
print(licenseTemplate.substitute(YEAR=date.today().year) + "\n\n".join(idlList))

0 commit comments

Comments
 (0)