Skip to content

Commit a137d14

Browse files
kovidgoyal authored and gsnedders committed
Preserve attribute order when parsing
1 parent 0a885c6 commit a137d14

File tree

7 files changed

+100
-92
lines changed

7 files changed

+100
-92
lines changed

README.rst

-4
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,6 @@ functionality:
116116
- ``chardet`` can be used as a fallback when character encoding cannot
117117
be determined.
118118

119-
- ``ordereddict`` can be used under Python 2.6
120-
(``collections.OrderedDict`` is used instead on later versions) to
121-
serialize attributes in alphabetical order.
122-
123119

124120
Bugs
125121
----

html5lib/constants.py

+67
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,73 @@
437437
(namespaces["mathml"], "mtext")
438438
])
439439

440+
adjustSVGAttributes = {
441+
"attributename": "attributeName",
442+
"attributetype": "attributeType",
443+
"basefrequency": "baseFrequency",
444+
"baseprofile": "baseProfile",
445+
"calcmode": "calcMode",
446+
"clippathunits": "clipPathUnits",
447+
"contentscripttype": "contentScriptType",
448+
"contentstyletype": "contentStyleType",
449+
"diffuseconstant": "diffuseConstant",
450+
"edgemode": "edgeMode",
451+
"externalresourcesrequired": "externalResourcesRequired",
452+
"filterres": "filterRes",
453+
"filterunits": "filterUnits",
454+
"glyphref": "glyphRef",
455+
"gradienttransform": "gradientTransform",
456+
"gradientunits": "gradientUnits",
457+
"kernelmatrix": "kernelMatrix",
458+
"kernelunitlength": "kernelUnitLength",
459+
"keypoints": "keyPoints",
460+
"keysplines": "keySplines",
461+
"keytimes": "keyTimes",
462+
"lengthadjust": "lengthAdjust",
463+
"limitingconeangle": "limitingConeAngle",
464+
"markerheight": "markerHeight",
465+
"markerunits": "markerUnits",
466+
"markerwidth": "markerWidth",
467+
"maskcontentunits": "maskContentUnits",
468+
"maskunits": "maskUnits",
469+
"numoctaves": "numOctaves",
470+
"pathlength": "pathLength",
471+
"patterncontentunits": "patternContentUnits",
472+
"patterntransform": "patternTransform",
473+
"patternunits": "patternUnits",
474+
"pointsatx": "pointsAtX",
475+
"pointsaty": "pointsAtY",
476+
"pointsatz": "pointsAtZ",
477+
"preservealpha": "preserveAlpha",
478+
"preserveaspectratio": "preserveAspectRatio",
479+
"primitiveunits": "primitiveUnits",
480+
"refx": "refX",
481+
"refy": "refY",
482+
"repeatcount": "repeatCount",
483+
"repeatdur": "repeatDur",
484+
"requiredextensions": "requiredExtensions",
485+
"requiredfeatures": "requiredFeatures",
486+
"specularconstant": "specularConstant",
487+
"specularexponent": "specularExponent",
488+
"spreadmethod": "spreadMethod",
489+
"startoffset": "startOffset",
490+
"stddeviation": "stdDeviation",
491+
"stitchtiles": "stitchTiles",
492+
"surfacescale": "surfaceScale",
493+
"systemlanguage": "systemLanguage",
494+
"tablevalues": "tableValues",
495+
"targetx": "targetX",
496+
"targety": "targetY",
497+
"textlength": "textLength",
498+
"viewbox": "viewBox",
499+
"viewtarget": "viewTarget",
500+
"xchannelselector": "xChannelSelector",
501+
"ychannelselector": "yChannelSelector",
502+
"zoomandpan": "zoomAndPan"
503+
}
504+
505+
adjustMathMLAttributes = {"definitionurl": "definitionURL"}
506+
440507
adjustForeignAttributes = {
441508
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
442509
"xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),

html5lib/html5parser.py

+21-83
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
from __future__ import absolute_import, division, unicode_literals
2-
from six import with_metaclass
2+
from six import with_metaclass, viewkeys, PY3
33

44
import types
55

6+
try:
7+
from collections import OrderedDict
8+
except ImportError:
9+
from ordereddict import OrderedDict
10+
611
from . import inputstream
712
from . import tokenizer
813

@@ -17,6 +22,7 @@
1722
namespaces,
1823
htmlIntegrationPointElements, mathmlTextIntegrationPointElements,
1924
adjustForeignAttributes as adjustForeignAttributesMap,
25+
adjustMathMLAttributes, adjustSVGAttributes,
2026
E,
2127
ReparseException
2228
)
@@ -273,96 +279,18 @@ def normalizeToken(self, token):
273279
""" HTML5 specific normalizations to the token stream """
274280

275281
if token["type"] == tokenTypes["StartTag"]:
276-
token["data"] = dict(token["data"][::-1])
282+
token["data"] = OrderedDict(token['data'][::-1])
277283

278284
return token
279285

280286
def adjustMathMLAttributes(self, token):
281-
replacements = {"definitionurl": "definitionURL"}
282-
for k, v in replacements.items():
283-
if k in token["data"]:
284-
token["data"][v] = token["data"][k]
285-
del token["data"][k]
287+
adjust_attributes(token, adjustMathMLAttributes)
286288

287289
def adjustSVGAttributes(self, token):
288-
replacements = {
289-
"attributename": "attributeName",
290-
"attributetype": "attributeType",
291-
"basefrequency": "baseFrequency",
292-
"baseprofile": "baseProfile",
293-
"calcmode": "calcMode",
294-
"clippathunits": "clipPathUnits",
295-
"contentscripttype": "contentScriptType",
296-
"contentstyletype": "contentStyleType",
297-
"diffuseconstant": "diffuseConstant",
298-
"edgemode": "edgeMode",
299-
"externalresourcesrequired": "externalResourcesRequired",
300-
"filterres": "filterRes",
301-
"filterunits": "filterUnits",
302-
"glyphref": "glyphRef",
303-
"gradienttransform": "gradientTransform",
304-
"gradientunits": "gradientUnits",
305-
"kernelmatrix": "kernelMatrix",
306-
"kernelunitlength": "kernelUnitLength",
307-
"keypoints": "keyPoints",
308-
"keysplines": "keySplines",
309-
"keytimes": "keyTimes",
310-
"lengthadjust": "lengthAdjust",
311-
"limitingconeangle": "limitingConeAngle",
312-
"markerheight": "markerHeight",
313-
"markerunits": "markerUnits",
314-
"markerwidth": "markerWidth",
315-
"maskcontentunits": "maskContentUnits",
316-
"maskunits": "maskUnits",
317-
"numoctaves": "numOctaves",
318-
"pathlength": "pathLength",
319-
"patterncontentunits": "patternContentUnits",
320-
"patterntransform": "patternTransform",
321-
"patternunits": "patternUnits",
322-
"pointsatx": "pointsAtX",
323-
"pointsaty": "pointsAtY",
324-
"pointsatz": "pointsAtZ",
325-
"preservealpha": "preserveAlpha",
326-
"preserveaspectratio": "preserveAspectRatio",
327-
"primitiveunits": "primitiveUnits",
328-
"refx": "refX",
329-
"refy": "refY",
330-
"repeatcount": "repeatCount",
331-
"repeatdur": "repeatDur",
332-
"requiredextensions": "requiredExtensions",
333-
"requiredfeatures": "requiredFeatures",
334-
"specularconstant": "specularConstant",
335-
"specularexponent": "specularExponent",
336-
"spreadmethod": "spreadMethod",
337-
"startoffset": "startOffset",
338-
"stddeviation": "stdDeviation",
339-
"stitchtiles": "stitchTiles",
340-
"surfacescale": "surfaceScale",
341-
"systemlanguage": "systemLanguage",
342-
"tablevalues": "tableValues",
343-
"targetx": "targetX",
344-
"targety": "targetY",
345-
"textlength": "textLength",
346-
"viewbox": "viewBox",
347-
"viewtarget": "viewTarget",
348-
"xchannelselector": "xChannelSelector",
349-
"ychannelselector": "yChannelSelector",
350-
"zoomandpan": "zoomAndPan"
351-
}
352-
for originalName in list(token["data"].keys()):
353-
if originalName in replacements:
354-
svgName = replacements[originalName]
355-
token["data"][svgName] = token["data"][originalName]
356-
del token["data"][originalName]
290+
adjust_attributes(token, adjustSVGAttributes)
357291

358292
def adjustForeignAttributes(self, token):
359-
replacements = adjustForeignAttributesMap
360-
361-
for originalName in token["data"].keys():
362-
if originalName in replacements:
363-
foreignName = replacements[originalName]
364-
token["data"][foreignName] = token["data"][originalName]
365-
del token["data"][originalName]
293+
adjust_attributes(token, adjustForeignAttributesMap)
366294

367295
def reparseTokenNormal(self, token):
368296
# pylint:disable=unused-argument
@@ -2791,6 +2719,16 @@ def processEndTag(self, token):
27912719
}
27922720

27932721

2722+
def adjust_attributes(token, replacements):
    """Rename attributes of ``token`` in place, keeping their order.

    :arg token: a token dict whose ``'data'`` entry is a mapping of
        attribute name to attribute value.
    :arg replacements: a mapping of original attribute name to the name
        that should replace it.

    Attributes whose name is not a key of ``replacements`` are kept as they
    are.  ``token['data']`` is only rebuilt (as an ``OrderedDict``) when at
    least one attribute actually needs renaming; otherwise the existing
    mapping object is left untouched.  Nothing is returned.
    """
    attributes = token['data']
    # A plain membership scan behaves the same on every Python version, so no
    # PY3/PY27 branching is needed, and it stops at the first hit instead of
    # building a full intersection set just to test it for emptiness.
    if any(name in replacements for name in attributes):
        # Rebuild in iteration order so renamed attributes keep the position
        # of the attribute they replace.
        token['data'] = OrderedDict((replacements.get(name, name), value)
                                    for name, value in attributes.items())
2730+
2731+
27942732
def impliedTagToken(name, type="EndTag", attributes=None,
27952733
selfClosing=False):
27962734
if attributes is None:

html5lib/tests/test_parser2.py

+7
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,10 @@ def test_namespace_html_elements_1_etree():
4949

5050
def test_unicode_file():
5151
assert parse(io.StringIO("a")) is not None
52+
53+
54+
def test_duplicate_attribute():
55+
# This is here because we impl it in parser and not tokenizer
56+
doc = parse('<p class=a class=b>')
57+
el = doc[1][0]
58+
assert el.get("class") == "a"

html5lib/utils.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3+
import sys
34
from types import ModuleType
45

56
from six import text_type
@@ -12,9 +13,11 @@
1213

1314
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
1415
"surrogatePairToCodepoint", "moduleFactoryFactory",
15-
"supports_lone_surrogates"]
16+
"supports_lone_surrogates", "PY27"]
1617

1718

19+
PY27 = sys.version_info[0] == 2 and sys.version_info[1] >= 7
20+
1821
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
1922
# caught by the below test. In general this would be any platform
2023
# using UTF-16 as its encoding of unicode strings, such as

requirements-optional.txt

-4
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,3 @@ lxml ; platform_python_implementation == 'CPython'
1515
# DATrie can be used in place of our Python trie implementation for
1616
# slightly better parsing performance.
1717
datrie ; platform_python_implementation == 'CPython'
18-
19-
# Can be used to force attributes to be serialized in alphabetical
20-
# order.
21-
ordereddict ; python_version < '2.7'

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
six
22
webencodings
3+
ordereddict ; python_version < '2.7'

0 commit comments

Comments
 (0)