1
1
from __future__ import absolute_import , division , unicode_literals
2
- from six import with_metaclass
2
+ from six import with_metaclass , viewkeys , PY3
3
3
4
4
import types
5
5
6
+ try :
7
+ from collections import OrderedDict
8
+ except ImportError :
9
+ from ordereddict import OrderedDict
10
+
6
11
from . import inputstream
7
12
from . import tokenizer
8
13
9
14
from . import treebuilders
10
15
from .treebuilders ._base import Marker
11
16
12
17
from . import utils
13
- from . import constants
14
- from .constants import spaceCharacters , asciiUpper2Lower
15
- from .constants import specialElements
16
- from .constants import headingElements
17
- from .constants import cdataElements , rcdataElements
18
- from .constants import tokenTypes , ReparseException , namespaces
19
- from .constants import htmlIntegrationPointElements , mathmlTextIntegrationPointElements
20
- from .constants import adjustForeignAttributes as adjustForeignAttributesMap
21
- from .constants import E
18
+ from .constants import (
19
+ spaceCharacters , asciiUpper2Lower ,
20
+ specialElements , headingElements , cdataElements , rcdataElements ,
21
+ tokenTypes , tagTokenTypes ,
22
+ namespaces ,
23
+ htmlIntegrationPointElements , mathmlTextIntegrationPointElements ,
24
+ adjustForeignAttributes as adjustForeignAttributesMap ,
25
+ adjustMathMLAttributes , adjustSVGAttributes ,
26
+ E ,
27
+ ReparseException
28
+ )
22
29
23
30
24
31
def parse (doc , treebuilder = "etree" , encoding = None ,
def normalizeToken(self, token):
    """Apply HTML5-specific normalizations to a token from the tokenizer."""
    if token["type"] == tokenTypes["StartTag"]:
        # The tokenizer hands us attributes as a list of (name, value)
        # pairs. Reversing before building the OrderedDict makes the FIRST
        # occurrence of a duplicated attribute win (later keys overwrite
        # earlier ones during construction), while keeping insertion order.
        raw_attrs = token["data"][::-1]
        token["data"] = OrderedDict(raw_attrs)
    return token
278
285
279
286
def adjustMathMLAttributes(self, token):
    """Fix the case of MathML attribute names on *token*
    (e.g. ``definitionurl`` -> ``definitionURL``)."""
    # NB: the bare name below resolves to the replacement mapping imported
    # from .constants, not to this method -- class attributes are not in
    # scope inside a method body.
    adjust_attributes(token, adjustMathMLAttributes)
285
288
286
289
def adjustSVGAttributes(self, token):
    """Restore the mixed case of SVG attribute names on *token*
    (e.g. ``viewbox`` -> ``viewBox``)."""
    # The bare name below is the camel-casing table imported from
    # .constants (module scope), not this method.
    adjust_attributes(token, adjustSVGAttributes)
356
291
357
292
def adjustForeignAttributes(self, token):
    """Apply the foreign-attribute renaming table from .constants to the
    attributes of *token*."""
    adjust_attributes(token, adjustForeignAttributesMap)
365
294
366
295
def reparseTokenNormal (self , token ):
367
296
# pylint:disable=unused-argument
@@ -434,7 +363,7 @@ def getPhases(debug):
434
363
def log (function ):
435
364
"""Logger that records which phase processes each token"""
436
365
type_names = dict ((value , key ) for key , value in
437
- constants . tokenTypes .items ())
366
+ tokenTypes .items ())
438
367
439
368
def wrapped (self , * args , ** kwargs ):
440
369
if function .__name__ .startswith ("process" ) and len (args ) > 0 :
@@ -443,7 +372,7 @@ def wrapped(self, *args, **kwargs):
443
372
info = {"type" : type_names [token ['type' ]]}
444
373
except :
445
374
raise
446
- if token ['type' ] in constants . tagTokenTypes :
375
+ if token ['type' ] in tagTokenTypes :
447
376
info ["name" ] = token ['name' ]
448
377
449
378
self .parser .log .append ((self .parser .tokenizer .state .__name__ ,
@@ -1022,17 +951,9 @@ def __init__(self, parser, tree):
1022
951
self .endTagHandler .default = self .endTagOther
1023
952
1024
953
def isMatchingFormattingElement(self, node1, node2):
    """Return True when two nodes agree on tag name, namespace and
    attribute mapping (dict equality, so attribute order is ignored)."""
    if node1.name != node2.name:
        return False
    if node1.namespace != node2.namespace:
        return False
    return node1.attributes == node2.attributes
1036
957
1037
958
# helper
1038
959
def addFormattingElement (self , token ):
@@ -2798,6 +2719,16 @@ def processEndTag(self, token):
2798
2719
}
2799
2720
2800
2721
2722
def adjust_attributes(token, replacements):
    """Rename keys of ``token['data']`` according to *replacements*,
    preserving the attributes' existing order.

    Only rebuilds the mapping when at least one key actually needs
    renaming; otherwise the token is left untouched.
    """
    if PY3 or utils.PY27:
        # Dict views give a C-level set intersection on Py2.7/Py3.
        overlap = viewkeys(token['data']) & viewkeys(replacements)
    else:
        overlap = frozenset(token['data']) & frozenset(replacements)
    if not overlap:
        return
    token['data'] = OrderedDict((replacements.get(key, key), value)
                                for key, value in token['data'].items())
2731
+
2801
2732
def impliedTagToken (name , type = "EndTag" , attributes = None ,
2802
2733
selfClosing = False ):
2803
2734
if attributes is None :
0 commit comments