@@ -59,18 +59,13 @@ class HTMLParser(object):
59
59
"""HTML parser. Generates a tree structure from a stream of (possibly
60
60
malformed) HTML"""
61
61
62
- def __init__ (self , tree = None , tokenizer = tokenizer .HTMLTokenizer ,
63
- strict = False , namespaceHTMLElements = True , debug = False ):
62
+ def __init__ (self , tree = None , strict = False , namespaceHTMLElements = True , debug = False ):
64
63
"""
65
64
strict - raise an exception when a parse error is encountered
66
65
67
66
tree - a treebuilder class controlling the type of tree that will be
68
67
returned. Built in treebuilders can be accessed through
69
68
html5lib.treebuilders.getTreeBuilder(treeType)
70
-
71
- tokenizer - a class that provides a stream of tokens to the treebuilder.
72
- This may be replaced for e.g. a sanitizer which converts some tags to
73
- text
74
69
"""
75
70
76
71
# Raise an exception on the first error encountered
@@ -79,7 +74,6 @@ def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
79
74
if tree is None :
80
75
tree = treebuilders .getTreeBuilder ("etree" )
81
76
self .tree = tree (namespaceHTMLElements )
82
- self .tokenizer_class = tokenizer
83
77
self .errors = []
84
78
85
79
self .phases = dict ([(name , cls (self , self .tree )) for name , cls in
@@ -91,9 +85,9 @@ def _parse(self, stream, innerHTML=False, container="div", encoding=None,
91
85
self .innerHTMLMode = innerHTML
92
86
self .container = container
93
87
self .scripting = scripting
94
- self .tokenizer = self . tokenizer_class (stream , encoding = encoding ,
95
- useChardet = useChardet ,
96
- parser = self , ** kwargs )
88
+ self .tokenizer = tokenizer . HTMLTokenizer (stream , encoding = encoding ,
89
+ useChardet = useChardet ,
90
+ parser = self , ** kwargs )
97
91
self .reset ()
98
92
99
93
try :
0 commit comments