Skip to content

Commit 75cf697

Browse files
committed
Reintroduce the old sanitizer testsuite from html5lib-tests
This is imported into this repo because its expectations are very much implementation-dependent; the expectations have been amended to match our actual behaviour.
1 parent a2917e9 commit 75cf697

File tree

4 files changed

+494
-5
lines changed

4 files changed

+494
-5
lines changed

html5lib/serializer/htmlserializer.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,12 @@ def serialize(self, treewalker, encoding=None):
184184
if encoding and self.inject_meta_charset:
185185
from ..filters.inject_meta_charset import Filter
186186
treewalker = Filter(treewalker, encoding)
187+
# Alphabetical attributes is here under the assumption that none of
188+
# the later filters add or change order of attributes; it needs to be
189+
# before the sanitizer so escaped elements come out correctly
190+
if self.alphabetical_attributes:
191+
from ..filters.alphabeticalattributes import Filter
192+
treewalker = Filter(treewalker)
187193
# WhitespaceFilter should be used before OptionalTagFilter
188194
# for maximum efficiency of this latter filter
189195
if self.strip_whitespace:
@@ -195,11 +201,6 @@ def serialize(self, treewalker, encoding=None):
195201
if self.omit_optional_tags:
196202
from ..filters.optionaltags import Filter
197203
treewalker = Filter(treewalker)
198-
# Alphabetical attributes must be last, as other filters
199-
# could add attributes and alter the order
200-
if self.alphabetical_attributes:
201-
from ..filters.alphabeticalattributes import Filter
202-
treewalker = Filter(treewalker)
203204

204205
for token in treewalker:
205206
type = token["type"]

html5lib/tests/conftest.py

+5
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22

33
from .tree_construction import TreeConstructionFile
44
from .tokenizer import TokenizerFile
5+
from .sanitizer import SanitizerFile
56

67
_dir = os.path.abspath(os.path.dirname(__file__))
78
_testdata = os.path.join(_dir, "testdata")
89
_tree_construction = os.path.join(_testdata, "tree-construction")
910
_tokenizer = os.path.join(_testdata, "tokenizer")
11+
_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
1012

1113

1214
def pytest_collectstart():
@@ -24,3 +26,6 @@ def pytest_collect_file(path, parent):
2426
elif dir == _tokenizer:
2527
if path.ext == ".test":
2628
return TokenizerFile(path, parent)
29+
elif dir == _sanitizer_testdata:
30+
if path.ext == ".dat":
31+
return SanitizerFile(path, parent)

0 commit comments

Comments
 (0)