Skip to content

Commit a603dc5

Browse files
committed
fixup! fixup! fixup! fixup! Fix #120: introduce keyword arguments for encodings by source
1 parent 714dca5 commit a603dc5

File tree

2 files changed

+28
-5
lines changed

2 files changed

+28
-5
lines changed

html5lib/inputstream.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def _readFromBuffer(self, bytes):
128128
return b"".join(rv)
129129

130130

131-
def HTMLInputStream(source, override_encoding=None, **kwargs):
131+
def HTMLInputStream(source, **kwargs):
132132
# Work around Python bug #20007: read(0) closes the connection.
133133
# http://bugs.python.org/issue20007
134134
if (isinstance(source, http_client.HTTPResponse) or
@@ -142,12 +142,13 @@ def HTMLInputStream(source, override_encoding=None, **kwargs):
142142
isUnicode = isinstance(source, text_type)
143143

144144
if isUnicode:
145-
if override_encoding is not None:
146-
raise TypeError("Cannot set an override encoding with a unicode input")
145+
encodings = [x for x in kwargs if x.endswith("_encoding")]
146+
if encodings:
147+
raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
147148

148-
return HTMLUnicodeInputStream(source)
149+
return HTMLUnicodeInputStream(source, **kwargs)
149150
else:
150-
return HTMLBinaryInputStream(source, override_encoding=override_encoding, **kwargs)
151+
return HTMLBinaryInputStream(source, **kwargs)
151152

152153

153154
class HTMLUnicodeInputStream(object):

html5lib/tests/test_encoding.py

+22
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def test_parser_reparse():
4343
("iso-8859-2", b"", {"same_origin_parent_encoding": "utf-16le", "likely_encoding": "iso-8859-2"}),
4444
("iso-8859-2", b"", {"likely_encoding": "iso-8859-2", "default_encoding": "iso-8859-3"}),
4545
("iso-8859-2", b"", {"default_encoding": "iso-8859-2"}),
46+
("windows-1252", b"", {"default_encoding": "totally-bogus-string"}),
4647
("windows-1252", b"", {}),
4748
])
4849
def test_parser_args(expected, data, kwargs):
@@ -53,6 +54,27 @@ def test_parser_args(expected, data, kwargs):
5354
assert expected == p.documentEncoding
5455

5556

57+
@pytest.mark.parametrize("kwargs", [
    {"override_encoding": "iso-8859-2"},
    {"override_encoding": None},
    {"transport_encoding": "iso-8859-2"},
    {"transport_encoding": None},
    {"same_origin_parent_encoding": "iso-8859-2"},
    {"same_origin_parent_encoding": None},
    {"likely_encoding": "iso-8859-2"},
    {"likely_encoding": None},
    {"default_encoding": "iso-8859-2"},
    {"default_encoding": None},
    {"foo_encoding": "iso-8859-2"},
    {"foo_encoding": None},
])
def test_parser_args_raises(kwargs):
    """Check that parsing ``""`` with any ``*_encoding`` keyword raises TypeError.

    Each parametrized case passes a single keyword whose name ends in
    ``_encoding`` (including an unknown one, ``foo_encoding``, and ``None``
    values) and expects the parser to reject it with the
    "Cannot set an encoding with a unicode input" message.
    """
    p = HTMLParser()
    # Only the parse call is expected to raise; keep construction outside so
    # an unrelated TypeError from HTMLParser() cannot satisfy the check.
    with pytest.raises(TypeError) as exc_info:
        p.parse("", useChardet=False, **kwargs)
    # The assertion must run after the ``with`` block: statements following
    # the raising call inside it are never executed.  ``args[0]`` is used
    # because exceptions have no ``.message`` attribute on Python 3.
    assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
76+
77+
5678
def runParserEncodingTest(data, encoding):
5779
p = HTMLParser()
5880
assert p.documentEncoding is None

0 commit comments

Comments
 (0)