diff --git a/CHANGES.rst b/CHANGES.rst index 51a254c6..1f87d9ab 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -44,6 +44,8 @@ Released on XXX (instead of the tokenizer); as such, this will require amending all callers of it to use it via the treewalker API.** +* **Drop support for charade, now that chardet is supported once more.** + 0.9999999/1.0b8 ~~~~~~~~~~~~~~~ diff --git a/README.rst b/README.rst index 879dabad..e73b1639 100644 --- a/README.rst +++ b/README.rst @@ -113,9 +113,8 @@ functionality: - ``genshi`` has a treewalker (but not builder); and -- ``charade`` can be used as a fallback when character encoding cannot - be determined; ``chardet``, from which it was forked, can also be used - on Python 2. +- ``chardet`` can be used as a fallback when character encoding cannot + be determined. - ``ordereddict`` can be used under Python 2.6 (``collections.OrderedDict`` is used instead on later versions) to diff --git a/debug-info.py b/debug-info.py index b5d2bb6a..f93fbdbe 100644 --- a/debug-info.py +++ b/debug-info.py @@ -12,7 +12,7 @@ "maxsize": sys.maxsize } -search_modules = ["charade", "chardet", "datrie", "genshi", "html5lib", "lxml", "six"] +search_modules = ["chardet", "datrie", "genshi", "html5lib", "lxml", "six"] found_modules = [] for m in search_modules: diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py index 58d626c9..cfabdd86 100644 --- a/html5lib/inputstream.py +++ b/html5lib/inputstream.py @@ -468,10 +468,7 @@ def detectEncoding(self, parseMeta=True, chardet=True): if encoding is None and chardet: confidence = "tentative" try: - try: - from charade.universaldetector import UniversalDetector - except ImportError: - from chardet.universaldetector import UniversalDetector + from chardet.universaldetector import UniversalDetector buffers = [] detector = UniversalDetector() while not detector.done: diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index c5d2af12..a66a2178 100644 --- 
a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -57,12 +57,9 @@ def test_encoding(): # pylint:disable=wrong-import-position try: - try: - import charade # noqa - except ImportError: - import chardet # noqa + import chardet # noqa except ImportError: - print("charade/chardet not found, skipping chardet tests") + print("chardet not found, skipping chardet tests") else: def test_chardet(): with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp: diff --git a/requirements-optional.txt b/requirements-optional.txt index ac6539cb..781ab8c2 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -4,9 +4,9 @@ # streams. genshi -# charade can be used as a fallback in case we are unable to determine +# chardet can be used as a fallback in case we are unable to determine # the encoding of a document. -charade +chardet>=2.2 # lxml is supported with its own treebuilder ("lxml") and otherwise # uses the standard ElementTree support diff --git a/setup.py b/setup.py index b42ba400..4d5f1523 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,8 @@ import ast -import os import codecs -from setuptools import setup +from os.path import join, dirname +from setuptools import setup, find_packages classifiers = [ @@ -22,18 +22,13 @@ 'Topic :: Text Processing :: Markup :: HTML' ] -packages = ['html5lib'] + ['html5lib.' 
+ name - for name in os.listdir(os.path.join('html5lib')) - if os.path.isdir(os.path.join('html5lib', name)) and - not name.startswith('.') and name != 'tests'] - -current_dir = os.path.dirname(__file__) -with codecs.open(os.path.join(current_dir, 'README.rst'), 'r', 'utf8') as readme_file: - with codecs.open(os.path.join(current_dir, 'CHANGES.rst'), 'r', 'utf8') as changes_file: +here = dirname(__file__) +with codecs.open(join(here, 'README.rst'), 'r', 'utf8') as readme_file: + with codecs.open(join(here, 'CHANGES.rst'), 'r', 'utf8') as changes_file: long_description = readme_file.read() + '\n' + changes_file.read() version = None -with open(os.path.join("html5lib", "__init__.py"), "rb") as init_file: +with open(join("html5lib", "__init__.py"), "rb") as init_file: t = ast.parse(init_file.read(), filename="__init__.py", mode="exec") assert isinstance(t, ast.Module) assignments = filter(lambda x: isinstance(x, ast.Assign), t.body) @@ -53,7 +48,7 @@ classifiers=classifiers, maintainer='James Graham', maintainer_email='james@hoppipolla.co.uk', - packages=packages, + packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), install_requires=[ 'six', 'webencodings', @@ -70,13 +65,13 @@ # Standard extras, will be installed when the extra is requested. "genshi": ["genshi"], - "charade": ["charade"], + "chardet": ["chardet>=2.2"], # The all extra combines a standard extra which will be used anytime # the all extra is requested, and it extends it with a conditional # extra that will be installed whenever the condition matches and the # all extra is requested. - "all": ["genshi", "charade"], + "all": ["genshi", "chardet>=2.2"], "all:platform.python_implementation == 'CPython'": ["datrie", "lxml"], }, )