Skip to content

Drop charade, cleanup setup.py #252

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 22, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ Released on XXX
(instead of the tokenizer); as such, this will require amending all
callers of it to use it via the treewalker API.**

* **Drop support for charade, now that chardet is supported once more.**


0.9999999/1.0b8
~~~~~~~~~~~~~~~
Expand Down
5 changes: 2 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,8 @@ functionality:

- ``genshi`` has a treewalker (but not builder); and

- ``charade`` can be used as a fallback when character encoding cannot
be determined; ``chardet``, from which it was forked, can also be used
on Python 2.
- ``chardet`` can be used as a fallback when character encoding cannot
be determined.

- ``ordereddict`` can be used under Python 2.6
(``collections.OrderedDict`` is used instead on later versions) to
Expand Down
2 changes: 1 addition & 1 deletion debug-info.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"maxsize": sys.maxsize
}

search_modules = ["charade", "chardet", "datrie", "genshi", "html5lib", "lxml", "six"]
search_modules = ["chardet", "datrie", "genshi", "html5lib", "lxml", "six"]
found_modules = []

for m in search_modules:
Expand Down
5 changes: 1 addition & 4 deletions html5lib/inputstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,10 +468,7 @@ def detectEncoding(self, parseMeta=True, chardet=True):
if encoding is None and chardet:
confidence = "tentative"
try:
try:
from charade.universaldetector import UniversalDetector
except ImportError:
from chardet.universaldetector import UniversalDetector
from chardet.universaldetector import UniversalDetector
buffers = []
detector = UniversalDetector()
while not detector.done:
Expand Down
7 changes: 2 additions & 5 deletions html5lib/tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,9 @@ def test_encoding():

# pylint:disable=wrong-import-position
try:
try:
import charade # noqa
except ImportError:
import chardet # noqa
import chardet # noqa
except ImportError:
print("charade/chardet not found, skipping chardet tests")
print("chardet not found, skipping chardet tests")
else:
def test_chardet():
with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
Expand Down
4 changes: 2 additions & 2 deletions requirements-optional.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
# streams.
genshi

# charade can be used as a fallback in case we are unable to determine
# chardet can be used as a fallback in case we are unable to determine
# the encoding of a document.
charade
chardet>=2.2

# lxml is supported with its own treebuilder ("lxml") and otherwise
# uses the standard ElementTree support
Expand Down
23 changes: 9 additions & 14 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import ast
import os
import codecs

from setuptools import setup
from os.path import join, dirname
from setuptools import setup, find_packages


classifiers = [
Expand All @@ -22,18 +22,13 @@
'Topic :: Text Processing :: Markup :: HTML'
]

packages = ['html5lib'] + ['html5lib.' + name
for name in os.listdir(os.path.join('html5lib'))
if os.path.isdir(os.path.join('html5lib', name)) and
not name.startswith('.') and name != 'tests']

current_dir = os.path.dirname(__file__)
with codecs.open(os.path.join(current_dir, 'README.rst'), 'r', 'utf8') as readme_file:
with codecs.open(os.path.join(current_dir, 'CHANGES.rst'), 'r', 'utf8') as changes_file:
here = dirname(__file__)
with codecs.open(join(here, 'README.rst'), 'r', 'utf8') as readme_file:
with codecs.open(join(here, 'CHANGES.rst'), 'r', 'utf8') as changes_file:
long_description = readme_file.read() + '\n' + changes_file.read()

version = None
with open(os.path.join("html5lib", "__init__.py"), "rb") as init_file:
with open(join("html5lib", "__init__.py"), "rb") as init_file:
t = ast.parse(init_file.read(), filename="__init__.py", mode="exec")
assert isinstance(t, ast.Module)
assignments = filter(lambda x: isinstance(x, ast.Assign), t.body)
Expand All @@ -53,7 +48,7 @@
classifiers=classifiers,
maintainer='James Graham',
maintainer_email='[email protected]',
packages=packages,
packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
install_requires=[
'six',
'webencodings',
Expand All @@ -70,13 +65,13 @@

# Standard extras, will be installed when the extra is requested.
"genshi": ["genshi"],
"charade": ["charade"],
"chardet": ["chardet>=2.2"],

# The all extra combines a standard extra which will be used anytime
# the all extra is requested, and it extends it with a conditional
# extra that will be installed whenever the condition matches and the
# all extra is requested.
"all": ["genshi", "charade"],
"all": ["genshi", "chardet>=2.2"],
"all:platform.python_implementation == 'CPython'": ["datrie", "lxml"],
},
)