Skip to content

Commit 246aca5

Browse files
authored
Remove Python 2 compatibility code (#401)
1 parent f92dbb2 commit 246aca5

File tree

5 files changed

+27
-130
lines changed

5 files changed

+27
-130
lines changed

LICENSE

-38
Original file line numberDiff line numberDiff line change
@@ -18,41 +18,3 @@ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
1818
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
1919
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
2020
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21-
22-
compat code from IPython py3compat.py and encoding.py, which is licensed under
23-
the terms of the Modified BSD License (also known as New or
24-
Revised or 3-Clause BSD)
25-
26-
- Copyright (c) 2008-2014, IPython Development Team
27-
- Copyright (c) 2001-2007, Fernando Perez <[email protected]>
28-
- Copyright (c) 2001, Janko Hauser <[email protected]>
29-
- Copyright (c) 2001, Nathaniel Gray <[email protected]>
30-
31-
All rights reserved.
32-
33-
Redistribution and use in source and binary forms, with or without
34-
modification, are permitted provided that the following conditions are
35-
met:
36-
37-
Redistributions of source code must retain the above copyright notice,
38-
this list of conditions and the following disclaimer.
39-
40-
Redistributions in binary form must reproduce the above copyright notice,
41-
this list of conditions and the following disclaimer in the documentation
42-
and/or other materials provided with the distribution.
43-
44-
Neither the name of the IPython Development Team nor the names of its
45-
contributors may be used to endorse or promote products derived from this
46-
software without specific prior written permission.
47-
48-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
49-
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
50-
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51-
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
52-
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
53-
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
54-
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
55-
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
56-
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
57-
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
58-
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pypandoc/__init__.py

+19-31
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
import sys
66
import tempfile
77
import textwrap
8+
import typing
9+
import urllib.parse
10+
import urllib.request
811
from pathlib import Path
912
from typing import Iterable, Iterator, Union
1013

1114
from .handler import _check_log_handler, logger
1215
from .pandoc_download import DEFAULT_TARGET_FOLDER, download_pandoc
13-
from .py3compat import cast_bytes, cast_unicode, url2path, urlparse
1416

1517
__author__ = "Juho Vepsäläinen"
1618
__author_email__ = "[email protected]"
@@ -53,7 +55,7 @@
5355

5456

5557
def convert_text(
56-
source: str,
58+
source: typing.Union[str, bytes],
5759
to: str,
5860
format: str,
5961
extra_args: Iterable = (),
@@ -66,7 +68,7 @@ def convert_text(
6668
) -> str:
6769
"""Converts given `source` from `format` to `to`.
6870
69-
:param str source: Unicode string or bytes (see encoding)
71+
:param source: Unicode string or bytes (see encoding)
7072
7173
:param str to: format into which the input should be converted;
7274
can be one of `pypandoc.get_pandoc_formats()[1]`
@@ -106,7 +108,10 @@ def convert_text(
106108
if pandoc is not found; make sure it has been installed
107109
and is available at path.
108110
"""
109-
source = _as_unicode(source, encoding)
111+
112+
if isinstance(source, bytes):
113+
source = source.decode(encoding, errors="ignore")
114+
110115
return _convert_input(
111116
source,
112117
format,
@@ -286,7 +291,7 @@ def _identify_path(source) -> bool:
286291
if not is_path:
287292
try:
288293
# check if it's a URL
289-
result = urlparse(source)
294+
result = urllib.parse.urlparse(source)
290295
if result.scheme in ["http", "https"]:
291296
is_path = True
292297
elif result.scheme and result.netloc and result.path:
@@ -303,7 +308,7 @@ def _identify_path(source) -> bool:
303308
def _is_network_path(source):
304309
try:
305310
# check if it's a URL
306-
result = urlparse(source)
311+
result = urllib.parse.urlparse(source)
307312
if result.scheme in ["http", "https"]:
308313
return True
309314
elif result.scheme and result.netloc and result.path:
@@ -320,17 +325,6 @@ def _identify_format_from_path(sourcefile: str, format: str) -> str:
320325
return format or os.path.splitext(sourcefile)[1].strip(".")
321326

322327

323-
def _as_unicode(source: any, encoding: str) -> any:
324-
if encoding != "utf-8":
325-
# if a source and a different encoding is given,
326-
# try to decode the source into a string
327-
try:
328-
source = cast_unicode(source, encoding=encoding)
329-
except (UnicodeDecodeError, UnicodeEncodeError):
330-
pass
331-
return source
332-
333-
334328
def normalize_format(fmt):
335329
formats = {
336330
"dbk": "docbook",
@@ -404,7 +398,7 @@ def _validate_formats(format, to, outputfile):
404398

405399

406400
def _convert_input(
407-
source,
401+
source: str,
408402
format,
409403
input_type,
410404
to,
@@ -509,19 +503,9 @@ def _convert_input(
509503
)
510504

511505
if string_input:
512-
try:
513-
source = cast_bytes(source, encoding="utf-8")
514-
except (UnicodeDecodeError, UnicodeEncodeError):
515-
# assume that it is already a utf-8 encoded string
516-
pass
517-
try:
518-
stdout, stderr = p.communicate(source if string_input else None)
519-
except OSError:
520-
# this is happening only on Py2.6 when pandoc dies before reading all
521-
# the input. We treat that the same as when we exit with an error...
522-
raise RuntimeError(
523-
'Pandoc died with exitcode "%s" during conversion.' % (p.returncode)
524-
)
506+
if isinstance(source, str):
507+
source = source.encode("utf-8")
508+
stdout, stderr = p.communicate(source if string_input else None)
525509

526510
try:
527511
if not (to in ["odt", "docx", "epub", "epub3", "pdf"] and outputfile == "-"):
@@ -957,6 +941,10 @@ def ensure_pandoc_installed(
957941
_ensure_pandoc_path()
958942

959943

944+
def url2path(url):
945+
return urllib.request.url2pathname(urllib.parse.urlparse(url).path)
946+
947+
960948
# -----------------------------------------------------------------------------
961949
# Internal state management
962950
# -----------------------------------------------------------------------------

pypandoc/pandoc_download.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,10 @@
66
import subprocess
77
import sys
88
import tempfile
9-
import urllib
9+
import urllib.error
10+
import urllib.request
1011
from typing import Union
1112

12-
try:
13-
from urllib.request import urlopen
14-
except ImportError:
15-
from urllib import urlopen
16-
1713
from .handler import _check_log_handler, logger
1814

1915
DEFAULT_TARGET_FOLDER = {
@@ -47,13 +43,13 @@ def _get_pandoc_urls(version="latest"):
4743
)
4844
# try to open the url
4945
try:
50-
response = urlopen(url)
46+
response = urllib.request.urlopen(url)
5147
version_url_frags = response.url.split("/")
5248
version = version_url_frags[-1]
5349
except urllib.error.HTTPError:
5450
raise RuntimeError(f"Invalid pandoc version {version}.")
5551
# read the HTML content
56-
response = urlopen(
52+
response = urllib.request.urlopen(
5753
f"https://github.com/jgm/pandoc/releases/expanded_assets/{version}"
5854
)
5955
content = response.read()
@@ -264,7 +260,7 @@ def download_pandoc(
264260
else:
265261
logger.info(f"Downloading pandoc from {url} ...")
266262
# https://stackoverflow.com/questions/30627937/
267-
response = urlopen(url)
263+
response = urllib.request.urlopen(url)
268264
with open(filename, "wb") as out_file:
269265
shutil.copyfileobj(response, out_file)
270266

pypandoc/py3compat.py

-50
This file was deleted.

tests.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@
1313
import unittest
1414
import warnings
1515
from pathlib import Path
16+
from urllib.parse import urljoin
17+
from urllib.request import pathname2url
1618

1719
import pypandoc
18-
from pypandoc.py3compat import path2url
1920

2021

2122
@contextlib.contextmanager
@@ -239,7 +240,7 @@ def test_basic_conversion_from_file_url(self):
239240
expected = "some title{0}=========={0}{0}".format(os.linesep)
240241
# this keeps the : (which should be '|' on windows but pandoc
241242
# doesn't like it)
242-
file_url = path2url(file_name)
243+
file_url = urljoin("file:", pathname2url(file_name))
243244
assert pypandoc._identify_path(file_url)
244245

245246
received = pypandoc.convert_file(file_url, "rst")

0 commit comments

Comments
 (0)