Skip to content

Commit fe63ade

Browse files
committed
Add type hints for parse()
1 parent 3c6dc0a commit fe63ade

File tree

3 files changed

+188
-18
lines changed

3 files changed

+188
-18
lines changed

src/html5_parser/__init__.py

Lines changed: 185 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -9,17 +9,31 @@
99
import sys
1010
from collections import namedtuple
1111
from locale import getpreferredencoding
12+
from typing import TYPE_CHECKING
13+
14+
if TYPE_CHECKING:
15+
from typing import Literal, Optional, Union, overload, reveal_type
16+
from xml.dom.minidom import Document
17+
from xml.etree.ElementTree import Element
18+
19+
from bs4 import BeautifulSoup
20+
from lxml.etree import _Element as LxmlElement
21+
from lxml.html import HtmlElement
22+
ReturnType = Union[LxmlElement, HtmlElement, Element, Document, BeautifulSoup]
23+
else:
24+
_Element = ReturnType = HtmlElement = Element = Document = BeautifulSoup = None
25+
1226

1327
if not hasattr(sys, 'generating_docs_via_sphinx'):
1428
from lxml import etree # Must be imported before html_parser to initialize libxml
1529

1630
try:
17-
from . import html_parser
31+
from . import html_parser # type: ignore
1832
except ImportError:
1933
raise
2034
else:
2135
version = namedtuple('Version', 'major minor patch')(
22-
html_parser.MAJOR, html_parser.MINOR, html_parser.PATCH)
36+
html_parser.MAJOR, html_parser.MINOR, html_parser.PATCH) # type: ignore
2337

2438
if not hasattr(etree, 'adopt_external_document'):
2539
raise ImportError('Your version of lxml is too old, version 3.8.0 is minimum')
@@ -117,21 +131,168 @@ def normalize_treebuilder(x):
117131

118132
NAMESPACE_SUPPORTING_BUILDERS = frozenset('lxml stdlib_etree dom lxml_html'.split())
119133

134+
if TYPE_CHECKING:
135+
@overload
136+
def parse(
137+
html: Union[bytes, str], transport_encoding:Optional[str], namespace_elements: bool, treebuilder: Literal['lxml'],
138+
fallback_encoding: Optional[str] = ...,
139+
keep_doctype: bool = ...,
140+
maybe_xhtml: bool = ...,
141+
return_root: bool = ...,
142+
line_number_attr:Optional[str] = ...,
143+
sanitize_names: bool = ...,
144+
stack_size: int = ...,
145+
fragment_context: Optional[str] = ...,
146+
) -> LxmlElement: ...
147+
148+
@overload
149+
def parse(
150+
html: Union[bytes, str], transport_encoding:Optional[str], namespace_elements: bool, treebuilder: Literal['lxml_html'],
151+
fallback_encoding: Optional[str] = ...,
152+
keep_doctype: bool = ...,
153+
maybe_xhtml: bool = ...,
154+
return_root: bool = ...,
155+
line_number_attr:Optional[str] = ...,
156+
sanitize_names: bool = ...,
157+
stack_size: int = ...,
158+
fragment_context: Optional[str] = ...,
159+
) -> HtmlElement: ...
160+
161+
@overload
162+
def parse(
163+
html: Union[bytes, str], transport_encoding:Optional[str], namespace_elements: bool, treebuilder: Literal['etree'],
164+
fallback_encoding: Optional[str] = ...,
165+
keep_doctype: bool = ...,
166+
maybe_xhtml: bool = ...,
167+
return_root: bool = ...,
168+
line_number_attr:Optional[str] = ...,
169+
sanitize_names: bool = ...,
170+
stack_size: int = ...,
171+
fragment_context: Optional[str] = ...,
172+
) -> Element: ...
173+
174+
@overload
175+
def parse(
176+
html: Union[bytes, str], transport_encoding:Optional[str], namespace_elements: bool, treebuilder: Literal['dom'],
177+
fallback_encoding: Optional[str] = ...,
178+
keep_doctype: bool = ...,
179+
maybe_xhtml: bool = ...,
180+
return_root: bool = ...,
181+
line_number_attr:Optional[str] = ...,
182+
sanitize_names: bool = ...,
183+
stack_size: int = ...,
184+
fragment_context: Optional[str] = ...,
185+
) -> Document: ...
186+
187+
@overload
188+
def parse(
189+
html: Union[bytes, str], transport_encoding:Optional[str], namespace_elements: bool, treebuilder: Literal['soup'],
190+
fallback_encoding: Optional[str] = ...,
191+
keep_doctype: bool = ...,
192+
maybe_xhtml: bool = ...,
193+
return_root: bool = ...,
194+
line_number_attr:Optional[str] = ...,
195+
sanitize_names: bool = ...,
196+
stack_size: int = ...,
197+
fragment_context: Optional[str] = ...,
198+
) -> BeautifulSoup: ...
199+
200+
@overload
201+
def parse( # type:ignore
202+
html: Union[bytes, str],
203+
transport_encoding: Optional[str] = ...,
204+
namespace_elements: bool = ...,
205+
treebuilder: Literal['lxml'] = ...,
206+
fallback_encoding: Optional[str] = ...,
207+
keep_doctype: bool = ...,
208+
maybe_xhtml: bool = ...,
209+
return_root: bool = ...,
210+
line_number_attr:Optional[str] = ...,
211+
sanitize_names: bool = ...,
212+
stack_size: int = ...,
213+
fragment_context: Optional[str] = ...,
214+
) -> LxmlElement: ...
215+
216+
217+
@overload
218+
def parse(
219+
html: Union[bytes, str],
220+
transport_encoding: Optional[str] = ...,
221+
namespace_elements: bool = ...,
222+
treebuilder: Literal['lxml_html'] = ...,
223+
fallback_encoding: Optional[str] = ...,
224+
keep_doctype: bool = ...,
225+
maybe_xhtml: bool = ...,
226+
return_root: bool = ...,
227+
line_number_attr:Optional[str] = ...,
228+
sanitize_names: bool = ...,
229+
stack_size: int = ...,
230+
fragment_context: Optional[str] = ...,
231+
) -> HtmlElement: ...
232+
233+
@overload
234+
def parse( # type: ignore
235+
html: Union[bytes, str],
236+
transport_encoding: Optional[str] = ...,
237+
namespace_elements: bool = ...,
238+
treebuilder: Literal['etree'] = ...,
239+
fallback_encoding: Optional[str] = ...,
240+
keep_doctype: bool = ...,
241+
maybe_xhtml: bool = ...,
242+
return_root: bool = ...,
243+
line_number_attr:Optional[str] = ...,
244+
sanitize_names: bool = ...,
245+
stack_size: int = ...,
246+
fragment_context: Optional[str] = ...,
247+
) -> Element: ...
248+
249+
@overload
250+
def parse( # type: ignore
251+
html: Union[bytes, str],
252+
transport_encoding: Optional[str] = ...,
253+
namespace_elements: bool = ...,
254+
treebuilder: Literal['dom'] = ...,
255+
fallback_encoding: Optional[str] = ...,
256+
keep_doctype: bool = ...,
257+
maybe_xhtml: bool = ...,
258+
return_root: bool = ...,
259+
line_number_attr:Optional[str] = ...,
260+
sanitize_names: bool = ...,
261+
stack_size: int = ...,
262+
fragment_context: Optional[str] = ...,
263+
) -> Document: ...
264+
265+
@overload
266+
def parse(
267+
html: Union[bytes, str],
268+
transport_encoding: Optional[str] = ...,
269+
namespace_elements: bool = ...,
270+
treebuilder: Literal['soup'] = ...,
271+
fallback_encoding: Optional[str] = ...,
272+
keep_doctype: bool = ...,
273+
maybe_xhtml: bool = ...,
274+
return_root: bool = ...,
275+
line_number_attr:Optional[str] = ...,
276+
sanitize_names: bool = ...,
277+
stack_size: int = ...,
278+
fragment_context: Optional[str] = ...,
279+
) -> BeautifulSoup: ...
280+
120281

121282
def parse(
122-
html,
123-
transport_encoding=None,
124-
namespace_elements=False,
125-
treebuilder='lxml',
126-
fallback_encoding=None,
127-
keep_doctype=True,
128-
maybe_xhtml=False,
129-
return_root=True,
130-
line_number_attr=None,
131-
sanitize_names=True,
132-
stack_size=16 * 1024,
133-
fragment_context=None,
134-
):
283+
html: 'Union[bytes, str]',
284+
transport_encoding: 'Optional[str]' = None,
285+
namespace_elements: 'bool' = False,
286+
treebuilder: "Literal['lxml', 'lxml_html', 'etree', 'dom', 'soup']" = 'lxml',
287+
fallback_encoding: 'Optional[str]' = None,
288+
keep_doctype: 'bool' = True,
289+
maybe_xhtml: 'bool' = False,
290+
return_root: 'bool' = True,
291+
line_number_attr: 'Optional[str]' = None,
292+
sanitize_names: 'bool' = True,
293+
stack_size: 'int' = 16 * 1024,
294+
fragment_context: 'Optional[str]' = None,
295+
) -> ReturnType:
135296
'''
136297
Parse the specified :attr:`html` and return the parsed representation.
137298
@@ -229,3 +390,12 @@ def parse(
229390
return ans.getroot() if return_root else ans
230391
m = importlib.import_module('html5_parser.' + treebuilder)
231392
return m.adapt(ans, return_root=return_root)
393+
394+
395+
if TYPE_CHECKING:
396+
reveal_type(parse('a'))
397+
reveal_type(parse('a', 'x', True, 'dom'))
398+
reveal_type(parse('a', 'x', True, 'lxml', fragment_context='x'))
399+
reveal_type(parse('a', 'x', True, fragment_context='x'))
400+
reveal_type(parse('a', transport_encoding='xyz', return_root=True, fallback_encoding='moose'))
401+
reveal_type(parse('a', transport_encoding='x', return_root=False, treebuilder='etree', fragment_context='y'))

src/html5_parser/dom.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
impl = getDOMImplementation()
1212

1313
try:
14-
dict_items = dict.iteritems
14+
dict_items = dict.iteritems # type: ignore
1515
except AttributeError:
1616
dict_items = dict.items
1717

src/html5_parser/soup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,12 +37,12 @@ def soup_module():
3737
import bs4
3838
soup_module.ans = bs4
3939
except ImportError:
40-
import BeautifulSoup as bs3
40+
import BeautifulSoup as bs3 # type:ignore
4141
soup_module.ans = bs3
4242
return soup_module.ans
4343

4444

45-
soup_module.ans = None
45+
soup_module.ans = None # type: ignore
4646

4747

4848
def set_soup_module(val):

0 commit comments

Comments (0)