diff --git a/.gitignore b/.gitignore index a8895f9..c82584b 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,6 @@ target/ # SublimeText project/workspace *.sublime* + +# Pycharm +.idea diff --git a/opengraph/opengraph.py b/opengraph/opengraph.py index fe7fec0..ee6d90c 100644 --- a/opengraph/opengraph.py +++ b/opengraph/opengraph.py @@ -11,11 +11,11 @@ class OpenGraph(object): useragent = None __data__ = {} - def __init__(self, url=None, html=None, useragent=None): + def __init__(self, url=None, html=None, useragent=None, html_parser='html.parser'): if useragent: self.useragent = useragent content = html or self._fetch(url) - self._parse(content) + self._parse(content, html_parser) def __contains__(self, item): return item in self.__data__ @@ -23,8 +23,7 @@ def __contains__(self, item): def __getattr__(self, name): if name in self.__data__: return self.__data__[name] - raise AttributeError( - 'Open Graph object has no attribute "{}"'.format(name)) + raise AttributeError('Open Graph object has no attribute "{}"'.format(name)) def __repr__(self): return self.__data__.__str__() @@ -35,14 +34,12 @@ def __str__(self): def _fetch(self, url): headers = {} if self.useragent: - headers = { - 'user-agent': self.useragent - } + headers = {'user-agent': self.useragent} response = requests.get(url, headers=headers) return response.text - def _parse(self, html): - doc = BeautifulSoup(html) + def _parse(self, html, html_parser): + doc = BeautifulSoup(html, html_parser) ogs = doc.html.head.findAll(property=re.compile(r'^og')) for og in ogs: