Skip to content

Commit 78ef2d6

Browse files
committed
Improve feed url discovery
1 parent 5c29bdd commit 78ef2d6

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

syndicatorapi/api/feedreader.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
from itertools import chain
44
from pprint import pprint
55

6+
import urllib.parse
7+
68
from django.conf import settings
79

810
import requests
@@ -56,7 +58,11 @@ def __init__(self, soup):
5658

5759
def __getitem__(self, key):
5860
if key == "image":
59-
return self.soup.image.url.text if hasattr(self.soup, "image") else None
61+
return (
62+
self.soup.image.url.text
63+
if hasattr(self.soup, "image") and self.soup.image
64+
else None
65+
)
6066
return get_content(self.soup, key)
6167

6268
@property
@@ -156,9 +162,10 @@ def discover_feed(url) -> (str, str):
156162
soup = BeautifulSoup(response.text, "html.parser")
157163
head = PageHead(soup)
158164
if head.rss:
159-
feed_url = head.rss
165+
feed_url = urllib.parse.urljoin(url, head.rss)
166+
print(feed_url)
160167
response = requests.get(
161-
head.rss, headers={"User-Agent": USER_AGENT}, timeout=TIMEOUT
168+
feed_url, headers={"User-Agent": USER_AGENT}, timeout=TIMEOUT
162169
)
163170
else:
164171
raise ValueError("No feed found in HTML")

0 commit comments

Comments
 (0)