Skip to content

Commit aca1fd4

Browse files
add script to download all xkcd comics
1 parent 085ab22 commit aca1fd4

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

automation/xkcddownloader.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
#!/usr/bin/python3
"""Download every xkcd comic into a local ``xkcd`` directory.

xkcddownloader.py starts at the xkcd front page, saves the comic image
found on that page, then follows the "Prev" link to the previous comic.
The loop stops once the next URL ends with ``#`` (presumably the Prev
link's target on the very first comic) or when a page has no Prev link.

Any HTTP error status aborts the run via ``raise_for_status()``.
"""

import os
from urllib.parse import urljoin

import bs4
import requests

url = 'http://xkcd.com'
# Create the output directory 'xkcd' if it does not exist yet.
os.makedirs('xkcd', exist_ok=True)

while not url.endswith('#'):
    # Download the page.
    print('Downloading page %s....' % url)
    res = requests.get(url)
    res.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # Find the URL of the comic image.
    comicElem = soup.select('#comic img')
    comicSrc = comicElem[0].get('src') if comicElem else None
    if not comicSrc:
        print("Could not find comic image.")
    else:
        # urljoin resolves protocol-relative ('//host/x.png'), root-relative
        # and absolute src values alike against the current page URL.
        comicUrl = urljoin(url, comicSrc)

        # Download the image.
        print('Downloading image %s....' % comicUrl)
        imageRes = requests.get(comicUrl)
        imageRes.raise_for_status()

        # Save the image to ./xkcd; the context manager closes the file
        # even if a write fails part-way through.
        imagePath = os.path.join('xkcd', os.path.basename(comicUrl))
        with open(imagePath, 'wb') as imageFile:
            for chunk in imageRes.iter_content(100000):
                imageFile.write(chunk)

    # Get the Prev button's URL. Plain concatenation is kept on purpose:
    # an href of '#' must yield a URL ending in '#' to terminate the loop.
    prevLinks = soup.select('a[rel=prev]')
    if not prevLinks:
        # No Prev link on this page: nothing further to crawl.
        break
    url = 'http://xkcd.com' + prevLinks[0].get('href')

print('Done.')

0 commit comments

Comments
 (0)