File tree 1 file changed +45
-0
lines changed
1 file changed +45
-0
lines changed Original file line number Diff line number Diff line change
1
#!/usr/bin/python3
# xkcddownloader.py - Downloads every single XKCD comic.
"""
Download all the comics from xkcd.

Starting at http://xkcd.com, the script saves the comic image found on each
page into the ./xkcd directory and then follows that page's "Prev" link.
It stops when the next URL ends with '#' (presumably the Prev link of the
very first comic -- verify against the live site) or when a page has no
usable Prev link.
"""

import os
from urllib.parse import urljoin

import bs4
import requests

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

url = 'http://xkcd.com'
os.makedirs('xkcd', exist_ok=True)  # create ./xkcd if it does not exist

while not url.endswith('#'):
    # Download the page.
    print('Downloading page %s....' % url)
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # Find the URL of the comic image.
    comicElem = soup.select('#comic img')
    comicSrc = comicElem[0].get('src') if comicElem else None
    if not comicSrc:
        print("Could not find comic image.")
    else:
        # urljoin resolves protocol-relative ('//host/x.png'), site-relative
        # ('/x.png') and already-absolute src values against the page URL.
        comicUrl = urljoin(url, comicSrc)

        # Download the image.
        print('Downloading image %s....' % comicUrl)
        imageRes = requests.get(comicUrl, timeout=REQUEST_TIMEOUT)
        imageRes.raise_for_status()

        # Save the image to ./xkcd under the file name taken from its URL.
        # The with-block closes the file even if a write fails part-way.
        imagePath = os.path.join('xkcd', os.path.basename(comicUrl))
        with open(imagePath, 'wb') as imageFile:
            for chunk in imageRes.iter_content(100000):
                imageFile.write(chunk)

    # Get the Prev button's URL. Plain concatenation is kept on purpose:
    # a '#' href must survive in the URL because the loop condition tests
    # for it, and urljoin would drop an empty fragment.
    prevLinks = soup.select('a[rel=prev]')
    prevHref = prevLinks[0].get('href') if prevLinks else None
    if not prevHref:
        print('Could not find a Prev link; stopping.')
        break
    url = 'http://xkcd.com' + prevHref

print('Done.')
You can’t perform that action at this time.
0 commit comments