|
| 1 | +import cfscrape |
| 2 | +import re |
| 3 | +import urllib.request |
| 4 | +import os |
| 5 | + |
def link_finder(master_page):
    """Collect issue-reader URLs from a comic's master page.

    NOTE(review): shadowed by a later definition of the same name in this
    file; this version uses the shared module-level ``scraper`` session.
    """
    anchor_pattern = re.compile(r'<a href="(.*)"><span>.*</span></a')
    response = scraper.get(master_page)
    full_links = []
    for relative in anchor_pattern.findall(response.text):
        # readType=1 requests the all-pages-on-one-page reader view.
        full_links.append("http://readcomiconline.to" + relative + "&readType=1")
    return full_links
def image_finder(issue_link):
    """Return every page-image URL pushed into ``lstImages`` on an issue page.

    NOTE(review): shadowed by a later definition of the same name below;
    relies on the shared module-level ``scraper`` session.
    """
    push_pattern = re.compile(r'lstImages.push\("(\S+)"\)')
    response = scraper.get(issue_link)
    return list(push_pattern.findall(response.text))
def downloader_main(image_links):
    """Download each URL in *image_links* into the current directory as
    sequentially numbered PNGs (``0.png``, ``1.png``, ...).

    NOTE(review): shadowed by a later ``downloader_main`` definition below.
    """
    for index, url in enumerate(image_links):
        urllib.request.urlretrieve(url, str(index) + ".png")
| 20 | + |
| 21 | +scraper = cfscrape.create_scraper() |
| 22 | + |
def link_finder(master_page):
    """Scrape *master_page* and return absolute reader URLs for every issue.

    Parameters
    ----------
    master_page : str
        URL of the comic's master page on readcomiconline.to.

    Returns
    -------
    list[str]
        One URL per matched ``/Comic/.../Issue...`` link, with
        ``&readType=1`` appended (all-pages-on-one-page reader mode).
    """
    session = cfscrape.create_scraper()
    # [^"]+ instead of greedy .+ so the capture cannot run past the closing
    # quote when a line contains additional attributes or quoted strings.
    issue_pattern = re.compile(r'href="(/Comic/[^"]+/Issue[^"]+)"')
    page_source = session.get(master_page)
    return [
        "http://readcomiconline.to" + path + "&readType=1"
        for path in issue_pattern.findall(page_source.text)
    ]
| 30 | + |
def image_finder(issue_link):
    """Fetch an issue's reader page and return its page-image URLs.

    Parameters
    ----------
    issue_link : str
        Reader URL for a single issue (as produced by ``link_finder``).

    Returns
    -------
    list[str]
        Image URLs in page order, extracted from the page's JavaScript
        ``lstImages.push("...")`` calls.
    """
    session = cfscrape.create_scraper()
    # Dots escaped so the pattern matches the literal "lstImages.push" only;
    # findall already returns a list, so no extra list() wrapper is needed.
    image_regex = re.compile(r'lstImages\.push\("(\S+)"\)')
    page_source = session.get(issue_link)
    return image_regex.findall(page_source.text)
| 37 | + |
def downloader_main(name, master_page):
    """Download every issue of a comic into ``./<name>/Issue <k>/Page<i>.png``.

    Parameters
    ----------
    name : str
        Folder name created under the current working directory.
    master_page : str
        URL of the comic's master page on readcomiconline.to.

    Side effects: creates directories and writes one PNG per page; prints
    progress to stdout.
    """
    download_location = os.path.join(os.getcwd(), name)
    # exist_ok=True avoids the check-then-create race of exists()+makedirs().
    os.makedirs(download_location, exist_ok=True)

    issues = link_finder(master_page)
    print(issues)
    for number, issue_url in enumerate(issues, start=1):
        print("Currently Downloading Issue {}".format(number))
        image_links = image_finder(issue_url)
        issue_location = os.path.join(download_location, "Issue " + str(number))
        os.makedirs(issue_location, exist_ok=True)
        # Page files keep the original 0-based naming (Page0.png, Page1.png, ...).
        for x, image_url in enumerate(image_links):
            print("Downloading Page {}".format(x))
            urllib.request.urlretrieve(
                image_url, os.path.join(issue_location, "Page" + str(x) + ".png")
            )
| 54 | + |
| 55 | +# To use, call the function downloader_main('Folder name you want the comic to be in', 'comic link') |
| 56 | +# downloader_main('Infinity Countdown', 'http://readcomiconline.to/Comic/Infinity-Countdown') |