
Commit e3f9d9d

Add parser for http://cdmnet.com.br/rss
Resolves: #9
1 parent c25e692 commit e3f9d9d

3 files changed, +151 -0 lines changed


bin/Downloader.py

Lines changed: 19 additions & 0 deletions
@@ -6,6 +6,7 @@
 import bin.Helper as helper
 import bin.sourceparser.Mangastream as msparser
 import bin.sourceparser.Mangafox as mxparser
+import bin.sourceparser.Cdmnet as cdmparser
 from PIL import Image
 from PIL import ImageOps
 from PIL import ImageFilter
@@ -104,6 +105,24 @@ def data_processor(self):
                 # Finish :)
                 logging.info("Finished download of %s!", self.mangatitle)
 
+        # CDM Parser
+        elif self.origin == "cdmnet.com.br":
+            urllist = cdmparser.getPagesUrl(self.mangastarturl, self.mangapages)
+
+            # check if we have images to download
+            if not len(urllist) == 0:
+
+                # Turn Manga pages into Image links!
+                logging.info("Starting download of %s...", self.mangatitle)
+                counter = 0
+                for i in urllist:
+                    counter = counter + 1
+                    self.downloader(i, counter, cdmparser.getImageUrl)
+
+
+                # Finish :)
+                logging.info("Finished download of %s!", self.mangatitle)
+
     def downloader(self, url, counter, parser):
         # Check if we have the Download folder
         helper.createFolder(self.downloadfolder)
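
The new branch follows the same parser-callback pattern as the existing Mangastream and Mangafox branches: getPagesUrl() expands the chapter start URL and page count into one URL per page, and downloader() later resolves each page URL to an image URL through the callback it is given (here cdmparser.getImageUrl). A minimal standalone sketch of that flow, with a hypothetical save() function standing in for the downloader method; only the cdmparser calls come from this commit:

    import bin.sourceparser.Cdmnet as cdmparser

    def download_cdm_chapter(starturl, pages, save):
        # One URL per page: the chapter URL plus a "#<n>" fragment
        # (built by getPagesUrl in bin/sourceparser/Cdmnet.py below).
        urllist = cdmparser.getPagesUrl(starturl, pages)
        for counter, pageurl in enumerate(urllist, start=1):
            # getImageUrl() fetches the page and rebuilds the CDN image address.
            save(cdmparser.getImageUrl(pageurl), counter)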

bin/Helper.py

Lines changed: 21 additions & 0 deletions
@@ -11,6 +11,7 @@
 from bin.Models import *
 import bin.sourceparser.Mangastream as msparser
 import bin.sourceparser.Mangafox as mxparser
+import bin.sourceparser.Cdmnet as cdmparser
 
 '''
@@ -475,7 +476,27 @@ def getMangaData(url, entry):
 
         mangadata = [manganame, pages, chapter, title, chapter_name, chapter_pubDate]
 
+    # CDM Parser
+    elif origin == "cdmnet.com.br":
+        logging.debug("Getting Mangadata from CDM for %s" % url)
 
+        # Easy stuff
+        title = entry.title
+        chapter_pubDate = entry.published
+
+        # Load the page once to hand it over to the parser functions
+        logging.debug("Loading page to gather data...")
+        page = requests.get(url)
+
+        # Getting the data
+        manganame = cdmparser.getTitle(page)
+        pages = cdmparser.getPages(page)
+        chapter = cdmparser.getChapter(url)
+        chapter_name = cdmparser.getChapterName(page)
+
+        logging.debug("Mangadata successfully loaded")
+
+        mangadata = [manganame, pages, chapter, title, chapter_name, chapter_pubDate]
     else:
         logging.error("Not supportet origin!")
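
getMangaData() receives a chapter URL together with the corresponding feed entry; the CDM branch takes the title and publication date straight from the entry and gathers everything else from one fetched page. A hedged sketch of driving it from the feed named in the commit message; feedparser is only an assumption (suggested by entry.title / entry.published), and using entry.link as the chapter URL is likewise illustrative:

    import feedparser
    import bin.Helper as helper

    # For cdmnet.com.br entries the new branch returns
    # [manganame, pages, chapter, title, chapter_name, chapter_pubDate].
    feed = feedparser.parse("http://cdmnet.com.br/rss")
    for entry in feed.entries:
        mangadata = helper.getMangaData(entry.link, entry)
        print(mangadata)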

bin/sourceparser/Cdmnet.py

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+import logging
+import re
+import requests
+from urllib.parse import urlparse
+from bs4 import BeautifulSoup
+
+'''
+
+CDM Parser
+
+
+'''
+
+
+'''
+get Manga Title
+Returns: title
+'''
+def getTitle(page):
+    title = None
+    soup = BeautifulSoup(page.content, 'html.parser')
+
+    # Get manga title from the og:title meta tag
+    search = re.search('<meta content="(.*?) -.*?property="og:title">', str(soup))
+    try:
+        title = search.group(1)
+    except AttributeError:
+        logging.error("No Title Found!")
+
+    return title
+
+
+'''
+get Manga Chapter name
+Returns: Chapter name
+'''
+def getChapterName(page):
+
+    logging.debug("CDM has no chapter names")
+    chaptername = ""
+    return chaptername
+
+
+'''
+get Manga Pages
+Returns: integer pages
+'''
+def getPages(page):
+    soup = BeautifulSoup(page.content, 'html.parser')
+
+    # Get the last entry of the embedded pages array
+    search = re.search("var pages = \[.*'(.*?)',];", str(soup))
+    pages = search.group(1)
+    return pages
+
+
+
+'''
+get Manga chapter
+Returns: integer chapter
+'''
+def getChapter(url):
+    # soup = BeautifulSoup(page.content, 'html.parser')
+
+    search = re.search('ler-online/(.*?)\Z', str(url))
+    chapter = search.group(1)
+    return chapter
+
+'''
+get Manga Pages URL
+Returns: urllist
+'''
+def getPagesUrl(starturl, pages):
+    pagesurllist = []
+
+    # Split URL to create list
+    parsed = urlparse(starturl)
+
+    # start url generator
+    for page in range(pages):
+        page = page + 1
+        fullurl = parsed.scheme + "://" + parsed.netloc + parsed.path + "#" + str(page)
+        pagesurllist.append(fullurl)
+
+    logging.debug("All pages:")
+    logging.debug(pagesurllist)
+    return pagesurllist
+
+
+
+'''
+get Manga Image URL
+Returns: urllist
+'''
+def getImageUrl(pageurl):
+    # Download Page
+    page = requests.get(pageurl)
+    soup = BeautifulSoup(page.content, 'html.parser')
+
+    # Get CDN URL suffix
+    search = re.search("var urlSulfix = '(.*?)';", str(soup))
+    cdnsuffix = search.group(1)
+
+    # Get pagenumber
+    var = re.search('ler-online/.*?#(.*?)\Z', str(pageurl))
+    pagenumber = var.group(1).zfill(2)
+
+
+    imageurl = str(cdnsuffix + pagenumber + ".jpg")
+    return imageurl
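
The parser works entirely with regular expressions over the page source: getTitle() reads the og:title meta tag up to the " -" separator, and getPages() captures the last entry of the embedded "var pages = [...]" array (a filename string rather than a count). A small offline sketch of those two patterns; the HTML fragments are invented to mimic the markup the regexes expect, whereas the module itself runs them over str(soup) of the fetched page:

    import re

    head = '<meta content="Example Manga - Capitulo 12" property="og:title">'
    script = "var pages = ['01.jpg','02.jpg','20.jpg',];"

    # getTitle(): everything in og:title before the " -" separator.
    print(re.search('<meta content="(.*?) -.*?property="og:title">', head).group(1))  # Example Manga

    # getPages(): the last quoted entry of the pages array.
    print(re.search(r"var pages = \[.*'(.*?)',];", script).group(1))  # 20.jpg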
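
getChapter(), getPagesUrl() and getImageUrl() together turn a chapter URL into image addresses: every "page URL" is just the chapter URL with a "#<n>" fragment, and getImageUrl() later reads the page's "var urlSulfix = '...';" script and appends the zero-padded page number plus ".jpg". A sketch of the URL handling with an invented manga slug and CDN prefix; only getImageUrl() needs network access, so its final composition step is shown by hand:

    import re
    import bin.sourceparser.Cdmnet as cdmparser

    starturl = "http://cdmnet.com.br/titulos/example-manga/ler-online/12"

    print(cdmparser.getChapter(starturl))      # "12" (everything after "ler-online/")

    for pageurl in cdmparser.getPagesUrl(starturl, 3):
        print(pageurl)
    # http://cdmnet.com.br/titulos/example-manga/ler-online/12#1
    # http://cdmnet.com.br/titulos/example-manga/ler-online/12#2
    # http://cdmnet.com.br/titulos/example-manga/ler-online/12#3

    # The last step of getImageUrl(): pad the fragment to two digits and
    # glue it onto the CDN prefix scraped from the page (hypothetical here).
    pagenumber = re.search(r'ler-online/.*?#(.*?)\Z', starturl + "#3").group(1).zfill(2)
    print("http://cdn.example/example-manga/12/" + pagenumber + ".jpg")  # .../03.jpg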
