diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..27e9f85 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +*.pyc +.vscode +test\ Kopie.db +_test.db +test.db +__pycache__ +comic/* +data/* +kindlegen.exe +.idea +venv +main.db +log/* diff --git a/.gitignore b/.gitignore index f660662..27e9f85 100644 --- a/.gitignore +++ b/.gitignore @@ -5,8 +5,9 @@ _test.db test.db __pycache__ comic/* +data/* kindlegen.exe .idea venv main.db -log/* \ No newline at end of file +log/* diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..cca4826 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,25 @@ +FROM python:3.6-slim + +MAINTAINER Schemen + + +WORKDIR /usr/src/app + +VOLUME /usr/src/app/data + +ENTRYPOINT ["/usr/bin/dumb-init", "--"] + +COPY requirements.txt ./ +RUN apt-get update && apt-get install dumb-init gcc wget -y && \ + rm -rf /var/lib/apt/lists/* && \ + pip install --no-cache-dir -r requirements.txt && \ + apt-get purge gcc -y && apt-get autoremove -y && apt-get clean + +RUN wget http://kindlegen.s3.amazonaws.com/kindlegen_linux_2.6_i386_v2_9.tar.gz -O /tmp/kindlegen.tar.gz && \ + tar xvf /tmp/kindlegen.tar.gz -C /tmp && mv /tmp/kindlegen /usr/bin && \ + rm -r /tmp/* + + +COPY . . + +CMD [ "python","m2em.py", "--daemon", "-s"] diff --git a/README.md b/README.md index 54094df..24f4786 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,12 @@ Not living in Japan has me not really having any readable access of weekly chapt M2em let's you automatically download Mangas via RSS Feed that updates at a configurable interval (and comics in the future?), convert them into eMangas and send them off via Email (Target being the Email to Kindle function of Amazon)! +## Supported Websites + +* Mangastream +* MangaFox +* Cdmnet + # Setup M2em requires Python3 and I highly recommend working in a virtualenv. Some OS require the python-dev package! 
@@ -39,6 +45,24 @@ deactivate Get Kindlegen here: https://www.amazon.com/gp/feature.html?docId=1000765211 +## Docker Setup +You can use the Dockerfile or the image schemen/m2em. All options in the config.ini are available as environment variables. Make sure you write them exactly the same! + +Example Compose file: +``` +version: '2' +services: + m2em: + image: schemen/m2em:latest + environment: + - SMTPServer=mail.example.com + - EmailAddress=comic@example.com + - EmailAddressPw=verysecurepassword + volumes: + - :/usr/src/app/data + +``` + ## Concept As a concept, M2em has different workers that run in a loop. All Chapter/user data is saved in a SQLite3 Database. * RssParser - Parsing the RSS feed and saving the information of each chapter @@ -64,15 +88,16 @@ Example: if you wish to download all chapters you have saved in your database, y You can directly apply an action to one chapter with the options --download, --convert or --send. You need to pass the ID of said chapter, you can find that out with "-Lc" or "-lc". You can pass multiple IDs. 
-Example: + +Also, you can process N chapters with the "--process/-p" option: ``` -./m2em.py --download 100 #Downloads chapter with ID 100 +./m2em.py -p 100 #Downloads, Converts and Sends chapter with ID 100 ``` -## Supported Websites -* Mangastream -* MangaFox +``` +./m2em.py --download 100 #Downloads chapter with ID 100 +``` # Usage @@ -81,9 +106,10 @@ Example: usage: m2em.py [-h] [-af ADD_FEED] [-au] [-lm [LIST_MANGA]] [-lc] [-Lc] [-lf] [-lu] [-cd] [-s] [--send [SEND [SEND ...]]] [--convert [CONVERT [CONVERT ...]]] - [--download [DOWNLOAD [DOWNLOAD ...]]] [-a ACTION] - [-ss SWITCH_SEND] [-sc SWITCH_CHAPTER] [-dc DELETE_CHAPTER] - [-du DELETE_USER] [-df DELETE_FEED] [--daemon] [-d] [-v] + [--download [DOWNLOAD [DOWNLOAD ...]]] + [-p [PROCESS [PROCESS ...]]] [-a ACTION] [-ss SWITCH_SEND] + [-dc DELETE_CHAPTER] [-du DELETE_USER] [-df DELETE_FEED] + [--daemon] [-d] [-v] Manga to eManga - m2em @@ -112,13 +138,14 @@ optional arguments: --download [DOWNLOAD [DOWNLOAD ...]] Downloads Chapter directly by chapter ID. Multiple IDs can be given + -p [PROCESS [PROCESS ...]], --process [PROCESS [PROCESS ...]] + Processes chapter(s) by chapter ID, Download, convert, + send. Multiple IDs can be given -a ACTION, --action ACTION Start action. Options are: rssparser (collecting feed data), downloader, converter or sender -ss SWITCH_SEND, --switch-send SWITCH_SEND Pass ID of User. Switches said user Send eBook status - -sc SWITCH_CHAPTER, --switch-chapter SWITCH_CHAPTER - Pass ID of Chapter. Switches said Chapter Sent status -dc DELETE_CHAPTER, --delete-chapter DELETE_CHAPTER Pass ID of Chapter. 
Deletes said Chapter -du DELETE_USER, --delete-user DELETE_USER @@ -147,7 +174,7 @@ For the sending to work, you need to have an email account so the program can se ``` [CONFIG] # Location relative to the code position -SaveLocation = comic/ +SaveLocation = data/ # Database name Database = main.db # Duration the program sleeps after one run is finished in seconds @@ -203,7 +230,6 @@ If you wish to disable/enable sending status of a user, use the -ss command ``` - ### A complete run with nothing happening: ``` Starting Loop at 2017-11-15 18:13:05 diff --git a/bin/Config.py b/bin/Config.py new file mode 100644 index 0000000..4607bae --- /dev/null +++ b/bin/Config.py @@ -0,0 +1,17 @@ + +import configparser +import os + +def load_config(location='config.ini'): + config = {} + config_reader = configparser.ConfigParser() + config_reader.optionxform = str + config_reader.read(location) + + for key, value in config_reader.items("CONFIG"): + if key in os.environ: + config[key] = os.environ[key] + else: + config[key] = value + + return config diff --git a/bin/Converter.py b/bin/Converter.py index 8460bbb..e0f5c24 100644 --- a/bin/Converter.py +++ b/bin/Converter.py @@ -3,6 +3,7 @@ import os import zipfile import subprocess +import bin.Config as Config import bin.Helper as helper @@ -22,9 +23,14 @@ def __init__(self): - def data_collector(self, config, chapter): + def data_collector(self, chapter): """ Method that collects data""" + # Load config right at the start + config = None + if not config: + config = Config.load_config() + # Load configs required here self.saveloc = config["SaveLocation"] self.ebformat = config["EbookFormat"] diff --git a/bin/ConverterHandler.py b/bin/ConverterHandler.py index 3cafb3c..665bed3 100644 --- a/bin/ConverterHandler.py +++ b/bin/ConverterHandler.py @@ -3,7 +3,7 @@ import bin.Helper as helper from bin.Converter import Converter -def ConverterHandler(config, args): +def ConverterHandler(args): """ Function that handles the Converter in a loop 
""" # Load Chapters! @@ -15,14 +15,14 @@ def ConverterHandler(config, args): # Verify if chapter has been downloaded already - if not helper.verifyDownload(config, chapter): + if not helper.verifyDownload(chapter): logging.debug("Manga %s has not been downloaded!", chapter.title) else: # Spawn an Converter Object & get basic data from database & config current_conversation = Converter() - current_conversation.data_collector(config, chapter) + current_conversation.data_collector(chapter) # Check if Download loop & Download task is selected if not args.start: @@ -41,7 +41,7 @@ def ConverterHandler(config, args): -def directConverter(config, chapterids=[]): +def directConverter(chapterids=[]): """ Function that handles direct calls of the Converter """ logging.debug("Following Chapters are directly converted:") @@ -57,14 +57,14 @@ def directConverter(config, chapterids=[]): for chapter in chapters: # Verify if chapter has been downloaded already - if not helper.verifyDownload(config, chapter): + if not helper.verifyDownload(chapter): logging.info("Manga %s has not been downloaded!", chapter[2]) else: # Spawn an Converter Object & get basic data from database & config current_conversation = Converter() - current_conversation.data_collector(config, chapter) + current_conversation.data_collector(chapter) if os.path.exists(current_conversation.cbzlocation): logging.info("Manga %s converted to CBZ already!", diff --git a/bin/Downloader.py b/bin/Downloader.py index 861a176..abdb6c4 100644 --- a/bin/Downloader.py +++ b/bin/Downloader.py @@ -2,9 +2,11 @@ import os import requests from shutil import move +import bin.Config as Config import bin.Helper as helper import bin.sourceparser.Mangastream as msparser import bin.sourceparser.Mangafox as mxparser +import bin.sourceparser.Cdmnet as cdmparser from PIL import Image from PIL import ImageOps from PIL import ImageFilter @@ -27,8 +29,12 @@ def __init__(self): - def data_collector(self, config, chapter): - + def 
data_collector(self, chapter): + # Load config right at the start + config = None + if not config: + config = Config.load_config() + # Load configs required here self.database = config["Database"] self.saveloc = config["SaveLocation"] @@ -66,72 +72,87 @@ def data_processor(self): if self.origin == "mangastream.com" or self.origin == "readms.net": urllist = msparser.getPagesUrl(self.mangastarturl,self.mangapages) + + # check if we have images to download + if not len(urllist) == 0: + + # Turn Manga pages into Image links! + logging.info("Starting download of %s...", self.mangatitle) + counter = 0 + for i in urllist: + counter = counter + 1 + self.downloader(i, counter, msparser.getImageUrl) - # Turn Manga pages into Image links! - for i in urllist: - self.imageurls.append(msparser.getImageUrl(i)) - logging.debug("List of all Images for %s", self.mangatitle) - logging.debug(self.imageurls) + # Finish :) + logging.info("Finished download of %s!", self.mangatitle) # Mangafox Parser - elif self.origin == "mangafox.me" or self.origin == "mangafox.la": + elif self.origin == "mangafox.me" or self.origin == "mangafox.la" or self.origin == "fanfox.net": urllist = mxparser.getPagesUrl(self.mangastarturl, self.mangapages) + + # check if we have images to download + if not len(urllist) == 0: - - # Turn Manga pages into Image links! - for i in urllist: - self.imageurls.append(mxparser.getImageUrl(i)) - logging.debug("List of all Images for %s", self.mangatitle) - logging.debug(self.imageurls) - - - + # Turn Manga pages into Image links! + logging.info("Starting download of %s...", self.mangatitle) + counter = 0 + for i in urllist: + counter = counter + 1 + self.downloader(i, counter, mxparser.getImageUrl) - def downloader(self): - logging.info("Starting download of %s...", self.mangatitle) - # Download & save images! 
- # check if we have images to download - if not len(self.imageurls) == 0: - - # Check if we have the Download folder - helper.createFolder(self.downloadfolder) - - # Start download Task - counter = 0 - for image in self.imageurls: - counter = counter + 1 - - imagepath = self.downloadfolder + "/" + str("{0:0=3d}".format(counter)) + ".png" - tempdl = self.downloadfolder + "/" + str("{0:0=3d}".format(counter)) + ".tmp" - - # Download the image! - f = open(tempdl, 'wb') - f.write(requests.get(image).content) - f.close() - - # If everything is alright, write image to final name - os.rename(tempdl, imagepath) + # Finish :) + logging.info("Finished download of %s!", self.mangatitle) + # CDM Parser + elif self.origin == "cdmnet.com.br": + urllist = cdmparser.getPagesUrl(self.mangastarturl, self.mangapages) + + # check if we have images to download + if not len(urllist) == 0: - # Cleanse image, remove footer - # - # I have borrowed this code from the kmanga project. - # https://github.com/aplanas/kmanga/blob/master/mobi/mobi.py#L416 - # Thanks a lot to Alberto Planas for coming up with it! - # - if self.origin == "mangafox.me" or self.origin == "mangafox.la": - logging.debug("Cleaning Mangafox Footer") - img = Image.open(imagepath) - _img = ImageOps.invert(img.convert(mode='L')) - _img = _img.point(lambda x: x and 255) - _img = _img.filter(ImageFilter.MinFilter(size=3)) - _img = _img.filter(ImageFilter.GaussianBlur(radius=5)) - _img = _img.point(lambda x: (x >= 48) and x) + # Turn Manga pages into Image links! 
+ logging.info("Starting download of %s...", self.mangatitle) + counter = 0 + for i in urllist: + counter = counter + 1 + self.downloader(i, counter, cdmparser.getImageUrl) - cleaned = img.crop(_img.getbbox()) if _img.getbbox() else img - cleaned.save(imagepath) # Finish :) - logging.info("Finished download of %s!", self.mangatitle) \ No newline at end of file + logging.info("Finished download of %s!", self.mangatitle) + + def downloader(self, url, counter, parser): + # Check if we have the Download folder + helper.createFolder(self.downloadfolder) + + imagepath = self.downloadfolder + "/" + str("{0:0=3d}".format(counter)) + ".png" + tempdl = self.downloadfolder + "/" + str("{0:0=3d}".format(counter)) + ".tmp" + + # Download the image! + f = open(tempdl, 'wb') + f.write(requests.get(parser(url)).content) + f.close() + + # If everything is alright, write image to final name + os.rename(tempdl, imagepath) + + + # Cleanse image, remove footer + # + # I have borrowed this code from the kmanga project. + # https://github.com/aplanas/kmanga/blob/master/mobi/mobi.py#L416 + # Thanks a lot to Alberto Planas for coming up with it! 
+ # + if self.origin == "mangafox.me" or self.origin == "mangafox.la" or self.origin == "fanfox.net": + logging.debug("Cleaning Mangafox Footer") + img = Image.open(imagepath) + _img = ImageOps.invert(img.convert(mode='L')) + _img = _img.point(lambda x: x and 255) + _img = _img.filter(ImageFilter.MinFilter(size=3)) + _img = _img.filter(ImageFilter.GaussianBlur(radius=5)) + _img = _img.point(lambda x: (x >= 48) and x) + + cleaned = img.crop(_img.getbbox()) if _img.getbbox() else img + cleaned.save(imagepath) diff --git a/bin/DownloaderHandler.py b/bin/DownloaderHandler.py index 0bc6a00..388b8bd 100644 --- a/bin/DownloaderHandler.py +++ b/bin/DownloaderHandler.py @@ -7,7 +7,7 @@ ''' downloadHandler ''' -def downloader(config, args): +def downloader(args): # Make the query chapters = helper.getChapters() @@ -22,7 +22,7 @@ def downloader(config, args): # Initialize Downloader class & load basic params current_chapter = Downloader() - current_chapter.data_collector(config,chapter) + current_chapter.data_collector(chapter) # Check if the old DL location is being used and fix it! @@ -36,25 +36,22 @@ def downloader(config, args): # Check if chapter needs to be downloaded - if helper.verifyDownload(config, chapter): + if helper.verifyDownload(chapter): logging.debug("Manga %s downloaded already!" % current_chapter.mangatitle) else: # Check if Download loop & Download task is selected if not args.start: current_chapter.data_processor() - current_chapter.downloader() else: - # Only start run if chapter is younger than 24h if helper.checkTime(current_chapter.chapterdate): current_chapter.data_processor() - current_chapter.downloader() else: logging.debug("%s is older than 24h, will not be processed by daemon." 
% current_chapter.mangatitle) -def directDownloader(config, chapterids=[]): +def directDownloader(chapterids=[]): logging.debug("Following Chapters are directly converted:") logging.debug(chapterids) @@ -63,7 +60,7 @@ def directDownloader(config, chapterids=[]): chapters = helper.getChaptersFromID(chapterids) # Load Users - users = helper.getUsers() + users = helper.getUsers() # Debug Users: logging.debug("Userlist:") @@ -78,7 +75,7 @@ def directDownloader(config, chapterids=[]): # Initialize Downloader class & load basic params current_chapter = Downloader() - current_chapter.data_collector(config, chapter) + current_chapter.data_collector(chapter) # Check if the old DL location is being used and fix it! oldlocation = str(current_chapter.saveloc + current_chapter.mangatitle) @@ -89,9 +86,8 @@ def directDownloader(config, chapterids=[]): move(oldlocation, newlocation) # Check if chapter needs to be downloaded - if helper.verifyDownload(config, chapter): + if helper.verifyDownload(chapter): logging.info("Manga %s downloaded already!" 
% current_chapter.mangatitle) else: current_chapter.data_processor() - current_chapter.downloader() diff --git a/bin/Helper.py b/bin/Helper.py index d0a313b..521f026 100644 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -7,9 +7,11 @@ import requests import validators from urllib.parse import urlparse +import bin.Config as Config from bin.Models import * import bin.sourceparser.Mangastream as msparser import bin.sourceparser.Mangafox as mxparser +import bin.sourceparser.Cdmnet as cdmparser ''' @@ -19,7 +21,10 @@ ''' - +# Load config right at the start +config = None +if not config: + config = Config.load_config() ''' @@ -36,7 +41,7 @@ def setIsSent(mangaid): try: # Open DB - db.get_conn() + db.connection() query = Chapter.update(issent=1).where(Chapter.chapterid == mangaid) query.execute() logging.debug("Set chapter with ID %s as sent", mangaid) @@ -53,7 +58,7 @@ def setIsSent(mangaid): def writeFeed(url): # Connect to DB - db.get_conn() + db.connection() # Insert Data feed = Feeds.create(url=url) @@ -76,7 +81,7 @@ def printFeeds(): table.header(["ID", "URL"]) # Connect - db.get_conn() + db.connection() for row in Feeds.select(): table.add_row([row.feedid, row.url]) @@ -102,7 +107,7 @@ def printUsers(): 't']) # text table.header(["ID", "USERNAME", "EMAIL", "KINDLE EMAIL", "SEND EBOOK"]) - db.get_conn() + db.connection() for user in User.select(): if user.sendtokindle == 1: sendstatus = "YES" @@ -121,7 +126,7 @@ def printUsers(): def printChaptersAll(): # Make the query - db.get_conn() + db.connection() chapters = Chapter.select().order_by(Chapter.chapterid) db.close() @@ -200,7 +205,7 @@ def createUser(): sendToKindle = "0" # Save data now! 
- db.get_conn() + db.connection() newuser = User.create(email=email, name=username, sendtokindle=sendToKindle, kindle_mail=kindlemail) try: @@ -220,7 +225,7 @@ def switchUserSend(userid): user = "" # Get User - db.get_conn() + db.connection() try: user = User.get(User.userid == userid) except DoesNotExist: @@ -254,7 +259,7 @@ def switchUserSend(userid): def deleteUser(userid): # Get User - db.get_conn() + db.connection() try: user = User.get(User.userid == userid) @@ -274,7 +279,7 @@ def deleteUser(userid): def deleteChapter(chapterid): # Get Chapter - db.get_conn() + db.connection() try: chapter = Chapter.get(Chapter.chapterid == chapterid) @@ -292,7 +297,7 @@ def deleteChapter(chapterid): def deleteFeed(feedid): # Get Feed - db.get_conn() + db.connection() try: feed = Feeds.get(Feeds.feedid == feedid) @@ -310,7 +315,7 @@ def deleteFeed(feedid): def printChapters(): # Make the query - db.get_conn() + db.connection() chapters = Chapter.select().order_by(-Chapter.chapterid).limit(10) db.close() @@ -344,7 +349,7 @@ def printChapters(): def getFeeds(): # Make the query - db.get_conn() + db.connection() feeds = Feeds.select() db.close() @@ -359,7 +364,7 @@ def getFeeds(): def getChapters(): # Make the query - db.get_conn() + db.connection() chapters = Chapter.select() return chapters @@ -373,7 +378,7 @@ def getChaptersFromID(chapterids): chapterdata = [] - db.get_conn() + db.connection() for i in chapterids: # Get Data @@ -398,7 +403,7 @@ def getChaptersFromID(chapterids): def getUsers(): # Make the query - db.get_conn() + db.connection() users = User.select() return users @@ -450,8 +455,8 @@ def getMangaData(url, entry): mangadata = [manganame, pages, chapter, title, chapter_name, chapter_pubDate] # Mangafox Parser - elif origin == "mangafox.me" or origin == "mangafox.la": - logging.debug("Getting Mangadata from Mangafox.me for %s" % url) + elif origin == "mangafox.me" or origin == "mangafox.la" or origin == "fanfox.net": + logging.debug("Getting Mangadata from 
Mangafox. for %s" % url) # Easy Stuff title = entry.title @@ -471,7 +476,27 @@ def getMangaData(url, entry): mangadata = [manganame, pages, chapter, title, chapter_name, chapter_pubDate] + # CDM Parser + elif origin == "cdmnet.com.br": + logging.debug("Getting Mangadata from CDM. for %s" % url) + + # Easy Stuff + title = entry.title + chapter_pubDate = entry.published + + # Load page once to hand it over to parser function + logging.debug("Loading Page to gather data...") + page = requests.get(url) + + # Getting the data + manganame = cdmparser.getTitle(page) + pages = cdmparser.getPages(page) + chapter = cdmparser.getChapter(url) + chapter_name = cdmparser.getChapterName(page) + logging.debug("Mangadata succesfully loaded") + + mangadata = [manganame, pages, chapter, title, chapter_name, chapter_pubDate] else: logging.error("Not supportet origin!") @@ -525,7 +550,7 @@ def checkTime(time): Verify if chapter has been downloaded Returns: true or false ''' -def verifyDownload(config, chapter): +def verifyDownload(chapter): saveloc = config["SaveLocation"] mangapages = chapter.pages diff --git a/bin/Models.py b/bin/Models.py index 67d2664..7726bb5 100644 --- a/bin/Models.py +++ b/bin/Models.py @@ -1,31 +1,27 @@ from peewee import * import configparser +import bin.Config as Config -config_reader = configparser.ConfigParser() -config_reader.read("config.ini") -config = config_reader["CONFIG"] +# Load config right at the start +config = Config.load_config() db = SqliteDatabase(config['Database']) -class BaseModel(Model): +class ModelBase(Model): class Meta: database = db -class User(BaseModel): +class User(ModelBase): email = TextField(null=True) name = TextField() kindle_mail = TextField(null=True) sendtokindle = IntegerField(null=True) - userid = PrimaryKeyField() + userid = AutoField() - class Meta: - order_by = ('userid',) - - -class Chapter(BaseModel): +class Chapter(ModelBase): chapter = TextField(null=True) - chapterid = PrimaryKeyField() + chapterid = AutoField() 
date = TextField(null=True) desc = TextField(null=True) isconverted = IntegerField(null=True) @@ -37,17 +33,10 @@ class Chapter(BaseModel): title = TextField() url = TextField() - class Meta: - order_by = ('chapterid',) - - -class Feeds(BaseModel): - feedid = PrimaryKeyField() +class Feeds(ModelBase): + feedid = AutoField() url = TextField() - class Meta: - order_by = ('feedid',) - def create_tables(): - db.get_conn() + db.connection() db.create_tables([User, Chapter, Feeds]) diff --git a/bin/ModelsBackup.py b/bin/ModelsBackup.py deleted file mode 100644 index cae074f..0000000 --- a/bin/ModelsBackup.py +++ /dev/null @@ -1,53 +0,0 @@ -from peewee import * -import configparser - - -config_reader = configparser.ConfigParser() -config_reader.read("config.ini") -config = config_reader["CONFIG"] - -db = SqliteDatabase(config['Database']) - -class BaseModel(Model): - class Meta: - database = db - -class Chapter(BaseModel): - chapter = TextField(null=True) - chapterid = PrimaryKeyField() - date = TextField(null=True) - desc = TextField(null=True) - isconverted = IntegerField(null=True) - ispulled = IntegerField(null=True) - issent = IntegerField(null=True) - manganame = TextField(null=True) - origin = TextField(null=True) - pages = IntegerField(null=True) - title = TextField() - url = TextField() - - class Meta: - db_table = 'chapter' - -class Feeds(BaseModel): - feedid = PrimaryKeyField() - url = TextField() - - class Meta: - db_table = 'feeds' - -class User(BaseModel): - email = TextField(db_column='Email', null=True) - name = TextField(db_column='Name') - kindle_mail = TextField(null=True) - sendtokindle = IntegerField(db_column='sendToKindle', null=True) - userid = PrimaryKeyField() - - class Meta: - db_table = 'user' - -def create_tables(): - db.connect() - db.create_tables([User, Chapter, Feeds]) - - diff --git a/bin/RssParser.py b/bin/RssParser.py index c716cc9..f887729 100644 --- a/bin/RssParser.py +++ b/bin/RssParser.py @@ -9,11 +9,11 @@ 
ssl._create_default_https_context = ssl._create_unverified_context -def RssParser(config): +def RssParser(): """ Function that handles the coordination of rss parsing """ # Get all feeds - db.get_conn() + db.connection() rssdata = Feeds.select().execute() logging.info("Checking for new Feed Data...") diff --git a/bin/Sender.py b/bin/Sender.py index c4e37d3..9447d1d 100644 --- a/bin/Sender.py +++ b/bin/Sender.py @@ -7,6 +7,7 @@ from email.utils import formatdate, make_msgid from email.generator import Generator from email import encoders +import bin.Config as Config import bin.Helper as helper try: @@ -40,16 +41,21 @@ def __init__(self): - def data_collector(self, config, chapter): + def data_collector(self, chapter): """ Method that gathers data required for this class """ + # Load config right at the start + config = None + if not config: + config = Config.load_config() + # Load configs required here self.saveloc = config["SaveLocation"] self.ebformat = config["EbookFormat"] self.smtpserver = config["SMTPServer"] self.serverport = config["ServerPort"] - self.emailadress = config["EmailAdress"] - self.password = config["EmailAdressPw"] + self.emailadress = config["EmailAddress"] + self.password = config["EmailAddressPw"] self.starttls = config["ServerStartSSL"] diff --git a/bin/SenderHandler.py b/bin/SenderHandler.py index df1cc0b..9fde5e5 100644 --- a/bin/SenderHandler.py +++ b/bin/SenderHandler.py @@ -8,7 +8,7 @@ except ImportError: from io import StringIO -def SenderHandler(config, args): +def SenderHandler(args): """ Function that handles the sending of ebooks when a loop is called """ # Get all Chapters @@ -28,7 +28,7 @@ def SenderHandler(config, args): # Initiate Sender class and fill it with data current_sender = Sender() - current_sender.data_collector(config, chapter) + current_sender.data_collector(chapter) current_sender.users = users # Check if ebook has been converted yet, else skip @@ -56,7 +56,7 @@ def SenderHandler(config, args): 
current_sender.mangatitle) -def directSender(config, chapterids=[]): +def directSender(chapterids=[]): """ Function that handles the coordination of directly sending ebooks """ logging.debug("Following Chapters are directly sent:") @@ -81,7 +81,7 @@ def directSender(config, chapterids=[]): # Initiate Sender class and fill it with data current_sender = Sender() - current_sender.data_collector(config, chapter) + current_sender.data_collector(chapter) current_sender.users = users # Check if ebook has been converted yet, else skip diff --git a/bin/_version.py b/bin/_version.py index d123ede..1c7d256 100644 --- a/bin/_version.py +++ b/bin/_version.py @@ -1 +1 @@ -__version__ = "v0.4.0" +__version__ = "v0.5.0" diff --git a/bin/models/Manga.py b/bin/models/Manga.py index 418e5ca..a17bad5 100644 --- a/bin/models/Manga.py +++ b/bin/models/Manga.py @@ -26,7 +26,7 @@ def load_from_feed(self, entry, parent_feed): # Check if link is already in DB to make sure only data gets downloaded that is not yet downloaded logging.debug("Checking if chapter is already saved...") - db.get_conn() + db.connection() self.duplicated = Chapter.select().where(Chapter.url==self.chapter_link) if self.duplicated.exists(): @@ -68,7 +68,7 @@ def save(self): logging.debug("Manga is already in Database! 
Skipping...") else: logging.info("Saving Chapter Data for %s", self.title) - db.get_conn() + db.connection() chapter = Chapter() chapter.chapter = self.chapter chapter.date = self.chapter_pubDate diff --git a/bin/sourceparser/Cdmnet.py b/bin/sourceparser/Cdmnet.py new file mode 100644 index 0000000..515b2e4 --- /dev/null +++ b/bin/sourceparser/Cdmnet.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python +import logging +import re +import requests +from urllib.parse import urlparse +from bs4 import BeautifulSoup + +''' + + CDM Parser + + +''' + + +''' +get Manga Title +Returns: title +''' +def getTitle(page): + title = None + soup = BeautifulSoup(page.content, 'html.parser') + + #Get Manga Titel + search = re.search('', str(soup)) + try: + title = search.group(1) + except AttributeError: + logging.error("No Title Fount!") + + return title + + +''' +get Manga Chapter name +Returns: Chapter name +''' +def getChapterName(page): + + logging.debug("CDM has no Chapternames") + chaptername = "" + return chaptername + + +''' +get Manga Pages +Returns: integer pages +''' +def getPages(page): + soup = BeautifulSoup(page.content, 'html.parser') + + #Get Manga Titel + search =re.search("var pages = \[.*'(.*?)',];", str(soup)) + pages = search.group(1) + return pages + + + +''' +get Manga chapter +Returns: integer chapter +''' +def getChapter(url): + #soup = BeautifulSoup(page.content, 'html.parser') + + search = re.search('ler-online/(.*?)\Z', str(url)) + chapter = search.group(1) + return chapter + +''' +get Manga Pages URL +Returns: urllist +''' +def getPagesUrl(starturl,pages): + pagesurllist=[] + + # Split URL to create list + parsed = urlparse(starturl) + + # start url generator + for page in range(pages): + page = page + 1 + fullurl = parsed.scheme + "://" + parsed.netloc + parsed.path + "#" + str(page) + pagesurllist.append(fullurl) + + logging.debug("All pages:") + logging.debug(pagesurllist) + return pagesurllist + + + +''' +get Manga Image URL +Returns: urllist +''' +def 
getImageUrl(pageurl): + # Download Page + page = requests.get(pageurl) + soup = BeautifulSoup(page.content, 'html.parser') + + # Get CDN URL suffix + search =re.search("var urlSulfix = '(.*?)';", str(soup)) + cdnsuffix = search.group(1) + + # Get pagenumber + var = re.search('ler-online/.*?#(.*?)\Z', str(pageurl)) + pagenumber = var.group(1).zfill(2) + + + imageurl = str(cdnsuffix + pagenumber + ".jpg") + return imageurl diff --git a/bin/sourceparser/Mangafox.py b/bin/sourceparser/Mangafox.py index e029a0a..2257d5d 100644 --- a/bin/sourceparser/Mangafox.py +++ b/bin/sourceparser/Mangafox.py @@ -39,7 +39,11 @@ def getChapterName(page): #Get Manga Titel search = re.search(': (.*?) at MangaFox', str(soup)) - chaptername = search.group(1) + try: + chaptername = search.group(1) + except AttributeError: + logging.debug("No Chapter name provided") + chaptername = "" return chaptername @@ -65,7 +69,7 @@ def getChapter(url): #soup = BeautifulSoup(page.content, 'html.parser') #Get Manga Titel - search =re.search('/c(.*?)/', str(url)) + search = re.search('/c(.*?)/', str(url)) chapter = search.group(1) return chapter @@ -111,4 +115,3 @@ def getImageUrl(pageurl): imageurl = var1['src'] return imageurl - pass \ No newline at end of file diff --git a/bin/sourceparser/Mangastream.py b/bin/sourceparser/Mangastream.py index d36fe2f..554fc04 100644 --- a/bin/sourceparser/Mangastream.py +++ b/bin/sourceparser/Mangastream.py @@ -99,4 +99,3 @@ def getImageUrl(pageurl): imageurl = "https:" + var1['src'] return imageurl - pass \ No newline at end of file diff --git a/config.ini b/config.ini index 056585f..698a0ad 100644 --- a/config.ini +++ b/config.ini @@ -1,12 +1,12 @@ [CONFIG] -SaveLocation = comic/ -Database = main.db +SaveLocation = data/ +Database = data/main.db Sleep = 900 EbookFormat = MOBI EbookProfile = KV # Sender Email Server Settings SMTPServer = mail.example.com ServerPort = 587 -EmailAdress = comic@example.com -EmailAdressPw = yourpassword -ServerStartSSL = True \ No 
newline at end of file +EmailAddress = comic@example.com +EmailAddressPw = yourpassword +ServerStartSSL = True diff --git a/m2em.py b/m2em.py index b49ccbb..499520f 100755 --- a/m2em.py +++ b/m2em.py @@ -4,11 +4,11 @@ import logging import time import argparse -import configparser import datetime import validators from bin._version import __version__ # Start of the fun! +import bin.Config as Config import bin.Helper as helper import bin.RssParser as mparser import bin.DownloaderHandler as mdownloader @@ -34,10 +34,13 @@ def __init__(self): # Load config right at the start self.config = None if not self.config: - self.read_config() + self.config = Config.load_config() + logging.debug("Loaded Config:") + logging.debug(self.config) # Check if Database exists, else create if not os.path.isfile(self.config["Database"]): + helper.createFolder(self.config["SaveLocation"]) helper.createDB() @@ -74,6 +77,9 @@ def read_arguments(self): parser.add_argument("--download", help="Downloads Chapter directly by chapter ID. Multiple IDs can be given", default=[], nargs='*',) + parser.add_argument("-p","--process", + help="Processes chapter(s) by chapter ID, Download, convert, send. Multiple IDs can be given", + default=[], nargs='*',) parser.add_argument("-a", "--action", help="Start action. 
Options are: rssparser (collecting feed data), downloader, converter or sender ") parser.add_argument("-ss", "--switch-send", @@ -120,25 +126,13 @@ def read_arguments(self): self.args.download, self.args.convert, self.args.send, + self.args.process, self.args.start,]): logging.error("At least one argument is required!") logging.debug("Passed arguments: \n %s", self.args) - def read_config(self): - """ Reads the config """ - - logging.debug("Loading configuration") - config_reader = configparser.ConfigParser() - config_reader.read("config.ini") - self.config = config_reader["CONFIG"] - - logging.debug("Loaded Config:") - logging.debug(self.config) - - - ''' Catch -af/--add-feed ''' @@ -258,35 +252,40 @@ def start_action(self): direct callers ''' def send_chapter(self): - msender.directSender(self.config, self.args.send) + msender.directSender(self.args.send) def convert_chapter(self): - mconverter.directConverter(self.config, self.args.convert) + mconverter.directConverter(self.args.convert) def download_chapter(self): - mdownloader.directDownloader(self.config, self.args.download) + mdownloader.directDownloader(self.args.download) + + def process_chapter(self): + mdownloader.directDownloader(self.args.process) + mconverter.directConverter(self.args.process) + msender.directSender(self.args.process) ''' This are the worker, one round ''' # Worker to get and parse rss feeds def parse_add_feeds(self): - mparser.RssParser(self.config) + mparser.RssParser() # Worker to fetch all images def images_fetcher(self): - mdownloader.downloader(self.config, self.args) + mdownloader.downloader(self.args) # Worker to convert all downloaded chapters into ebooks def image_converter(self): - mconverter.ConverterHandler(self.config, self.args) + mconverter.ConverterHandler(self.args) # Worker to convert all downloaded chapters into ebooks def send_ebooks(self): - msender.SenderHandler(self.config, self.args) + msender.SenderHandler(self.args) @@ -361,6 +360,9 @@ def run(self): 
self.convert_chapter() return + if self.args.process: + self.process_chapter() + return # Mainloop if self.args.start: diff --git a/requirements.txt b/requirements.txt index 1745687..bcede23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ validators==0.12.0 texttable==1.1.1 -requests==2.18.4 +requests==2.20.0 bs4==0.0.1 urllib3==1.22 feedparser==5.2.1 KindleComicConverter==5.4.3 -peewee==2.10.2 +peewee==3.7.0