Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

开始拆代码了 #69

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
46 changes: 46 additions & 0 deletions src/cookie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import json, os, time, random

import src.down.download
from src import settings


def _get_testid(headers: dict) -> str:
    """Fetch a random chapter ID from a known novel, used to validate cookies.

    Args:
        headers: HTTP headers (without a cookie) for the chapter-list request.

    Returns:
        A chapter ID string chosen from chapters past index 21, so that a
        valid cookie is actually required to read it.

    Raises:
        Exception: if the chapter list cannot be fetched or is too short.
    """
    test_novel_id = 7143038691944959011  # Example novel ID used only for probing
    chapters = src.down.download.chapter_list(headers, test_novel_id)
    # chapter_list returns (title, {name: id}, status) and uses the string
    # 'err' as its failure sentinel; a bare truthiness check on the tuple
    # (as before) was always True, so test the sentinel explicitly.
    if chapters[0] != 'err' and len(chapters[1]) > 21:
        return str(random.choice(list(chapters[1].values())[21:]))
    raise Exception("Failed to get initial chapter ID")

def _test(self, chapter_id: str, cookie: str) -> bool:
    """Check whether *cookie* can fetch real chapter content.

    Sets self.cookie as a side effect, then fetches the test chapter in
    test mode; any response longer than 200 characters counts as valid.
    """
    self.cookie = cookie
    body = src.down.download.chapter_content(self, chapter_id, test_mode=True)
    return len(body) > 200

def init(self):
    """Initialize the download cookie.

    Loads a previously saved cookie from disk and validates it against a
    freshly obtained test chapter ID; if the file is missing, unreadable,
    or the cookie no longer works, a new one is brute-forced via get().
    """
    self.log_callback('正在获取cookie')
    tzj = _get_testid(settings.headers)

    saved_cookie = None
    if os.path.exists(settings.cookie_path):
        try:
            with open(settings.cookie_path, 'r', encoding='UTF-8') as f:
                saved_cookie = json.load(f)
        except (json.JSONDecodeError, OSError):
            # Corrupt or unreadable cookie file: treat as absent and
            # regenerate instead of crashing on startup.
            saved_cookie = None

    if saved_cookie is not None:
        self.cookie = saved_cookie
        if not _test(self, tzj, self.cookie):
            get(self, tzj)
    else:
        get(self, tzj)

    self.log_callback('Cookie获取成功')

def get(self, chapter_id: str):
    """Generate new cookie"""
    base = 1000000000000000000
    # Start from a random 19-digit ID and walk upward until one works.
    start = random.randint(base * 6, base * 8)
    for candidate in range(start, base * 9):
        # Small random delay between probes.
        time.sleep(random.randint(50, 150) / 1000)
        self.cookie = f'novel_web_id={candidate}'
        body = src.down.download.chapter_content(self, chapter_id, test_mode=True)
        if len(body) > 200:
            # Persist the working cookie for future runs.
            with open(settings.cookie_path, 'w', encoding='UTF-8') as f:
                json.dump(self.cookie, f)
            return
4 changes: 4 additions & 0 deletions src/down/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .txt import txt
from .epub import depub as epub
from .latex import latex
from .html import html
152 changes: 152 additions & 0 deletions src/down/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import json
import time, random

import requests as req
from lxml import etree

from src import utils, cookie
from src.utils import decode_content
from src import settings

def chapter(self, title: str, chapter_id: str, existing_content: dict) -> str|None:
    """Download a single chapter with retries.

    Args:
        title: chapter title, used as the cache/progress key.
        chapter_id: site chapter identifier.
        existing_content: previously downloaded chapters (title -> text).

    Returns:
        The chapter text, or None if all retries were exhausted without
        an exception to re-raise.

    Raises:
        Exception: the last download error, after 3 failed attempts.
    """
    if title in existing_content:
        self.zj[title] = existing_content[title]  # keep in-memory map in sync
        return existing_content[title]

    self.log_callback(f'下载章节: {title}')
    retries = 3
    last_error = None

    while retries > 0:
        try:
            content = chapter_content(self, chapter_id)
            if content == 'err':
                # Cookie has likely expired.  Count consecutive failures
                # and refresh the cookie after 7 of them, then raise so
                # the retry loop below re-attempts with the new cookie.
                # (Previously this refresh logic sat after an
                # unconditional raise and was unreachable.)
                self.tcs += 1
                if self.tcs > 7:
                    self.tcs = 0
                    cookie.get(self, self.tzj)
                raise Exception('Download failed')

            # Randomized politeness delay between successful downloads.
            time.sleep(random.randint(
                self.config.delay[0],
                self.config.delay[1]
            ) / 1000)

            # Save progress to disk every 5 chapters.
            self.cs += 1
            if self.cs >= 5:
                self.cs = 0
                utils.save_progress(title, content, self.zj, self.book_json_path)

            self.zj[title] = content
            return content

        except Exception as e:
            last_error = e
            retries -= 1
            if retries == 0:
                self.log_callback(f'下载失败 {title}: {str(e)}')
                break
            time.sleep(1)

    if last_error:
        raise last_error
    return None


def chapter_list(headers:dict, novel_id: int) -> tuple:
    """Fetch a novel's title, chapter map, and serialization status.

    Args:
        headers: HTTP headers to use for the page request.
        novel_id: fanqienovel.com novel ID.

    Returns:
        (title, {chapter_title: chapter_id}, status_list) on success, or
        the sentinel ('err', {}, []) when the page cannot be parsed.
    """
    url = f'https://fanqienovel.com/page/{novel_id}'
    # Bounded timeout so a hung connection cannot stall the whole download
    # (consistent with chapter_content below).
    response = req.get(url, headers=headers, timeout=10)
    ele = etree.HTML(response.text)

    chapters = {}
    a_elements = ele.xpath('//div[@class="chapter"]/div/a')
    if not a_elements:
        return 'err', {}, []

    for a in a_elements:
        href = a.xpath('@href')
        if not href:
            continue
        # Chapter ID is the last path segment of the link.
        chapters[a.text] = href[0].split('/')[-1]

    title = ele.xpath('//h1/text()')
    status = ele.xpath('//span[@class="info-label-yellow"]/text()')

    if not title or not status:
        return 'err', {}, []

    return title[0], chapters, status


def chapter_content(self, chapter_id: str, test_mode: bool = False) -> str:
    """Download a chapter's text with decoding fallbacks and a backup API.

    Tries the reader page first, then decode_content in two modes, then a
    hand-rolled tag stripper; on request failure falls back to the JSON
    API.  Up to 3 attempts in total.

    Args:
        chapter_id: site chapter identifier.
        test_mode: when True, return the raw (undecoded) content — used
            for cookie validation.

    Returns:
        Decoded chapter text, or 'err' in test mode after all attempts fail.

    Raises:
        Exception: after 3 failed attempts when not in test mode.
    """
    headers = settings.headers.copy()
    headers['cookie'] = self.cookie

    for attempt in range(3):
        try:
            # Primary method: scrape the reader page.
            response = req.get(
                f'https://fanqienovel.com/reader/{chapter_id}',
                headers=headers,
                timeout=10
            )
            response.raise_for_status()

            content = '\n'.join(
                etree.HTML(response.text).xpath(
                    '//div[@class="muye-reader-content noselect"]//p/text()'
                )
            )

            if test_mode:
                return content

            # Narrowed from bare `except:` so KeyboardInterrupt/SystemExit
            # are never swallowed.
            try:
                return decode_content(content)
            except Exception:
                # Try alternative decoding mode
                try:
                    return decode_content(content, mode=1)
                except Exception:
                    # Fallback: strip markup by hand, tracking tag nesting
                    # depth and re-inserting newlines at <p> boundaries.
                    content = content[6:]
                    tmp = 1
                    result = ''
                    for i in content:
                        if i == '<':
                            tmp += 1
                        elif i == '>':
                            tmp -= 1
                        elif tmp == 0:
                            result += i
                        elif tmp == 1 and i == 'p':
                            result = (result + '\n').replace('\n\n', '\n')
                    return result

        except Exception as e:
            # Secondary method: JSON API endpoint.
            try:
                response = req.get(
                    f'https://fanqienovel.com/api/reader/full?itemId={chapter_id}',
                    headers=headers,
                    timeout=10
                )
                content = json.loads(response.text)['data']['chapterData']['content']

                if test_mode:
                    return content

                return decode_content(content)
            except Exception:
                if attempt == 2:  # Last attempt
                    if test_mode:
                        return 'err'
                    raise Exception(f"Download failed after 3 attempts: {str(e)}")
                time.sleep(1)
98 changes: 98 additions & 0 deletions src/down/epub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import concurrent.futures, os
from tqdm import tqdm

from src import utils, format, settings
from src.down import download
from ebooklib import epub

def depub(self, novel_id: int) -> str:
    """Download a novel as an EPUB file.

    Args:
        novel_id: fanqienovel.com novel ID.

    Returns:
        's' on success, 'err' if the chapter list cannot be fetched.
    """
    # Pre-initialize so the finally block can check them without
    # introspecting locals().
    completed_chapters = 0
    total_chapters = 0
    try:
        name, chapters, status = download.chapter_list(settings.headers, novel_id)
        if name == 'err':
            return 'err'

        safe_name = utils.sanitize_filename(name)
        self.log_callback(f'\n开始下载《{name}》,状态:{status[0]}')

        # Create EPUB book
        book = epub.EpubBook()
        book.set_title(name)
        book.set_language('zh')

        # Get author info and cover
        if author := utils.get_author_info(self, novel_id):
            book.add_author(author)
        if cover_url := format.epub.get_cover_url(self, novel_id):
            format.epub.add_cover(self, book, cover_url)

        total_chapters = len(chapters)

        # Download concurrently, but keep results keyed by title:
        # as_completed yields in completion order, which is
        # nondeterministic, and appending there scrambled the spine/TOC.
        results = {}
        with tqdm(total=total_chapters, desc='下载进度') as pbar:
            with concurrent.futures.ThreadPoolExecutor(max_workers=self.config.xc) as executor:
                future_to_chapter = {
                    executor.submit(
                        _download_chapter_for_epub,
                        self,
                        title,
                        chapter_id
                    ): title
                    for title, chapter_id in chapters.items()
                }

                for future in concurrent.futures.as_completed(future_to_chapter):
                    chapter_title = future_to_chapter[future]
                    try:
                        epub_chapter = future.result()
                        if epub_chapter:
                            results[chapter_title] = epub_chapter
                            book.add_item(epub_chapter)
                    except Exception as e:
                        self.log_callback(f'下载章节失败 {chapter_title}: {str(e)}')

                    completed_chapters += 1
                    pbar.update(1)
                    self.progress_callback(
                        completed_chapters,
                        total_chapters,
                        '下载进度',
                        chapter_title
                    )

        # Assemble navigation in the original chapter order.
        epub_chapters = [results[t] for t in chapters if t in results]
        book.toc = epub_chapters
        book.spine = ['nav'] + epub_chapters
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        # Save EPUB file
        epub_path = os.path.join(self.config.save_path, f'{safe_name}.epub')
        epub.write_epub(epub_path, book)
        return 's'

    finally:
        if completed_chapters < total_chapters:
            self.progress_callback(total_chapters, total_chapters, '下载完成')

def _download_chapter_for_epub(self, title: str, chapter_id: str) -> epub.EpubHtml | None:
    """Fetch one chapter and wrap it as an EPUB XHTML item.

    Returns None when the chapter text could not be downloaded.
    """
    text = download.chapter(self, title, chapter_id, {})
    if not text:
        return None

    # Re-indent each line with the configured paragraph spacing.
    indent = self.config.kgf * self.config.kg
    body = text.replace('\n', f'\n{indent}')

    item = epub.EpubHtml(
        title=title,
        file_name=f'chapter_{chapter_id}.xhtml',
        lang='zh'
    )
    item.content = f'<h1>{title}</h1><p>{body}</p>'
    return item
81 changes: 81 additions & 0 deletions src/down/html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import concurrent.futures, os
from tqdm import tqdm

from src import utils, format, settings
from src.down import download



def html(self, novel_id: int) -> str:
    """Download a novel as a directory of cross-linked HTML files.

    Args:
        novel_id: fanqienovel.com novel ID.

    Returns:
        's' on success, 'err' if the chapter list cannot be fetched.
    """
    # Pre-initialize so the finally block can check them without
    # introspecting locals().
    completed_chapters = 0
    total_chapters = 0
    try:
        name, chapters, status = download.chapter_list(settings.headers, novel_id)
        if name == 'err':
            return 'err'

        safe_name = utils.sanitize_filename(name)
        html_dir = os.path.join(self.config.save_path, f"{safe_name}(html)")
        os.makedirs(html_dir, exist_ok=True)

        self.log_callback(f'\n开始下载《{name}》,状态:{status[0]}')

        # Write the table-of-contents page first.
        toc_content = format.html.index(name, chapters)
        with open(os.path.join(html_dir, "index.html"), "w", encoding='UTF-8') as f:
            f.write(toc_content)

        total_chapters = len(chapters)

        # Download chapters concurrently with progress tracking; each
        # worker writes its own file, so completion order doesn't matter.
        with tqdm(total=total_chapters, desc='下载进度') as pbar:
            with concurrent.futures.ThreadPoolExecutor(max_workers=self.config.xc) as executor:
                future_to_chapter = {
                    executor.submit(
                        _download_chapter_for_html,
                        self,
                        title,
                        chapter_id,
                        html_dir,
                        list(chapters.keys())
                    ): title
                    for title, chapter_id in chapters.items()
                }

                for future in concurrent.futures.as_completed(future_to_chapter):
                    chapter_title = future_to_chapter[future]
                    try:
                        future.result()
                    except Exception as e:
                        self.log_callback(f'下载章节失败 {chapter_title}: {str(e)}')

                    completed_chapters += 1
                    pbar.update(1)
                    self.progress_callback(
                        completed_chapters,
                        total_chapters,
                        '下载进度',
                        chapter_title
                    )

        return 's'

    finally:
        if completed_chapters < total_chapters:
            self.progress_callback(total_chapters, total_chapters, '下载完成')

def _download_chapter_for_html(self, title: str, chapter_id: str, output_dir: str, all_titles: list[str]) -> None:
    """Download one chapter and write it as a standalone HTML page.

    Builds prev/next navigation links from the chapter's position within
    *all_titles*; does nothing when the download yields no content.
    """
    text = download.chapter(self, title, chapter_id, {})
    if not text:
        return

    idx = all_titles.index(title)
    if idx > 0:
        prev_link = f'<a href="{utils.sanitize_filename(all_titles[idx-1])}.html">上一章</a>'
    else:
        prev_link = ''
    if idx < len(all_titles) - 1:
        next_link = f'<a href="{utils.sanitize_filename(all_titles[idx+1])}.html">下一章</a>'
    else:
        next_link = ''

    page = format.html.content(title, text, prev_link, next_link, self.config.kgf * self.config.kg)

    target = os.path.join(output_dir, f"{utils.sanitize_filename(title)}.html")
    with open(target, "w", encoding='UTF-8') as f:
        f.write(page)
Loading