Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

开始拆代码了 #69

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
46 changes: 46 additions & 0 deletions src/cookie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import json, os, time, random

import src.down.download
from src import settings


def _get_testid(headers: dict) -> str:
    """Fetch a random chapter ID from a known novel, used to validate cookies.

    Args:
        headers: HTTP headers (without a cookie) for the chapter-list request.

    Returns:
        A chapter ID string chosen from chapters past index 21, so that a
        valid cookie is actually required to read it.

    Raises:
        Exception: if the chapter list cannot be fetched or is too short.
    """
    test_novel_id = 7143038691944959011  # Example novel ID used only for probing
    chapters = src.down.download.chapter_list(headers, test_novel_id)
    # chapter_list returns (title, {name: id}, status) and uses the string
    # 'err' as its failure sentinel; a bare truthiness check on the tuple
    # (as before) was always True, so test the sentinel explicitly.
    if chapters[0] != 'err' and len(chapters[1]) > 21:
        return str(random.choice(list(chapters[1].values())[21:]))
    raise Exception("Failed to get initial chapter ID")

def _test(self, chapter_id: str, cookie: str) -> bool:
    """Check whether *cookie* can fetch real chapter content.

    Sets self.cookie as a side effect, then fetches the test chapter in
    test mode; any response longer than 200 characters counts as valid.
    """
    self.cookie = cookie
    body = src.down.download.chapter_content(self, chapter_id, test_mode=True)
    return len(body) > 200

def init(self):
    """Initialize the download cookie.

    Loads a previously saved cookie from disk and validates it against a
    freshly obtained test chapter ID; if the file is missing, unreadable,
    or the cookie no longer works, a new one is brute-forced via get().
    """
    self.log_callback('正在获取cookie')
    tzj = _get_testid(settings.headers)

    saved_cookie = None
    if os.path.exists(settings.cookie_path):
        try:
            with open(settings.cookie_path, 'r', encoding='UTF-8') as f:
                saved_cookie = json.load(f)
        except (json.JSONDecodeError, OSError):
            # Corrupt or unreadable cookie file: treat as absent and
            # regenerate instead of crashing on startup.
            saved_cookie = None

    if saved_cookie is not None:
        self.cookie = saved_cookie
        if not _test(self, tzj, self.cookie):
            get(self, tzj)
    else:
        get(self, tzj)

    self.log_callback('Cookie获取成功')

def get(self, chapter_id: str):
    """Generate new cookie"""
    base = 1000000000000000000
    # Start from a random 19-digit ID and walk upward until one works.
    start = random.randint(base * 6, base * 8)
    for candidate in range(start, base * 9):
        # Small random delay between probes.
        time.sleep(random.randint(50, 150) / 1000)
        self.cookie = f'novel_web_id={candidate}'
        body = src.down.download.chapter_content(self, chapter_id, test_mode=True)
        if len(body) > 200:
            # Persist the working cookie for future runs.
            with open(settings.cookie_path, 'w', encoding='UTF-8') as f:
                json.dump(self.cookie, f)
            return
4 changes: 4 additions & 0 deletions src/down/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .txt import txt
from .epub import depub as epub
from .latex import latex
from .html import html
152 changes: 152 additions & 0 deletions src/down/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import json
import time, random

import requests as req
from lxml import etree

from src import utils, cookie
from src.utils import decode_content
from src import settings

def chapter(self, title: str, chapter_id: str, existing_content: dict) -> str|None:
    """Download a single chapter with retries.

    Args:
        title: chapter title, used as the cache/progress key.
        chapter_id: site chapter identifier.
        existing_content: previously downloaded chapters (title -> text).

    Returns:
        The chapter text, or None if all retries were exhausted without
        an exception to re-raise.

    Raises:
        Exception: the last download error, after 3 failed attempts.
    """
    if title in existing_content:
        self.zj[title] = existing_content[title]  # keep in-memory map in sync
        return existing_content[title]

    self.log_callback(f'下载章节: {title}')
    retries = 3
    last_error = None

    while retries > 0:
        try:
            content = chapter_content(self, chapter_id)
            if content == 'err':
                # Cookie has likely expired.  Count consecutive failures
                # and refresh the cookie after 7 of them, then raise so
                # the retry loop below re-attempts with the new cookie.
                # (Previously this refresh logic sat after an
                # unconditional raise and was unreachable.)
                self.tcs += 1
                if self.tcs > 7:
                    self.tcs = 0
                    cookie.get(self, self.tzj)
                raise Exception('Download failed')

            # Randomized politeness delay between successful downloads.
            time.sleep(random.randint(
                self.config.delay[0],
                self.config.delay[1]
            ) / 1000)

            # Save progress to disk every 5 chapters.
            self.cs += 1
            if self.cs >= 5:
                self.cs = 0
                utils.save_progress(title, content, self.zj, self.book_json_path)

            self.zj[title] = content
            return content

        except Exception as e:
            last_error = e
            retries -= 1
            if retries == 0:
                self.log_callback(f'下载失败 {title}: {str(e)}')
                break
            time.sleep(1)

    if last_error:
        raise last_error
    return None


def chapter_list(headers:dict, novel_id: int) -> tuple:
    """Fetch a novel's title, chapter map, and serialization status.

    Args:
        headers: HTTP headers to use for the page request.
        novel_id: fanqienovel.com novel ID.

    Returns:
        (title, {chapter_title: chapter_id}, status_list) on success, or
        the sentinel ('err', {}, []) when the page cannot be parsed.
    """
    url = f'https://fanqienovel.com/page/{novel_id}'
    # Bounded timeout so a hung connection cannot stall the whole download
    # (consistent with chapter_content below).
    response = req.get(url, headers=headers, timeout=10)
    ele = etree.HTML(response.text)

    chapters = {}
    a_elements = ele.xpath('//div[@class="chapter"]/div/a')
    if not a_elements:
        return 'err', {}, []

    for a in a_elements:
        href = a.xpath('@href')
        if not href:
            continue
        # Chapter ID is the last path segment of the link.
        chapters[a.text] = href[0].split('/')[-1]

    title = ele.xpath('//h1/text()')
    status = ele.xpath('//span[@class="info-label-yellow"]/text()')

    if not title or not status:
        return 'err', {}, []

    return title[0], chapters, status


def chapter_content(self, chapter_id: str, test_mode: bool = False) -> str:
    """Download a chapter's text with decoding fallbacks and a backup API.

    Tries the reader page first, then decode_content in two modes, then a
    hand-rolled tag stripper; on request failure falls back to the JSON
    API.  Up to 3 attempts in total.

    Args:
        chapter_id: site chapter identifier.
        test_mode: when True, return the raw (undecoded) content — used
            for cookie validation.

    Returns:
        Decoded chapter text, or 'err' in test mode after all attempts fail.

    Raises:
        Exception: after 3 failed attempts when not in test mode.
    """
    headers = settings.headers.copy()
    headers['cookie'] = self.cookie

    for attempt in range(3):
        try:
            # Primary method: scrape the reader page.
            response = req.get(
                f'https://fanqienovel.com/reader/{chapter_id}',
                headers=headers,
                timeout=10
            )
            response.raise_for_status()

            content = '\n'.join(
                etree.HTML(response.text).xpath(
                    '//div[@class="muye-reader-content noselect"]//p/text()'
                )
            )

            if test_mode:
                return content

            # Narrowed from bare `except:` so KeyboardInterrupt/SystemExit
            # are never swallowed.
            try:
                return decode_content(content)
            except Exception:
                # Try alternative decoding mode
                try:
                    return decode_content(content, mode=1)
                except Exception:
                    # Fallback: strip markup by hand, tracking tag nesting
                    # depth and re-inserting newlines at <p> boundaries.
                    content = content[6:]
                    tmp = 1
                    result = ''
                    for i in content:
                        if i == '<':
                            tmp += 1
                        elif i == '>':
                            tmp -= 1
                        elif tmp == 0:
                            result += i
                        elif tmp == 1 and i == 'p':
                            result = (result + '\n').replace('\n\n', '\n')
                    return result

        except Exception as e:
            # Secondary method: JSON API endpoint.
            try:
                response = req.get(
                    f'https://fanqienovel.com/api/reader/full?itemId={chapter_id}',
                    headers=headers,
                    timeout=10
                )
                content = json.loads(response.text)['data']['chapterData']['content']

                if test_mode:
                    return content

                return decode_content(content)
            except Exception:
                if attempt == 2:  # Last attempt
                    if test_mode:
                        return 'err'
                    raise Exception(f"Download failed after 3 attempts: {str(e)}")
                time.sleep(1)
98 changes: 98 additions & 0 deletions src/down/epub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import concurrent.futures, os
from tqdm import tqdm

from src import utils, format, settings
from src.down import download
from ebooklib import epub

def depub(self, novel_id: int) -> str:
    """Download a novel as an EPUB file.

    Args:
        novel_id: fanqienovel.com novel ID.

    Returns:
        's' on success, 'err' if the chapter list cannot be fetched.
    """
    # Pre-initialize so the finally block can check them without
    # introspecting locals().
    completed_chapters = 0
    total_chapters = 0
    try:
        name, chapters, status = download.chapter_list(settings.headers, novel_id)
        if name == 'err':
            return 'err'

        safe_name = utils.sanitize_filename(name)
        self.log_callback(f'\n开始下载《{name}》,状态:{status[0]}')

        # Create EPUB book
        book = epub.EpubBook()
        book.set_title(name)
        book.set_language('zh')

        # Get author info and cover
        if author := utils.get_author_info(self, novel_id):
            book.add_author(author)
        if cover_url := format.epub.get_cover_url(self, novel_id):
            format.epub.add_cover(self, book, cover_url)

        total_chapters = len(chapters)

        # Download concurrently, but keep results keyed by title:
        # as_completed yields in completion order, which is
        # nondeterministic, and appending there scrambled the spine/TOC.
        results = {}
        with tqdm(total=total_chapters, desc='下载进度') as pbar:
            with concurrent.futures.ThreadPoolExecutor(max_workers=self.config.xc) as executor:
                future_to_chapter = {
                    executor.submit(
                        _download_chapter_for_epub,
                        self,
                        title,
                        chapter_id
                    ): title
                    for title, chapter_id in chapters.items()
                }

                for future in concurrent.futures.as_completed(future_to_chapter):
                    chapter_title = future_to_chapter[future]
                    try:
                        epub_chapter = future.result()
                        if epub_chapter:
                            results[chapter_title] = epub_chapter
                            book.add_item(epub_chapter)
                    except Exception as e:
                        self.log_callback(f'下载章节失败 {chapter_title}: {str(e)}')

                    completed_chapters += 1
                    pbar.update(1)
                    self.progress_callback(
                        completed_chapters,
                        total_chapters,
                        '下载进度',
                        chapter_title
                    )

        # Assemble navigation in the original chapter order.
        epub_chapters = [results[t] for t in chapters if t in results]
        book.toc = epub_chapters
        book.spine = ['nav'] + epub_chapters
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        # Save EPUB file
        epub_path = os.path.join(self.config.save_path, f'{safe_name}.epub')
        epub.write_epub(epub_path, book)
        return 's'

    finally:
        if completed_chapters < total_chapters:
            self.progress_callback(total_chapters, total_chapters, '下载完成')

def _download_chapter_for_epub(self, title: str, chapter_id: str) -> epub.EpubHtml | None:
    """Fetch one chapter and wrap it as an EPUB XHTML item.

    Returns None when the chapter text could not be downloaded.
    """
    text = download.chapter(self, title, chapter_id, {})
    if not text:
        return None

    # Re-indent each line with the configured paragraph spacing.
    indent = self.config.kgf * self.config.kg
    body = text.replace('\n', f'\n{indent}')

    item = epub.EpubHtml(
        title=title,
        file_name=f'chapter_{chapter_id}.xhtml',
        lang='zh'
    )
    item.content = f'<h1>{title}</h1><p>{body}</p>'
    return item
81 changes: 81 additions & 0 deletions src/down/html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import concurrent.futures, os
from tqdm import tqdm

from src import utils, format, settings
from src.down import download



def html(self, novel_id: int) -> str:
    """Download a novel as a directory of cross-linked HTML files.

    Args:
        novel_id: fanqienovel.com novel ID.

    Returns:
        's' on success, 'err' if the chapter list cannot be fetched.
    """
    # Pre-initialize so the finally block can check them without
    # introspecting locals().
    completed_chapters = 0
    total_chapters = 0
    try:
        name, chapters, status = download.chapter_list(settings.headers, novel_id)
        if name == 'err':
            return 'err'

        safe_name = utils.sanitize_filename(name)
        html_dir = os.path.join(self.config.save_path, f"{safe_name}(html)")
        os.makedirs(html_dir, exist_ok=True)

        self.log_callback(f'\n开始下载《{name}》,状态:{status[0]}')

        # Write the table-of-contents page first.
        toc_content = format.html.index(name, chapters)
        with open(os.path.join(html_dir, "index.html"), "w", encoding='UTF-8') as f:
            f.write(toc_content)

        total_chapters = len(chapters)

        # Download chapters concurrently with progress tracking; each
        # worker writes its own file, so completion order doesn't matter.
        with tqdm(total=total_chapters, desc='下载进度') as pbar:
            with concurrent.futures.ThreadPoolExecutor(max_workers=self.config.xc) as executor:
                future_to_chapter = {
                    executor.submit(
                        _download_chapter_for_html,
                        self,
                        title,
                        chapter_id,
                        html_dir,
                        list(chapters.keys())
                    ): title
                    for title, chapter_id in chapters.items()
                }

                for future in concurrent.futures.as_completed(future_to_chapter):
                    chapter_title = future_to_chapter[future]
                    try:
                        future.result()
                    except Exception as e:
                        self.log_callback(f'下载章节失败 {chapter_title}: {str(e)}')

                    completed_chapters += 1
                    pbar.update(1)
                    self.progress_callback(
                        completed_chapters,
                        total_chapters,
                        '下载进度',
                        chapter_title
                    )

        return 's'

    finally:
        if completed_chapters < total_chapters:
            self.progress_callback(total_chapters, total_chapters, '下载完成')

def _download_chapter_for_html(self, title: str, chapter_id: str, output_dir: str, all_titles: list[str]) -> None:
    """Download one chapter and write it as a standalone HTML page.

    Builds prev/next navigation links from the chapter's position within
    *all_titles*; does nothing when the download yields no content.
    """
    text = download.chapter(self, title, chapter_id, {})
    if not text:
        return

    idx = all_titles.index(title)
    if idx > 0:
        prev_link = f'<a href="{utils.sanitize_filename(all_titles[idx-1])}.html">上一章</a>'
    else:
        prev_link = ''
    if idx < len(all_titles) - 1:
        next_link = f'<a href="{utils.sanitize_filename(all_titles[idx+1])}.html">下一章</a>'
    else:
        next_link = ''

    page = format.html.content(title, text, prev_link, next_link, self.config.kgf * self.config.kg)

    target = os.path.join(output_dir, f"{utils.sanitize_filename(title)}.html")
    with open(target, "w", encoding='UTF-8') as f:
        f.write(page)
Loading