Skip to content

Commit

Permalink
fix(source): Arnovel (#671)
Browse files Browse the repository at this point in the history
* fix arnovel

* update 1stkissnovel url

* fix images 1stkissnovel

* QoL fixes for Agitoon and Pawread

* Partial fix for sakuranovel/indowebnovel chapnames

* Experimental lnp and pawread array

* I know how commas work

* fix inoveltranslation actual

* startCase

* fix bug
  • Loading branch information
K1ngfish3r authored Jul 15, 2023
1 parent 414ca51 commit 4c00b2b
Show file tree
Hide file tree
Showing 10 changed files with 88 additions and 96 deletions.
33 changes: 9 additions & 24 deletions src/sources/en/lightnovelpub.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,43 +53,28 @@ const parseNovelAndChapters = async novelUrl => {
sourceName: 'LightNovelPub',
};

novel.novelName = loadedCheerio('h1.novel-title')
.text()
.replace(/[\t\n]/g, '')
.trim();
novel.novelName = loadedCheerio('h1.novel-title').text().trim();

novel.novelCover = loadedCheerio('figure.cover > img').attr('data-src');

novel.genre = '';
novel.genre = loadedCheerio('.categories li')
.find('a')
.map((i, el) => loadedCheerio(el).text())
.toArray()
.join(',');

loadedCheerio('div.categories > ul > li').each(function () {
novel.genre +=
loadedCheerio(this)
.text()
.replace(/[\t\n]/g, '') + ',';
});

loadedCheerio('div.header-stats > span').each(function () {
if (loadedCheerio(this).find('small').text() === 'Status') {
novel.status = loadedCheerio(this).find('strong').text();
}
});

novel.genre = novel.genre.slice(0, -1);
novel.status = loadedCheerio('small:contains("Status")').prev().text().trim();

novel.author = loadedCheerio('.author > a > span').text();

loadedCheerio('.expand').remove();
novel.summary = loadedCheerio('.summary > .content').text().trim();

const delay = ms => new Promise(res => setTimeout(res, ms));

let lastPage = 1;

lastPage = loadedCheerio(
'#novel > header > div.header-body.container > div.novel-info > div.header-stats > span:nth-child(1) > strong',
)
.text()
?.trim();
lastPage = loadedCheerio('small:contains("Chapters")').prev().text().trim();

lastPage = Math.ceil(lastPage / 100);

Expand Down
10 changes: 4 additions & 6 deletions src/sources/en/novelhall.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,16 @@ const parseNovelAndChapters = async novelUrl => {
.text()
?.replace('back<<', '');

novel.author = loadedCheerio('span.blue')
.first()
loadedCheerio('p[style="display: none;"]').remove();
novel.author = loadedCheerio('span.blue:contains("Author")')
.text()
.replace('Author:', '');
.replace(/Author|\n+\t+/g, '');

novel.genre = loadedCheerio('a.red').text();

novel.artist = null;

novel.status = loadedCheerio('span.blue')
.first()
.next()
novel.status = loadedCheerio('span.blue:contains("Status")')
.text()
.replace('Status:', '');

Expand Down
10 changes: 9 additions & 1 deletion src/sources/en/novelupdates.js
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,15 @@ const parseChapter = async (novelUrl, chapterUrl) => {
} else if (isLightNovelsTls) {
chapterText = loadedCheerio('.text_story').html();
} else if (isiNovelTranslation) {
chapterText = loadedCheerio('.chakra-skeleton').html();
const link = 'https://api.' + result.url.slice(8);
const json = await fetchApi({
url: link,
sourceId,
}).then(r => r.json());
chapterText =
json.content.replace(/\n/g, '<br>') +
'<br><hr><br>TL Notes:<br>' +
json.notes.replace(/\n/g, '<br>');
} else if (isWordPress) {
/**
* Remove wordpress bloat tags
Expand Down
31 changes: 13 additions & 18 deletions src/sources/en/pawread.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,34 +47,28 @@ const parseNovelAndChapters = async novelUrl => {
.attr('style')
.replace(/.*\((.*?)\)/g, '$1');

novel.author = loadedCheerio(
'#views_info > div:nth-child(4) > span:nth-child(2)',
)
novel.author = loadedCheerio('#views_info > div:last > span:last')
.text()
.replace(/(Author: )/g, '');
.replace('Author: ', '');

novel.status = loadedCheerio('.label').text().trim();

novel.genre = loadedCheerio('div.mt20:nth-child(4)')
.text()
.trim()
.replace(/\s/g, ',');
novel.genre = loadedCheerio('.col-md-9 .mt20')
.find('a')
.map((i, el) => loadedCheerio(el).text())
.toArray()
.join(',');

novel.summary = loadedCheerio('#simple-des').text().trim();

let chapters = [];

loadedCheerio('div.filtr-item').each(function () {
loadedCheerio('.item-box').each(function () {
const chapterName = loadedCheerio(this).find('.c_title').text();
const releaseDate = loadedCheerio(this)
.find('.c_title')
.next()
.next()
.text();
const releaseDate = loadedCheerio(this).find('span:last').text();
const chapterUrl =
novelUrl +
loadedCheerio(this)
.find('.item-box')
.attr('onclick')
.replace(/.*'(.*)'.*/g, '$1') +
'.html';
Expand All @@ -95,9 +89,10 @@ const parseChapter = async (novelUrl, chapterUrl) => {

let loadedCheerio = cheerio.load(body);

const chapterName = loadedCheerio(
'div.panel:nth-child(2) > div > div > h3',
).text();
const chapterName = loadedCheerio('.chapter-content h3').text();

const steal = ['bit.ly', 'tinyurl', 'pawread'];
steal.map(tag => loadedCheerio(`p:icontains(${tag})`).remove());
const chapterText = loadedCheerio('#chapter_item').html();

const chapter = {
Expand Down
31 changes: 14 additions & 17 deletions src/sources/id/indowebnovel.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import * as cheerio from 'cheerio';
import { fetchHtml } from '@utils/fetch/fetch';
import { startCase } from 'lodash-es';

const sourceId = 87;
const sourceName = 'IndoWebNovel';
Expand Down Expand Up @@ -50,7 +51,7 @@ const parseNovelAndChapters = async novelUrl => {
novelUrl,
};

novel.novelName = loadedCheerio('.series-title h2').text().trim();
novel.novelName = startCase(loadedCheerio('.series-title h2').text().trim());

novel.novelCover = loadedCheerio('.series-thumb img').attr('src');

Expand Down Expand Up @@ -78,22 +79,18 @@ const parseNovelAndChapters = async novelUrl => {

let chapters = [];

loadedCheerio('.series-chapterlist li').each(function () {
const chapterName = loadedCheerio(this)
.find('a span')
.first()
.text()
.replace(/.*?(Chapter.|[0-9])/g, '$1')
.replace(/Bahasa Indonesia/g, '')
.replace(/\s+/g, ' ')
.trim();

const releaseDate = loadedCheerio(this)
.find('a span')
.first()
.next()
.text();
const chapterUrl = loadedCheerio(this).find('a').attr('href');
loadedCheerio('.series-chapterlist li a').each(function () {
let titles = startCase(
loadedCheerio(this)
.attr('title')
.replace(/Bahasa Indonesia/g, '')
.replace(/\s\s+/g, ' ')
.trim(),
);

const chapterName = titles.replace(`${novel.novelName}`, '');
const releaseDate = loadedCheerio(this).find('span:last').text();
const chapterUrl = loadedCheerio(this).attr('href');

chapters.push({ chapterName, releaseDate, chapterUrl });
});
Expand Down
31 changes: 14 additions & 17 deletions src/sources/id/sakuranovel.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { fetchHtml } from '@utils/fetch/fetch';
import * as cheerio from 'cheerio';
import { startCase } from 'lodash-es';

const sourceId = 103;
const sourceName = 'SakuraNovel';
Expand Down Expand Up @@ -45,7 +46,7 @@ const parseNovelAndChapters = async novelUrl => {
novelUrl,
};

novel.novelName = loadedCheerio('.series-title h2').text().trim();
novel.novelName = startCase(loadedCheerio('.series-title h2').text().trim());

novel.novelCover = loadedCheerio('.series-thumb img').attr('src');

Expand Down Expand Up @@ -73,22 +74,18 @@ const parseNovelAndChapters = async novelUrl => {

let chapters = [];

loadedCheerio('.series-chapterlist li').each(function () {
const chapterName = loadedCheerio(this)
.find('a span')
.first()
.text()
.replace(/.*?(Chapter.|[0-9])/g, '$1')
.replace(/Bahasa Indonesia/g, '')
.replace(/\s+/g, ' ')
.trim();

const releaseDate = loadedCheerio(this)
.find('a span')
.first()
.next()
.text();
const chapterUrl = loadedCheerio(this).find('a').attr('href');
loadedCheerio('.series-chapterlist li a').each(function () {
let titles = startCase(
loadedCheerio(this)
.attr('title')
.replace(/Bahasa Indonesia/g, '')
.replace(/\s\s+/g, ' ')
.trim(),
);

const chapterName = titles.replace(`${novel.novelName}`, '');
const releaseDate = loadedCheerio(this).find('span:last').text();
const chapterUrl = loadedCheerio(this).attr('href');

chapters.push({ chapterName, releaseDate, chapterUrl });
});
Expand Down
12 changes: 3 additions & 9 deletions src/sources/kr/Agitoon.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,10 @@ const parseChapter = async (novelUrl, chapterUrl) => {
const loadedCheerio = cheerio.load(body);

const title = loadedCheerio('div > div.col-12 > h2').text();
const contentTag = loadedCheerio('#id_wr_content > p');

let content = '';
contentTag.each((_, element) => {
content += loadedCheerio(element).text();
content += '<br />';
});
let chapterText = loadedCheerio('#id_wr_content').html();

// Strip the site's popup-menu hint text that is embedded in the chapter content
content = content.replace(
chapterText = chapterText.replace(
'팝업메뉴는 빈공간을 더치하거나 스크룰시 사라집니다',
'',
);
Expand All @@ -121,7 +115,7 @@ const parseChapter = async (novelUrl, chapterUrl) => {
novelUrl,
chapterUrl,
chapterName: title,
chapterText: content,
chapterText,
};

return chapter;
Expand Down
2 changes: 1 addition & 1 deletion src/sources/multisrc/madara/MadaraGenerator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ export const FreeNovelMeScraper = new MadaraScraper(

export const FirstKissNovelScraper = new MadaraScraper(
46,
'https://1stkissnovel.love/',
'https://1stkissnovel.org/',
'FirstKissNovel',
{ 'useNewChapterEndpoint': true },
);
Expand Down
22 changes: 20 additions & 2 deletions src/sources/multisrc/madara/MadaraScraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ class MadaraScraper {
.attr('href')
.split('/')[4];

if (sourceId === 59) {
novelUrl = loadedCheerio(this)
.find('.post-title')
.find('a')
.attr('href')
.split('/')[5];
}

const novel = {
sourceId,
novelName,
Expand Down Expand Up @@ -79,11 +87,12 @@ class MadaraScraper {
novel.novelName = loadedCheerio('.post-title h1').text().trim();

novel.novelCover =
loadedCheerio('.summary_image > a > img').attr('data-lazy-src') ||
loadedCheerio('.summary_image > a > img').attr('data-src') ||
loadedCheerio('.summary_image > a > img').attr('src') ||
defaultCoverUri;

loadedCheerio('.post-content_item', '.post-content').each(function () {
loadedCheerio('.post-content_item, .post-content').each(function () {
const detailName = loadedCheerio(this).find('h5').text().trim();
const detail = loadedCheerio(this).find('.summary-content').text().trim();

Expand All @@ -94,6 +103,7 @@ class MadaraScraper {
break;
case 'Author(s)':
case 'المؤلف':
case 'المؤلف (ين)':
novel.author = detail;
break;
case 'Status':
Expand All @@ -106,7 +116,7 @@ class MadaraScraper {
}
});

loadedCheerio('div.summary__content .code-block').remove();
loadedCheerio('div.summary__content .code-block,script').remove();
novel.summary = loadedCheerio('div.summary__content').text().trim();

let novelChapters = [];
Expand Down Expand Up @@ -237,6 +247,14 @@ class MadaraScraper {
.attr('href')
.split('/')[4];

if (sourceId === 59) {
novelUrl = loadedCheerio(this)
.find('.post-title')
.find('a')
.attr('href')
.split('/')[5];
}

const novel = {
sourceId,
novelName,
Expand Down
2 changes: 1 addition & 1 deletion src/sources/multisrc/madara/MadaraSources.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
},
{
"sourceId": 46,
"baseUrl": "https://1stkissnovel.love/",
"baseUrl": "https://1stkissnovel.org/",
"sourceName": "FirstKissNovel",
"options": {
"useNewChapterEndpoint": true
Expand Down

0 comments on commit 4c00b2b

Please sign in to comment.