This repository was archived by the owner on Jun 2, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathindex.js
127 lines (107 loc) · 4.3 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
const cloudscraper = require('cloudscraper');
const xpath = require('xpath');
const { DOMParser } = require('xmldom');
const fs = require('fs');
const isUrl = require('is-url');
const mkdirp = require('mkdirp');
const path = require('path');
const basePath = './mrm-downloads';
function download (url, {directory = '.', filename = 'download'} = {}) {
return new Promise((resolve, reject) => {
mkdirp.sync(directory);
return cloudscraper.get({uri: url, encoding: null}).then(bufferAsBody => {
fs.writeFile(path.join(directory, filename), bufferAsBody, error => {
if (error) reject(error);
else resolve();
})
});
});
}
let argv = require('minimist')(process.argv.slice(2));
if (argv._.length < 1) {
console.log('You need to pass at least one MyReadingManga.info URL as argument. If the manga contains more than one page, this script will automatically handle this and download these pages in order.');
console.log('You can pass how much mangas as you want, they will be downloades sequentially.');
process.exit(1);
}
for (let arg of argv._) {
if (!isUrl(arg) || arg.indexOf('myreadingmanga.info') === -1) {
console.log('All arguments must be a valid MyReadingManga.info URL.');
process.exit(2);
}
}
(async function main () {
console.log(`Downloading ${argv._.length} manga${argv._.length > 1 ? 's' : ''}, please wait...\n`);
for (let url of argv._) {
// Download html pages and extract images
console.log('Gathering information...');
let { title, images } = await getTitleAndImages(url);
// Download manga pages
console.log(`\nDownloading "${title}"...`);
await downloadBatch(`${basePath}/${title}`, images);
// Add a README to the destination folder
let dir = `${basePath}/${title}`;
mkdirp.sync(dir);
fs.writeFileSync(`${dir}/README.txt`, `Doujinshi downloaded from ${url} by the MyReadingManga Downloader.\n`, { encoding: 'utf8' });
console.log(`\nDownload complete!`);
console.log(`Manga saved in "${basePath}/${title}".`);
}
console.log("\nAll downloads complete! Good reading :)");
process.exit(0);
})();
async function getTitleAndImages (firstURL) {
let title;
let next = firstURL;
let images = [];
do {
await tryUntilResolve(this, download, next, {directory: basePath, filename: 'tmp.html'});
let dom = parse(`${basePath}/tmp.html`);
if (!title) title = xpath.select("string(//head/title)", dom).split('/').join(' ');
let result = searchDOM(dom);
node = result.iterateNext();
while (node) {
images.push(node.value);
node = result.iterateNext();
}
next = xpath.select('string(//head/link[contains(@rel, "next")]/@href)', dom);
} while (next);
fs.unlinkSync(`${basePath}/tmp.html`);
return { title, images };
}
function parse (file) {
let html = fs.readFileSync(file, { encoding: 'utf8' });
return new DOMParser().parseFromString(html);
}
function searchDOM (dom) {
let images;
// first attempt
images = xpath.evaluate("//div[contains(@class, 'entry-content')]/div/img/@data-lazy-src", dom, null, xpath.XPathResult.ANY_TYPE, null);
if (images.nodes.length === 0) // second attempt
images = xpath.evaluate("//div[contains(@class, 'entry-content')]/div/img/@data-src", dom, null, xpath.XPathResult.ANY_TYPE, null);
if (images.nodes.length === 0) // third attempt
images = xpath.evaluate("//div[contains(@class, 'entry-content')]//img/@data-lazy-src", dom, null, xpath.XPathResult.ANY_TYPE, null);
if (images.nodes.length === 0) // forth attempt
images = xpath.evaluate("//div[contains(@class, 'entry-content')]//img/@data-src", dom, null, xpath.XPathResult.ANY_TYPE, null);
if (images.nodes.length === 0) // all failed
console.error('\nCan\'t find manga pages in the specified URL :(\n');
return images;
}
function downloadBatch (directory, urls) {
let total = urls.length;
let downloaded = 0;
let promises = [];
console.log(`${downloaded}/${total}`);
for (let i in urls) {
promises.push(tryUntilResolve(this, download, urls[i], {directory, filename: `${i}.${urls[i].split('.').pop()}`}).then(_ => console.log(`${++downloaded}/${total}`)));
}
return Promise.all(promises).then(_ => Promise.resolve(downloaded));
}
function tryUntilResolve (thisArg, fn, ...args) {
return new Promise(resolve => {
(function trying () {
fn.apply(thisArg, args).then(resolve, _ => {
console.log('Network error, retrying...');
setTimeout(trying, 500);
});
})();
});
}