-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapper.js
92 lines (77 loc) · 3.19 KB
/
scrapper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
const puppeteer = require("puppeteer");
const https = require("https");
const fs = require("fs");
/**
 * Scrapes one station entry from bazakolejowa.pl with Puppeteer.
 *
 * Steps:
 *  1. Open the station page with `id=-1` and read the numeric station id from
 *     the URL the browser ends up on (presumably the site redirects `-1` to a
 *     concrete station — confirm against the site).
 *  2. Open the station's gallery view and collect its name and image links.
 *  3. Open the station's location view, click the button that opens a popup,
 *     and parse coordinates out of the popup's URL.
 *
 * @returns {Promise<{name: string, imageLinks: string[], cords: ({latitude: number, longitude: number}|null)}>}
 *   The station name, the gallery image URLs, and the parsed coordinates
 *   (`null` when the popup URL contains none).
 * @throws {Error} When no numeric station id can be read from the landing URL,
 *   or when any Puppeteer navigation/selector wait fails.
 */
async function getImageLink() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(
      "https://www.bazakolejowa.pl/index.php?dzial=stacje&id=-1"
    );

    // Fail fast with a clear message instead of continuing with a closed
    // browser and crashing on a null match. This also covers the case where
    // the site falls back to the bare index page.
    const landingUrl = page.url();
    const idMatch = landingUrl.match(/id=(\d+)/);
    if (idMatch === null) {
      throw new Error(`No station id found in landing URL: ${landingUrl}`);
    }
    const id = idMatch[1];

    await page.goto(
      `https://www.bazakolejowa.pl/index.php?dzial=stacje&id=${id}&ed=0&okno=galeria`
    );
    await page.waitForSelector("h2#obiektNazwa");
    const name = await page.$eval("h2#obiektNazwa", (heading) =>
      heading.textContent.trim()
    );
    console.log(name);

    await page.waitForSelector("div#galeriaObrazow");
    // Gallery <img> elements point at thumbnails; swapping the path segment
    // yields what appears to be the full-size image URL.
    const imageLinks = await page.$$eval(
      "div#galeriaObrazow div a img",
      (images) =>
        images.map((image) =>
          image.src.replace("addons/tempbases/thumbs", "foto")
        )
    );

    await page.goto(
      `https://www.bazakolejowa.pl/index.php?dzial=stacje&id=${id}&ed=0&okno=polozenie`
    );
    // Register the popup listener before clicking so the event cannot be missed.
    const popupOpened = new Promise((resolve) => page.once("popup", resolve));
    await page.waitForSelector("button#gsvKlawisz");
    await page.click("button#gsvKlawisz");
    const popupPage = await popupOpened;

    const earlyCords = extractCoordinatesFromGoogleMapsLink(popupPage.url());
    console.log(imageLinks);
    await popupPage.waitForNavigation({
      waitUntil: "networkidle0",
    });
    // The URL read right after the popup opens may not carry coordinates yet;
    // fall back to the URL once navigation has settled.
    const cords =
      earlyCords || extractCoordinatesFromGoogleMapsLink(popupPage.url());

    return { name, imageLinks, cords };
  } finally {
    // Always release the browser process, including on errors.
    await browser.close();
  }
}
/**
 * Extracts the first `@<latitude>,<longitude>` pair from a Google Maps link.
 *
 * Both numbers may be negative and may be written with or without a
 * fractional part (e.g. `@52.2297,21.0122` or `@0,0`).
 *
 * @param {string} link - URL text to search.
 * @returns {{latitude: number, longitude: number}|null} The parsed pair, or
 *   `null` when `link` is not a string or contains no such pair.
 */
function extractCoordinatesFromGoogleMapsLink(link) {
  // A missing or non-string link simply has no coordinates; do not throw.
  if (typeof link !== "string") {
    return null;
  }

  const coordinatePattern = /@(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)/;
  const found = link.match(coordinatePattern);
  if (found === null) {
    return null; // Coordinates not found in the link
  }

  const [, latitudeText, longitudeText] = found;
  return {
    latitude: parseFloat(latitudeText),
    longitude: parseFloat(longitudeText),
  };
}
// Public API of this module: only getImageLink is exported;
// extractCoordinatesFromGoogleMapsLink is not exported.
module.exports = { getImageLink };
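// Example of the thumbnail -> full-size path rewrite applied to gallery image URLs:
//   thumbnail: addons/tempbases/thumbs/1036/1286388805-1731.jpg
//   full size: foto/1036/1286388805-1731.jpg
// Disabled sketch: download each collected image link to img/<n>.jpg.
// textContent2.forEach((link, i) => {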
// const imageName = `img/${i+1}.jpg`
// const file = fs.createWriteStream(imageName)
// https
// .get(link, (response) => {
// response.pipe(file)
// file.on("finish", () => {
// file.close()
// console.log(`Image downloaded as ${imageName}`)
// });
// })
// .on("error", (err) => {
// fs.unlink(imageName)
// console.error(`Error downloading image: ${err.message}`)
// });
// });