-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata.js
107 lines (96 loc) · 3.03 KB
/
data.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import fs from 'fs'
import path from 'path';
import { fileURLToPath } from 'url';
import { load } from 'cheerio'
/**
 * Scrapes every result row reachable from a search URL.
 *
 * Prepares the storage folder, downloads the first page, and, when that page
 * contains a data table, downloads each follow-up page one after another and
 * appends its rows to the result.
 *
 * @param {string} initialAddress - URL of the first result page.
 * @returns {Promise<Array<object>>} Rows from all pages in page order, or an
 *   empty array when the first page has no data table.
 */
const getAllData = async (initialAddress) => {
  checkStorageFolder();

  const firstPage = await getData(initialAddress);
  if (!checkIfValidPage(firstPage)) {
    console.log(`Not a valid address : ${initialAddress}`);
    return [];
  }

  // One entry per page; flattened once at the end.
  const rowsPerPage = [extractData(firstPage)];

  const followupPages = getFollowupPages(firstPage, initialAddress);
  console.log(`---FOLLOWUP URLS---`);
  console.log(followupPages);

  // Pages are fetched sequentially, so the output keeps page order.
  for (const address of followupPages) {
    const nextPage = await getData(address);
    rowsPerPage.push(extractData(nextPage));
  }

  return rowsPerPage.flat();
};
/**
 * Serialises `data` as pretty-printed JSON and writes it to `storage/<key>.json`.
 *
 * The path is relative, so it is resolved against the process working
 * directory. The write is awaited: the returned promise settles only after the
 * write has finished, and a failed write rejects the promise so the caller can
 * handle it with try/catch.
 *
 * @param {string} key - Base name (without extension) of the file to write.
 * @param {*} data - Any JSON-serialisable value.
 * @returns {Promise<void>} Resolves after the file has been written.
 * @throws {Error} Rejects with the underlying file-system error when the write fails.
 */
const saveData = async (key, data) => {
  // 4-space indentation keeps the stored file human-readable.
  const stringifiedData = JSON.stringify(data, null, 4);

  // Promise-based write: completion and failure both flow through the returned promise.
  await fs.promises.writeFile(`storage/${key}.json`, stringifiedData);
  console.log('JSON data is saved.');
};
/**
 * Makes sure a `storage` directory exists next to this module and holds no
 * files from an earlier run.
 *
 * Creates the directory when it is missing; otherwise removes every entry in
 * it. All work is synchronous, so the folder is already clean when the
 * function returns and later writes cannot race with the cleanup.
 *
 * @returns {void}
 * @throws {Error} Propagates any file-system error (for example when an entry
 *   cannot be unlinked) to the caller.
 */
const checkStorageFolder = () => {
  // ES modules have no __dirname; derive the module directory from import.meta.url.
  const moduleFile = fileURLToPath(import.meta.url);
  const moduleDir = path.dirname(moduleFile);
  const storageDir = path.resolve(path.join(moduleDir, 'storage'));

  if (!fs.existsSync(storageDir)) {
    fs.mkdirSync(storageDir);
    return;
  }

  for (const entry of fs.readdirSync(storageDir)) {
    fs.unlinkSync(path.join(storageDir, entry));
  }
};
/**
 * Downloads a page and hands its body text to cheerio's `load`.
 *
 * @param {string} address - URL to request.
 * @returns {Promise<*>} Whatever `load` returns for the response body.
 */
const getData = async (address) => {
  console.log(`---HANDLING URL: ${address}`);
  const page = await fetch(address);
  const html = await page.text();
  return load(html);
};
/**
 * Converts the rows of the page's `table.datatable` into plain objects.
 *
 * Rows carrying the `DraftTableLabel` class are skipped. The first five cells
 * of each row become `date`, `teamName`, `acquired`, `relinquished` and
 * `notes`. The two list cells are split on the "• " bullet and rejoined with
 * commas. `date` is taken as-is, without trimming.
 *
 * @param {Function} $ - Query function for the parsed page.
 * @returns {Array<{date: string, teamName: string, acquired: string, relinquished: string, notes: string}>}
 *   One object per table row, in document order.
 */
const extractData = ($) => {
  // Trimmed text content of a single cell.
  const trimmedText = (cell) => $(cell).text().trim();
  // Bullet-separated cell text -> comma-joined string without empty entries.
  const bulletsToCsv = (cell) => trimmedText(cell).split('• ').filter(Boolean).join(',');

  const rows = [];
  $("table.datatable > tbody > tr:not(.DraftTableLabel)").each((_index, row) => {
    const cells = $(row).find('td');
    rows.push({
      date: $(cells[0]).text(),
      teamName: trimmedText(cells[1]),
      acquired: bulletsToCsv(cells[2]),
      relinquished: bulletsToCsv(cells[3]),
      notes: trimmedText(cells[4]),
    });
  });
  return rows;
};
/**
 * Builds the URLs of the result pages that follow the first one.
 *
 * For each follow-up page reported by `extractPageCount`, the `start` query
 * parameter of `address` is set to 25, 50, 75, ... and the resulting URL is
 * collected.
 *
 * @param {Function} $ - Query function for the parsed first page.
 * @param {string} address - URL of the first page; must be an absolute URL.
 * @returns {string[]} Follow-up page URLs in ascending `start` order.
 */
const getFollowupPages = ($, address) => {
  const rowsPerPage = 25;
  const followupCount = extractPageCount($);
  const pageUrl = new URL(address);

  return Array.from({ length: followupCount }, (_unused, pageIndex) => {
    // The same URL object is reused; only its `start` offset moves forward.
    pageUrl.searchParams.set("start", (pageIndex + 1) * rowsPerPage);
    return pageUrl.toString();
  });
};
/**
 * Counts the links inside the third matching cell of the tables that are not
 * the data table.
 *
 * NOTE(review): that cell is presumably the pagination strip — confirm against
 * the live page markup.
 *
 * @param {Function} $ - Query function for the parsed page.
 * @returns {number} Number of `<a>` elements found in that cell.
 */
const extractPageCount = ($) => {
  const nonDataCells = $("table:not(.datatable) > tbody > tr:not(.DraftTableLabel) > td");
  const pagerLinks = $(nonDataCells[2]).find('a');
  return pagerLinks.length;
};
/**
 * Reports whether the page contains a results table.
 *
 * @param {Function} $ - Query function for the parsed page.
 * @returns {number} Count of `table.datatable` matches; 0 (falsy) means the
 *   page has no data table.
 */
const checkIfValidPage = ($) => $("table.datatable").length;
// Public API of this module: the default export exposes only the scraping
// entry point (getAllData) and the JSON writer (saveData).
export default {
getAllData,
saveData
}