-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgetEventHistoryAndJSON.js
125 lines (113 loc) · 3.92 KB
/
getEventHistoryAndJSON.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
require("dotenv").config();
const cheerio = require("cheerio");
const axios = require("axios");
const mysql = require("mysql");
const fs = require("fs");
// Base URL of an individual event page; an event ID plus a trailing slash is
// appended to it when a single event is scraped.
const eventURLBase = "https://www.meetup.com/seattle-coffee-club/events/";

/**
 * Returns a promise that resolves (with no value) once the timer fires.
 *
 * @param {number} ms - Timeout handed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise(resolve => {
    setTimeout(resolve, ms);
  });
}
// MySQL connection settings, read from the environment (populated by dotenv).
const dbConfig = {
  host: process.env.HOST,
  user: process.env.USER,
  password: process.env.PASSWORD,
  database: process.env.DATABASE,
};

// Single connection object shared by every query in this script.
const connection = mysql.createConnection(dbConfig);
/**
 * Scrapes the group's "past events" page for event links and queues each event
 * ID in MySQL through the `spAddCCEventQueue` stored procedure.
 *
 * How many events are found depends on how many `a.eventCard--link` anchors
 * the page contains; nothing here limits the count.
 *
 * @returns {Promise<void>} Resolves once the page has been fetched and every
 *   insert has been handed to the connection. A failed request or parse is
 *   logged instead of rejecting, so callers that ignore the return value do
 *   not trigger an unhandled rejection.
 */
function getHistory() {
  let eventCount = 0;
  return axios
    .get("https://www.meetup.com/seattle-coffee-club/events/past/")
    .then(response => {
      // Load the web page source code into a cheerio instance
      const $ = cheerio.load(response.data);
      console.log("Scraping EventIDs from Meetup");
      $("a.eventCard--link").each((index, value) => {
        const link = $(value).attr("href");
        // An anchor without an href has nothing to extract; skip it rather
        // than crash on `undefined.replace`.
        if (!link) return;
        // The event ID is whatever digits remain once everything else is removed.
        const eventID = link.replace(/\D/g, "");
        if (eventID === "") return;
        eventCount++;
        console.log(`${eventCount}: ${eventID}`);
        connection.query("CALL spAddCCEventQueue (?)", [eventID], error => {
          if (error) throw error;
        });
      });
    })
    .catch(error => {
      console.error(`Failed to scrape event history: ${error.message}`);
    });
}
/**
 * Selects every unprocessed event ID from `cc_events_queue`, scrapes each one,
 * and ends the shared MySQL connection once all scrapes have settled.
 *
 * The connection must not be ended when the scrapes are merely started: each
 * scrape issues its `UPDATE` only after its HTTP request returns, and a query
 * enqueued after `connection.end()` fails.
 */
function getQueue() {
  connection.query(
    `SELECT eventID FROM cc_events_queue WHERE processed = 0`,
    (error, results) => {
      if (error) throw error;
      // All scrapes start concurrently; scrapeEvent never rejects, so the
      // connection is always closed after the last one finishes.
      const pending = results.map(row => scrapeEvent(row.eventID));
      Promise.all(pending).then(() => connection.end());
    }
  );
}

/**
 * Fetches one event page, saves its JSON-LD block to `./json/<eventID>.json`
 * (skipped when the page is marked canceled), flags the event as processed in
 * `cc_events_queue`, and logs the extracted event details.
 *
 * @param {string|number} eventID - Event ID appended to `eventURLBase`.
 * @returns {Promise<void>} Resolves when the page has been handled. Request,
 *   parse and file errors are logged rather than rejected.
 */
function scrapeEvent(eventID) {
  const eventURL = `${eventURLBase}${eventID}/`;
  return axios
    .get(eventURL)
    .then(response => {
      // Load the web page source code into a cheerio instance
      const $ = cheerio.load(response.data);
      const scriptJSON = $('script[type="application/ld+json"]').html();
      const jsonObj = JSON.parse(scriptJSON);
      const eventJSON = `./json/${eventID}.json`;
      let eventIsCanceled = 0;
      // Test to see if event was canceled
      if (response.data.includes("<span>Canceled</span>")) {
        console.log(">>> Canceled");
        eventIsCanceled = 1;
      } else {
        // Save JSON file of Event to be processed however you like
        fs.writeFileSync(eventJSON, JSON.stringify(jsonObj));
      }
      // Flag Event as Processed. The ID is bound as a parameter instead of
      // being spliced into the SQL text.
      connection.query(
        "UPDATE cc_events_queue SET processed = 1 WHERE eventID = ?",
        [eventID]
      );
      try {
        const { name: eventName, location } = jsonObj;
        const { address: postal, geo } = location;
        const eventDate = jsonObj.startDate.toString().slice(0, 16);
        const venueName = location.name;
        const address = postal.streetAddress;
        const city = postal.addressLocality;
        const state = postal.addressRegion;
        const zip = postal.postalCode;
        // Keep only the first two characters of the country value.
        let country = postal.addressCountry;
        if (country.length > 2) {
          country = country.slice(0, 2);
        }
        const lat = geo.latitude.toFixed(8);
        const lon = geo.longitude.toFixed(8);
        console.log(`******`);
        console.log(`EVENTID: ${eventID}`);
        console.log(`CANCELED: ${eventIsCanceled}`);
        console.log(`URL: ${eventURL}`);
        console.log(`EVENT: ${eventName}`);
        console.log(`WHEN: ${eventDate}`);
        console.log(`VENUE: ${venueName}`);
        console.log(`ADDRESS: ${address}`);
        console.log(`CITY: ${city}`);
        console.log(`STATE: ${state}`);
        console.log(`ZIPCODE: ${zip}`);
        console.log(`COUNTRY: ${country}`);
        console.log(`LATITUDE: ${lat}`);
        console.log(`LONGITUDE: ${lon}`);
      } catch {
        // A missing or oddly shaped field only costs the log output; the
        // event stays flagged as processed.
        console.log(`>> Issue scraping ${eventID}`);
      }
    })
    .catch(error => {
      // Request failures, invalid JSON-LD, or a failed file write end up here;
      // the event was not flagged, so it remains in the queue.
      console.log(`>> Issue scraping ${eventID}: ${error.message}`);
    });
}
/**
 * Script entry point: opens the MySQL connection, starts the history scrape,
 * pauses for a fixed interval, then processes the queue.
 *
 * @returns {Promise<void>}
 */
async function scrapeHistoryAndEvents() {
  connection.connect();
  getHistory();
  // getHistory() is not awaited here, so pause for a fixed interval before
  // the queue is read.
  const queueDelayMs = 4000;
  await delay(queueDelayMs);
  getQueue();
}

scrapeHistoryAndEvents();