From 38ab9e9b8b90c0080deaecccbeb75567a3028b01 Mon Sep 17 00:00:00 2001
From: Joe Crawford
Date: Sun, 2 Jun 2024 09:13:43 -0400
Subject: [PATCH] utility to find http-only sites

---
 find-http-only-sites.js | 74 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 find-http-only-sites.js

diff --git a/find-http-only-sites.js b/find-http-only-sites.js
new file mode 100644
index 0000000..a00bfdd
--- /dev/null
+++ b/find-http-only-sites.js
@@ -0,0 +1,74 @@
+// Tool to run periodically to find sites that are only available over HTTP.
+const fs = require('fs');
+const path = require('path');
+const sites = require('./sites.json');
+const cheerio = require('cheerio');
+const httpSites = require('./http-sites.json');
+
+// Resolve next to this script so the file written is the same one that
+// require() loaded above, regardless of the current working directory.
+const HTTP_SITES_PATH = path.join(__dirname, 'http-sites.json');
+
+/**
+ * Fetch a URL and resolve with its response body as text.
+ *
+ * @param {string} url - Absolute URL to request.
+ * @returns {Promise<string>} The response body.
+ */
+async function fetchText(url) {
+  const res = await fetch(url);
+  return res.text();
+}
+
+/**
+ * Add a site to http-sites.json unless it is already listed there.
+ *
+ * @param {string} blog - Site address without protocol, as listed in sites.json.
+ */
+function recordHttpOnlySite(blog) {
+  if (httpSites.sites.includes(blog)) {
+    console.log(`${blog} already in http-sites.json.`);
+    return;
+  }
+  console.log(`Adding ${blog} to http-sites.json.`);
+  httpSites.sites.push(blog);
+  fs.writeFileSync(HTTP_SITES_PATH, JSON.stringify(httpSites, null, 2));
+}
+
+/**
+ * Try a site over HTTPS and fall back to HTTP when that fails. A site that
+ * only answers over HTTP with a non-empty body is recorded.
+ *
+ * @param {string} blog - Site address without protocol.
+ * @returns {Promise<void>}
+ */
+async function checkSite(blog) {
+  try {
+    await fetchText(`https://${blog}`);
+    return;
+  } catch {
+    console.log(`Failed to load https://${blog}. Trying http...`);
+  }
+
+  let html;
+  try {
+    html = await fetchText(`http://${blog}`);
+  } catch (err) {
+    // Unreachable over both protocols: report it instead of leaving an
+    // unhandled rejection, which would terminate the whole run.
+    console.log(`Failed to load http://${blog} as well: ${err.message}`);
+    return;
+  }
+  if (!html) {
+    return;
+  }
+
+  console.log(`Loaded ${blog} with http.`);
+  recordHttpOnlySite(blog);
+}
+
+for (const blog of sites.blogs) {
+  checkSite(blog).catch((err) => {
+    console.error(`Unexpected error while checking ${blog}:`, err);
+  });
+}