Skip to content

Commit

Permalink
utility to find http-only sites
Browse files Browse the repository at this point in the history
  • Loading branch information
artlung committed Jun 2, 2024
1 parent 1dca6c5 commit 38ab9e9
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions find-http-only-sites.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Tool to run periodically to find sites that are only available over HTTP.
const fs = require('fs');

const cheerio = require('cheerio');

const httpSites = require('./http-sites.json');
const sites = require('./sites.json');

/**
 * Checks a single site: tries HTTPS first and, only if that request fails,
 * retries over plain HTTP. A site that returns a non-empty body over HTTP is
 * appended to the in-memory `httpSites.sites` list and the list is written
 * back to ./http-sites.json.
 *
 * @param {string} blog - Site address without a protocol prefix, as listed in sites.json.
 * @returns {Promise<void>} Resolves once the site has been checked. Rejects
 *   only if writing http-sites.json fails; network failures are logged.
 */
async function checkSite(blog) {
  const protocol = 'https://';

  try {
    const res = await fetch(`${protocol}${blog}`);
    // Read the body so a connection that dies mid-response also counts as a failure.
    await res.text();
    return;
  } catch {
    console.log(`Failed to load ${protocol}${blog}. Trying http...`);
  }

  let html;
  try {
    const res = await fetch(`http://${blog}`);
    html = await res.text();
  } catch (err) {
    // Unreachable over both protocols: report it instead of leaving the
    // rejection unhandled, which would abort the whole run.
    console.error(`Failed to load http://${blog} as well: ${err.message}`);
    return;
  }

  // An empty body is not treated as a successful load.
  if (!html) {
    return;
  }

  console.log(`Loaded ${protocol}${blog} with http.`);

  if (httpSites.sites.includes(blog)) {
    console.log(`${blog} already in http-sites.json.`);
    return;
  }

  console.log(`Adding ${blog} to http-sites.json.`);
  httpSites.sites.push(blog);
  // Synchronous write of the full list: checks run concurrently, and each
  // write persists every site recorded so far.
  fs.writeFileSync('./http-sites.json', JSON.stringify(httpSites, null, 2));
}

// Start every check at once; each one reports its own outcome.
for (const blog of sites.blogs) {
  checkSite(blog).catch((err) => {
    console.error(`Could not record ${blog}: ${err.message}`);
  });
}

0 comments on commit 38ab9e9

Please sign in to comment.