Skip to content
This repository has been archived by the owner on Feb 13, 2025. It is now read-only.

Commit

Permalink
Update Report
Browse files Browse the repository at this point in the history
  • Loading branch information
Brandon32 committed Feb 2, 2024
1 parent c171b72 commit 3f39207
Show file tree
Hide file tree
Showing 9 changed files with 289 additions and 86 deletions.
4 changes: 3 additions & 1 deletion bulk-update/bulk-update.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ export async function loadListData(source, fetchFunction = fetch) {
return loadListData(JSON.parse(fs.readFileSync(source, 'utf8').trim()), fetchFunction);
case 'txt':
return loadListData(fs.readFileSync(source, 'utf8').trim().split('\n'), fetchFunction);
case 'html':
return [source];
default:
throw new Error(`Unsupported list format or entry: ${source}`);
}
Expand Down Expand Up @@ -117,7 +119,7 @@ export default async function main(config, migrate, reporter = null) {
*/
if (import.meta.url === `file://${process.argv[1]}`) {
const args = process.argv.slice(2);
const [migrationFolder, list = null] = args;
const [migrationFolder, list] = args;
const migrationFile = `${process.cwd()}/${migrationFolder}/migration.js`;
// eslint-disable-next-line import/no-dynamic-require, global-require
const migration = await import(migrationFile);
Expand Down
39 changes: 29 additions & 10 deletions bulk-update/document-manager/document-manager.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* eslint-disable max-len */
import { fetch } from '@adobe/fetch';
import { fetch, timeoutSignal, AbortError } from '@adobe/fetch';
import { mdast2docx } from '@adobe/helix-md2docx';
import parseMarkdown from '@adobe/helix-html-pipeline/src/steps/parse-markdown.js';

Expand Down Expand Up @@ -32,18 +32,26 @@ export function entryToPath(entry) {
* @param {number} fetchWaitMs - The number of milliseconds to wait before fetching the markdown.
* @returns {Promise<string>} A promise that resolves to the fetched markdown.
*/
async function getMarkdown(url, reporter, fetchWaitMs = 500, fetchFunction = fetch) {
async function fetchMarkdown(url, reporter, fetchWaitMs = 500, fetchFunction = fetch) {
try {
console.log(`Fetching ${url}`);
await delay(fetchWaitMs); // Wait 500ms to avoid rate limiting, not needed for live.
const response = await fetchFunction(url);
const signal = timeoutSignal(5000); // 5s timeout
const response = await fetchFunction(url, { signal });

if (!response.ok) {
reporter.log('load', 'error', 'Failed to fetch markdown.', url, response.status, response.statusText);
return '';
}
return await response.text();
const text = await response.text();
signal.clear();
return text;
} catch (e) {
reporter.log('load', 'warn', 'Markdown not found at url', url, e.message);
if (e instanceof AbortError) {
reporter.log('load', 'warn', 'Fetch timed out after 1s', url);
} else {
reporter.log('load', 'warn', 'Markdown not found at url', url, e.message);
}
}

return '';
Expand All @@ -64,6 +72,20 @@ function getMdast(mdTxt, reporter) {
return mdast;
}

/**
 * Checks if a document has expired based on its modified time and cache time.
 *
 * @param {number|string|Date} mtime - The modified time of the document (anything `new Date` accepts).
 * @param {number} cacheTime - The cache time in milliseconds. Use -1 for no caching expiry.
 * @param {number} [date=Date.now()] - The current time in epoch milliseconds; overridable for testing.
 * @returns {boolean} - Returns true if the document has expired, false otherwise.
 */
export function hasExpired(mtime, cacheTime, date = Date.now()) {
  const modifiedTime = new Date(mtime).getTime();
  // -1 disables expiry entirely; otherwise the document is valid for cacheTime ms after modification.
  const expiryTime = cacheTime === -1 ? Infinity : modifiedTime + cacheTime;

  return expiryTime < date;
}

/**
* Load entry markdown from a file or URL.
*
Expand Down Expand Up @@ -93,17 +115,14 @@ export async function loadDocument(entry, config, fetchFunction = fetch) {

if (mdDir && fs.existsSync(document.markdownFile)) {
const stats = fs.statSync(document.markdownFile);
const modifiedTime = new Date(stats.mtime).getTime();
const expiryTime = mdCacheMs === -1 ? Infinity : modifiedTime - mdCacheMs;

if (expiryTime > Date.now()) {
if (!hasExpired(stats.mtime, mdCacheMs)) {
document.markdown = fs.readFileSync(document.markdownFile, 'utf8');
reporter.log('load', 'success', 'Loaded markdown', document.markdownFile);
}
}

if (!document.markdown) {
document.markdown = await getMarkdown(`${document.url}.md`, reporter, fetchWaitMs, fetchFunction);
document.markdown = await fetchMarkdown(`${document.url}.md`, reporter, fetchWaitMs, fetchFunction);
reporter.log('load', 'success', 'Fetched markdown', `${document.url}.md`);

if (document.markdown && mdDir) {
Expand Down
15 changes: 7 additions & 8 deletions bulk-update/reporter/excel-reporter.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import xlsx from 'xlsx';
import * as fs from 'fs';
import path from 'path';
import xlsx from 'xlsx';
import BaseReporter from './reporter.js';

/**
Expand All @@ -13,6 +14,7 @@ class ExcelReporter extends BaseReporter {
*
* @param {string} filepath - The file path where the Excel file will be saved.
* @param {boolean} [autoSave=true] - Excel file should be automatically saved when logging.
* Disable to improve performance. Don't forget to call `saveReport` when done.
*/
constructor(filepath, autoSave = true) {
super();
Expand Down Expand Up @@ -93,12 +95,9 @@ class ExcelReporter extends BaseReporter {
generateTotals() {
const totals = super.generateTotals();
const totalsSheet = this.workbook.Sheets.Totals;
const data = [];
Object.entries(totals).forEach(([topic, statusCount]) => {
Object.entries(statusCount).forEach(([status, count]) => {
data.push([topic, status, count]);
});
});
const data = Object.entries(totals)
.flatMap(([topic, statusCount]) => Object.entries(statusCount)
.map(([status, count]) => [topic, status, count]));
xlsx.utils.sheet_add_aoa(totalsSheet, data, { origin: 'A2' });
if (!this.filepath) return totals;
try {
Expand All @@ -116,7 +115,7 @@ class ExcelReporter extends BaseReporter {
*/
saveReport() {
if (!this.filepath) return;
const directoryPath = this.filepath.split('/').slice(0, -1).join('/');
const directoryPath = path.dirname(this.filepath);
fs.mkdirSync(directoryPath, { recursive: true });
xlsx.set_fs(fs);
xlsx.writeFile(this.workbook, this.filepath);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
[
"https://main--bacom--adobecom.hlx.live/query-index.json",
"https://main--bacom--adobecom.hlx.live/ae_ar/query-index.json",
"https://main--bacom--adobecom.hlx.live/ae_en/query-index.json",
"https://main--bacom--adobecom.hlx.live/africa/query-index.json",
Expand Down
114 changes: 82 additions & 32 deletions faas-variations-report/report.js
Original file line number Diff line number Diff line change
@@ -1,39 +1,45 @@
/* eslint-disable import/no-extraneous-dependencies */
import fs from 'fs';
import { select, selectAll } from 'unist-util-select';
import { visitParents } from 'unist-util-visit-parents';
import { createHash } from 'crypto';
import { BulkUpdate, ExcelReporter, loadListData } from '../bulk-update/index.js';
import { loadDocument } from '../bulk-update/document-manager/document-manager.js';

const variations = {};

const reportVariations = {};
const { pathname } = new URL('.', import.meta.url);
const dateString = ExcelReporter.getDateString();
const config = {
list: [
`${pathname}query-indexes.json`,
`${pathname}query-indexes-row.json`,
],
siteUrl: 'https://main--bacom--adobecom.hlx.live',
reporter: new ExcelReporter(`${pathname}reports/${ExcelReporter.getDateString()}.xlsx`),
reporter: new ExcelReporter(`${pathname}reports/faas-report-${dateString}.xlsx`, false),
outputDir: `${pathname}output`,
mdDir: `${pathname}md`,
mdCacheMs: -1,
mdCacheMs: 30 * 24 * 60 * 60 * 1000, // 30 days
fetchWaitMs: 20,
};

/**
 * Retrieves the block information from a string and normalizes it.
 * For example, the input `Block(option1, Option2)` returns
 * `{ blockName: 'block', options: ['option1', 'option2'], variant: 'block (option1, option2)' }`
 * And `block` returns `{ blockName: 'block', options: [], variant: 'block' }`
 *
 * @param {string} str - The input block string.
 * @returns {Object} - { blockName, options, variant }; blockName and variant are
 *   undefined when the string contains no word characters.
 */
export const getBlockInfo = (str) => {
  const match = /(\w+)\s*(?:\(([^)]*)\))?/.exec(str.toLowerCase());
  // Guard: exec returns null for strings with no word characters (e.g. ''),
  // which previously threw a TypeError on .map.
  const [, blockName, optionsRaw] = (match ?? []).map((t) => (t ? t.trim() : undefined));
  const options = optionsRaw ? optionsRaw.split(',').map((option) => option.trim()) : [];
  const variant = options.length > 0 ? `${blockName} (${options.join(', ')})` : blockName;

  return { blockName, options, variant };
};

/**
Expand All @@ -54,13 +60,19 @@ const mapAncestors = (ancestors) => ancestors.map((ancestor) => {
return `${ancestor.type} '${variant}'`;
});

async function loadFragments(document) {
/**
* Loads fragments from the given document.
*
* @param {Document} document - The document containing the fragments.
* @returns {Promise<void>} - A promise that resolves when all fragments are loaded.
*/
export async function loadFragments(document, fetchFunction = fetch) {
const links = selectAll('link', document.mdast).filter((node) => node.url.includes('/fragments/'));
await Promise.all(links.map(async (node) => {
config.reporter.log('fragments', 'info', 'Found Fragment Link', { entry: document.entry, url: node.url });
const fragmentUrl = new URL(node.url, config.siteUrl);
console.log(`Loading fragment: ${fragmentUrl.pathname}`);
const fragment = await loadDocument(fragmentUrl.pathname, config);
const fragment = await loadDocument(fragmentUrl.pathname, config, fetchFunction);
if (fragment && fragment.mdast) {
config.reporter.log('fragments', 'success', 'Loaded Fragment', { entry: fragment.entry, url: fragment.url });
delete node.url;
Expand All @@ -70,6 +82,42 @@ async function loadFragments(document) {
}));
}

/**
 * Returns the variant corresponding to the given index.
 * The variant is a string of capital letters,
 * starting from 0 = 'A' and going to 'Z', then 'AA' to 'ZZ', etc.
 *
 * @param {number} number - The index of the variant.
 * @returns {string} The variant ('' for negative input).
 */
export const getLetterScheme = (number) => {
  // Bijective base-26: peel off the low "digit" as a letter and recurse on
  // the remaining quotient. The negated comparison also rejects NaN.
  if (!(number >= 0)) return '';
  const quotient = Math.floor(number / 26) - 1;
  return getLetterScheme(quotient) + String.fromCharCode(65 + (number % 26));
};

/**
 * Retrieves the variant information for a given node and its ancestors.
 *
 * @param {Node} node - The node for which to retrieve the variant information.
 * @param {Array<Node>} ancestors - The ancestors of the node.
 * @returns {Object} - { structure, hash, variant } describing the node's ancestry.
 */
const getVariant = (node, ancestors) => {
  // Identify a variation by its full ancestry path, hashed for a stable key.
  const structure = `${mapAncestors(ancestors).join(' > ')} > ${node.type}`;
  const hash = createHash('sha1').update(structure).digest('hex');
  // Reuse the letter already assigned to this structure; otherwise mint the next one.
  const variant = reportVariations[hash]?.variant
    || getLetterScheme(Object.keys(reportVariations).length);

  return { structure, hash, variant };
};

/**
* Find the mdast structure variation for the faas link, "https://milo.adobe.com/tools/faas#...", and report it.
* Loop through the parent node types to analyze the structure.
Expand All @@ -78,34 +126,34 @@ async function loadFragments(document) {
* @param {Object} document - The document object
*/
export async function report(document) {
  const pageVariations = [];
  const { mdast, entry } = document;
  const faasTool = 'https://milo.adobe.com/tools/faas#';
  await loadFragments(document);

  // Bail out early when the page (including loaded fragments) has no FaaS links.
  const faasLinks = selectAll('link', mdast).filter((node) => node.url.startsWith(faasTool));
  if (faasLinks.length === 0) return pageVariations;

  visitParents(mdast, 'link', (node, ancestors) => {
    if (node.url.startsWith(faasTool)) {
      const variation = getVariant(node, ancestors);
      pageVariations.push(variation);

      // Track each unique structure once globally, counting occurrences and
      // keeping the first page it was seen on as an example.
      if (!reportVariations[variation.hash]) {
        reportVariations[variation.hash] = { ...variation, count: 0, example: entry };
      }
      reportVariations[variation.hash].count += 1;
      config.reporter.log('faas', 'info', 'Found FaaS Link', { ...variation, entry, url: node.url });
    }
  });

  return pageVariations;
}

/**
 * Initializes the report: loads the entry list (argument or config default),
 * keeps only resource and fragment pages, and records the final list for debugging.
 *
 * @param {string|Array<string>} [list] - Optional list source; falls back to config.list.
 * @returns {Promise<Object>} The populated config object.
 */
export async function init(list) {
  const entryList = await loadListData(list || config.list);
  config.list = entryList.filter((entry) => entry && (entry.includes('/resources/') || entry.includes('/fragments/')));
  fs.mkdirSync(`${pathname}reports/`, { recursive: true });
  fs.writeFileSync(`${pathname}reports/config-list.json`, JSON.stringify(config.list, null, 2));

  return config;
}
Expand All @@ -116,14 +164,16 @@ export function migration(document) {

// Entry point when run directly: build the report, then log every variation
// found, most frequent first, before saving and exiting.
if (import.meta.url === `file://${process.argv[1]}`) {
  const args = process.argv.slice(2);
  const [list] = args;

  await init(list);
  await BulkUpdate(config, report);

  const sortedVariations = Object.entries(reportVariations).sort((a, b) => b[1].count - a[1].count);
  sortedVariations.forEach(([hash, { count, structure, example, variant }]) => {
    config.reporter.log('faas-variations', 'info', 'Found Variation', { variant, count, hash, structure, example: `${config.siteUrl}${example}` });
  });

  config.reporter.saveReport();
  process.exit(0);
}
32 changes: 31 additions & 1 deletion test/bulk-update/document-manager/document-manager.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { expect } from '@esm-bundle/chai';
import fs from 'fs';
import { stub } from 'sinon';
import { loadDocument, saveDocument, entryToPath } from '../../../bulk-update/document-manager/document-manager.js';
import { loadDocument, saveDocument, entryToPath, hasExpired } from '../../../bulk-update/document-manager/document-manager.js';
import BaseReporter from '../../../bulk-update/reporter/reporter.js';

const { pathname } = new URL('.', import.meta.url);
Expand Down Expand Up @@ -35,6 +35,36 @@ describe('DocumentManager', () => {
config.reporter = new BaseReporter();
});

describe('hasExpired', () => {
  // Every scenario shares the same modification time; only the cache duration
  // and the simulated "current" date vary.
  const mtime = 'Thu Jan 01 2024 09:30:00 GMT-0800 (Pacific Standard Time)';
  const day = 24 * 60 * 60 * 1000;

  it('returns false for a cache expiry of 30 days and current date is 10 days after modification time', () => {
    const date = new Date('Thu Jan 10 2024 09:30:00 GMT-0800 (Pacific Standard Time)');
    expect(hasExpired(mtime, 30 * day, date)).to.equal(false);
  });

  it('returns true for a cache expiry of 7 days and current date is 1 month after modification time', () => {
    const date = new Date('Thu Feb 01 2024 09:30:00 GMT-0800 (Pacific Standard Time)');
    expect(hasExpired(mtime, 7 * day, date)).to.equal(true);
  });

  it('returns true when the cache expiry is set to 0 and a minute has passed since the last modification', () => {
    const date = new Date('Thu Jan 01 2024 09:31:00 GMT-0800 (Pacific Standard Time)');
    expect(hasExpired(mtime, 0, date)).to.equal(true);
  });

  it('returns false when the cache expiry is set to -1 (indicating no expiry) and a year has passed since the last modification', () => {
    const date = new Date('Thu Jan 01 2025 09:30:00 GMT-0800 (Pacific Standard Time)');
    expect(hasExpired(mtime, -1, date)).to.equal(false);
  });
});

describe('entryToPath', () => {
const tests = [
['/', '/index'],
Expand Down
Loading

0 comments on commit 3f39207

Please sign in to comment.