Skip to content
This repository has been archived by the owner on Feb 13, 2025. It is now read-only.

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
Brandon32 committed Mar 4, 2024
1 parent 5ed016f commit 77b142a
Show file tree
Hide file tree
Showing 13 changed files with 366 additions and 205 deletions.
41 changes: 10 additions & 31 deletions bulk-update/bulk-update.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import fs from 'fs';
import { fetch } from '@adobe/fetch';
import { loadDocument, checkLinks } from './document-manager/document-manager.js';
import { loadDocument } from './document-manager/document-manager.js';
import { validateMigration } from './validation/validation.js';

/**
 * Waits for the given number of milliseconds.
 *
 * @param {number} milliseconds - How long to wait before the promise resolves.
 * @returns {Promise<void>} - A promise that resolves once the timer fires.
 */
const delay = (milliseconds) => new Promise((done) => {
  setTimeout(done, milliseconds);
});

Expand Down Expand Up @@ -37,7 +38,7 @@ export async function loadQueryIndex(url, fetchFunction = fetch, fetchWaitMs = 5
const nextUrl = new URL(url);
nextUrl.searchParams.set('limit', limit);
nextUrl.searchParams.set('offset', offset + limit);
entries.push(...await loadQueryIndex(nextUrl.toString(), fetchFunction));
entries.push(...await loadQueryIndex(nextUrl.toString(), fetchFunction, fetchWaitMs));
}

return entries;
Expand All @@ -52,13 +53,13 @@ export async function loadQueryIndex(url, fetchFunction = fetch, fetchWaitMs = 5
* @returns {Promise<string[]>} - The loaded data as an array of strings.
* @throws {Error} - If the list format or entry is unsupported.
*/
export async function loadListData(source, fetchFunction = fetch) {
export async function loadListData(source, fetchFunction = fetch, fetchWaitMs = 500) {
if (!source) return [];
if (Array.isArray(source) || source.includes(',')) {
const entries = Array.isArray(source) ? source : source.split(',');
const loadedEntries = [];
for (const entry of entries) {
const loadedData = await loadListData(entry.trim(), fetchFunction);
const loadedData = await loadListData(entry.trim(), fetchFunction, fetchWaitMs);
if (loadedData) loadedEntries.push(...loadedData);
}
return loadedEntries;
Expand All @@ -73,38 +74,18 @@ export async function loadListData(source, fetchFunction = fetch) {
switch (extension) {
case 'json':
if (source.startsWith('http')) {
return loadQueryIndex(source, fetchFunction);
return loadQueryIndex(source, fetchFunction, fetchWaitMs);
}
return loadListData(JSON.parse(fs.readFileSync(source, 'utf8').trim()), fetchFunction);
return loadListData(JSON.parse(fs.readFileSync(source, 'utf8').trim()), fetchFunction, fetchWaitMs);
case 'txt':
return loadListData(fs.readFileSync(source, 'utf8').trim().split('\n'), fetchFunction);
return loadListData(fs.readFileSync(source, 'utf8').trim().split('\n'), fetchFunction, fetchWaitMs);
case 'html':
return [source];
default:
throw new Error(`Unsupported list format or entry: ${source}`);
}
}

/**
 * Validates the migration by checking the links in the entry against the provided configuration.
 *
 * @param {Object} document - The loaded document; only its `entry` is read.
 * @param {string} document.entry - The entry to validate.
 * @param {Object} config - The configuration object. `config.reporter` is optional.
 * @returns {Promise<void>} - A promise that resolves once the validation is complete.
 */
async function validateMigration({ entry }, config) {
  const links = await checkLinks(entry, config);
  if (!links) {
    console.log('Could not validate links');
    return;
  }

  const uniqueCount = links.unique.length;
  console.log(`Links Match: ${links.match}, ${uniqueCount} unique links found.`);
  if (uniqueCount) {
    // Guard the reporter itself: a config without a reporter must not abort validation.
    config?.reporter?.log('validation', 'error', 'Unique links found', { entry, count: uniqueCount });
    console.table(links.unique);
  }
}

/**
* Executes a bulk update operation using the provided migration function
* Loads data from various sources and executes bulk update operations from the migration function.
Expand All @@ -121,10 +102,8 @@ export default async function main(config, migrate, reporter = null) {
for (const [i, entry] of config.list.entries()) {
console.log(`Processing entry ${i + 1} of ${config.list.length} ${entry}`);
const document = await loadDocument(entry, config);
const success = await migrate(document);
if (success) {
await validateMigration(document, config);
}
await migrate(document);
await validateMigration(document, config);
}
} catch (e) {
console.error('Bulk Update Error:', e);
Expand Down
17 changes: 0 additions & 17 deletions bulk-update/document-manager/document-manager.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import fs from 'fs';
import { fetch, timeoutSignal, AbortError } from '@adobe/fetch';
import { mdast2docx } from '@adobe/helix-md2docx';
import parseMarkdown from '@adobe/helix-html-pipeline/src/steps/parse-markdown.js';
import { compare } from '../../link-check/linkCompare.js';

// Resolves after the given number of milliseconds.
const delay = (milliseconds) => new Promise((done) => {
  setTimeout(done, milliseconds);
});
// Path of the directory this module lives in, derived from the module URL.
const pathname = new URL('.', import.meta.url).pathname;
Expand All @@ -22,22 +21,6 @@ export function entryToPath(entry) {
return path;
}

/**
 * Checks the links of the generated docx against the original markdown document.
 *
 * @param {string} entry - The entry to check the links for.
 * @param {object} config - The configuration object; `outputDir` and `siteUrl` are read.
 * @returns {false|*} - `false` when the output docx does not exist, otherwise
 * whatever `compare` returns for the markdown URL and the docx path.
 */
export function checkLinks(entry, config) {
  const docxPath = `${config.outputDir}${entryToPath(entry)}.docx`;
  const markdownUrl = `${config.siteUrl}${entry}.md`;

  // Nothing to compare against when the migration produced no output file.
  return fs.existsSync(docxPath) ? compare(markdownUrl, docxPath) : false;
}

/**
* Fetches a markdown file from a given URL.
*
Expand Down
3 changes: 1 addition & 2 deletions bulk-update/migration-tools/select.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ import { select, selectAll } from 'unist-util-select';
* @param {string} str - The input block string.
* @returns {Object} - An object containing the block name and options.
*/
export const getBlockInfo = (str) => {
if (!str) return null;
export const getBlockInfo = (str = '') => {
const blockInfo = {};
const regex = /([\w\s-]+)\s*(?:\(([^)]*)\))?/;
const match = regex.exec(str.toLowerCase());
Expand Down
23 changes: 23 additions & 0 deletions bulk-update/validation/images.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/* eslint-disable import/prefer-default-export */
/**
 * Checks the alt text of images in a markdown string.
 *
 * Only images whose URL starts with `http` are reported. An image counts as
 * missing alt text when the text between `![` and `]` is empty or whitespace.
 *
 * @param {string} markdown - The markdown string to check.
 * @returns {string[]} - An array of URLs of images with missing alt text.
 */
export function checkAltText(markdown) {
  // Missing or non-string input means "no images" rather than a TypeError.
  if (typeof markdown !== 'string' || markdown.length === 0) return [];

  const imagePattern = /!\[(.*?)\]\((.*?)\)/g;
  const missingAltTextUrls = [];

  // matchAll exposes the capture groups directly, so no second regex pass is needed.
  for (const [, altText, url] of markdown.matchAll(imagePattern)) {
    if (!altText.trim() && url.startsWith('http')) {
      missingAltTextUrls.push(url);
    }
  }

  return missingAltTextUrls;
}
62 changes: 62 additions & 0 deletions bulk-update/validation/links.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
 * Compares two links and checks if they have the same host and pathname.
 *
 * Only host and pathname are compared; protocol, query string and hash are ignored.
 * Relative links are resolved against `site`.
 *
 * @param {string} link1 - The first link to compare.
 * @param {string} link2 - The second link to compare.
 * @param {string} [site='https://business.adobe.com/'] - Base URL used to resolve relative links.
 * @returns {boolean} - Returns true if the links have the same host and pathname, otherwise false.
 * @throws {TypeError} - If a link cannot be parsed as a URL even with the base.
 */
export function compareLink(link1, link2, site = 'https://business.adobe.com/') {
  // Without a base, `new URL` rejects every relative link, so default it here.
  const [url1, url2] = [link1, link2].map((link) => new URL(link.trim(), site));

  return url1.host === url2.host && url1.pathname === url2.pathname;
}
/**
 * Collects the absolute link targets found in markdown content.
 *
 * Every `[text](target)` occurrence is scanned in document order; only
 * targets that start with `http` are kept.
 *
 * @param {string} content - The markdown content.
 * @returns {string[]} - An array of links extracted from the content.
 */
export function extractLinks(content) {
  const linkPattern = /\[.*?\]\((.*?)\)/g;
  // Stringify first so non-string input is scanned as text, like RegExp#exec does.
  const targets = Array.from(`${content}`.matchAll(linkPattern), (found) => found[1]);

  return targets.filter((target) => target.startsWith('http'));
}

/**
 * Compares two arrays of links position by position.
 *
 * A position counts as a match only when both arrays have a link there and
 * `compareLink` accepts the pair, so a link present in just one array is
 * always reported as unique.
 *
 * @param {Array} links1 - The first array of links.
 * @param {Array} links2 - The second array of links.
 * @param {string} [site] - Base URL forwarded to `compareLink` for resolving relative links.
 * @returns {object} - `{ match, unique, links }`: `links` holds one
 * `{ link, link1, link2, match }` record per position, `unique` the records
 * that did not match, and `match` is true when `unique` is empty.
 */
export function compareLinks(links1, links2, site) {
  const result = { match: false, unique: [] };
  // Walk the longer list so trailing links that exist only in `links2` are not ignored.
  const length = Math.max(links1.length, links2.length);

  result.links = Array.from({ length }, (_, index) => {
    const link1 = links1[index];
    const link2 = links2[index];
    const match = (link1 && link2) ? compareLink(link1, link2, site) : false;

    return { link: index, link1, link2, match };
  });

  result.unique = result.links.filter((link) => !link.match);
  result.match = result.unique.length === 0;

  return result;
}

/**
 * Extracts the links of two markdown documents and compares them.
 *
 * @param {string} content1 - The first markdown content.
 * @param {string} content2 - The second markdown content.
 * @param {string} [site='https://business.adobe.com/'] - Site URL handed on to `compareLinks`.
 * @returns {object} - The result returned by `compareLinks`.
 */
export function compareMarkdown(content1, content2, site = 'https://business.adobe.com/') {
  const [firstLinks, secondLinks] = [content1, content2].map((content) => extractLinks(content));

  return compareLinks(firstLinks, secondLinks, site);
}
46 changes: 46 additions & 0 deletions bulk-update/validation/validation.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import fs from 'fs';
import { docx2md } from '@adobe/helix-docx2md';
import { entryToPath } from '../document-manager/document-manager.js';
import { compareMarkdown } from './links.js';
import { checkAltText } from './images.js';

/**
 * Compares the links of two markdown strings and reports any that do not match.
 *
 * @param {string} md - The first markdown string handed to `compareMarkdown`.
 * @param {string} markdown - The second markdown string handed to `compareMarkdown`.
 * @param {object} [reporter] - Optional reporter whose `log` method receives validation errors.
 * @param {string} entry - The entry being validated; included in the report.
 */
export function checkLinks(md, markdown, reporter, entry) {
  const comparison = compareMarkdown(md, markdown);

  if (!comparison) {
    console.log('Could not validate links');
    return;
  }

  const { match, unique } = comparison;
  console.log(`Links Match: ${match}, ${unique.length} unique links found.`);
  if (!unique.length) return;

  reporter?.log('validation', 'error', 'Unique links found', { entry, count: unique.length });
  console.table(unique);
}

/**
 * Reports the images that `checkAltText` flags as missing alt text.
 *
 * @param {string} md - The markdown string handed to `checkAltText`.
 * @param {string} markdown - Passed through to `checkAltText` as a second argument.
 * @param {object} [reporter] - Optional reporter whose `log` method receives validation errors.
 * @param {string} entry - The entry being validated; included in the report.
 */
export function checkImages(md, markdown, reporter, entry) {
  const flaggedUrls = checkAltText(md, markdown);
  const count = flaggedUrls.length;

  console.log(`Images Missing Alt Text: ${count}`);
  if (!(count > 0)) return;

  reporter?.log('validation', 'error', 'Missing alt text', { entry, count });
  console.log(flaggedUrls);
}

/**
 * Validates a migrated document by converting its docx output back to
 * markdown and running the link and image checks on the result.
 *
 * Does nothing when the docx output file does not exist. Errors raised while
 * reading, converting or checking are logged and reported, not rethrown.
 *
 * @param {object} document - The document; its `markdown` and `entry` are read.
 * @param {object} config - The configuration; its `reporter` and `outputDir` are read.
 * @returns {Promise<void>} - Resolves once validation has finished.
 */
export async function validateMigration(document, config) {
  const { markdown: documentMarkdown, entry } = document;
  const { reporter, outputDir } = config;
  const docxPath = `${outputDir}${entryToPath(entry)}.docx`;

  const hasOutput = fs.existsSync(docxPath);
  if (!hasOutput) return;

  try {
    const docxBuffer = await fs.promises.readFile(docxPath);
    const outputMarkdown = await docx2md(docxBuffer, { listener: null });

    checkLinks(outputMarkdown, documentMarkdown, reporter, entry);
    checkImages(outputMarkdown, documentMarkdown, reporter, entry);
  } catch (err) {
    console.error('Error validating migration:', err);
    reporter?.log('validation', 'error', 'Error validating migration', { entry, error: err.message });
  }
}
46 changes: 18 additions & 28 deletions link-check/linkCompare.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import { docx2md } from '@adobe/helix-docx2md';
* @returns {boolean} - Returns true if the links have the same host and pathname, otherwise false.
*/
export function compareLink(link1, link2) {
if (!link1 || !link2) return false;
const url1 = new URL(link1.trim(), 'https://business.adobe.com/');
const url2 = new URL(link2.trim(), 'https://business.adobe.com/');

Expand Down Expand Up @@ -49,54 +48,47 @@ function getFileType(source) {

return source.split('.').pop() || null;
}

/**
* Extracts links from content based on a given regex pattern.
* Extracts links from markdown content.
*
* @param {string} content - The content to extract links from.
* @param {RegExp} regex - The regex pattern to match links.
* @param {string} content - The markdown content.
* @returns {string[]} - An array of links extracted from the content.
*/
function findLinks(content, regex, i) {
function extractLinksFromMarkdown(content) {
const regex = /\[.*?\]\((.*?)\)/g;
const links = [];
let match = regex.exec(content);
while (match !== null) {
const link = match[i];
if (link.startsWith('http')) {
links.push(link);
}
links.push(match[1]);
match = regex.exec(content);
}
return links;
}

/**
 * Extracts links from markdown content.
 *
 * Delegates to `findLinks` with the markdown link pattern, selecting capture
 * group 1 (the target between the parentheses).
 *
 * @param {string} content - The markdown content.
 * @returns {string[]} - An array of links extracted from the content.
 */
export function extractLinksFromMarkdown(content) {
  return findLinks(content, /\[.*?\]\((.*?)\)/g, 1);
}

/**
* Extracts links from HTML content.
*
* @param {string} content - The HTML content.
* @returns {string[]} - An array of links extracted from the content.
*/
export function extractLinksFromHtml(content) {
function extractLinksFromHtml(content) {
const regex = /<a\s+(?:[^>]*?\s+)?href=(["'])(.*?)\1/g;
return findLinks(content, regex, 2);
const links = [];
let match = regex.exec(content);
while (match !== null) {
links.push(match[2]);
match = regex.exec(content);
}
return links;
}

/**
* Extracts links from a source based on its file type.
*
* @param {string} source - The source URL or file path.
* @param {Function} [fetchFn=fetch] - The function used to fetch the content from the source.
* @returns {Promise<string[]>} - An array of links extracted from the source.
* @param {string} content - The content of the source.
* @returns {string[]} - An array of links extracted from the source.
* @throws {Error} - Throws an error if the file type is unsupported.
*/
export async function extractLinks(source, fetchFn = fetch) {
Expand Down Expand Up @@ -128,14 +120,12 @@ export async function extractLinks(source, fetchFn = fetch) {
* @param {Array} links2 - The second array of links.
* @returns {Promise<object>} - Match status and unique links.
*/
export function compareLinks(links1, links2) {
export async function compareLinks(links1, links2) {
const result = { match: false, unique: [] };

result.links = links1.map((link1, index) => {
const link2 = links2[index];
const match = compareLink(link1, link2);

return { link: index, link1, link2, match };
return { index, link1, link2, match: compareLink(link1, link2) };
});

result.unique = result.links.filter((link) => !link.match);
Expand Down
4 changes: 2 additions & 2 deletions test/bulk-update/bulk-update.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ describe('BulkUpdater', () => {
],
}),
});
const data = await loadListData('https://main--bacom--adobecom.hlx.test/query-index.json', stubFetch);
const data = await loadListData('https://main--bacom--adobecom.hlx.test/query-index.json', stubFetch, 0);

expect(data).to.be.an('array');
expect(data.length).to.equal(1);
Expand All @@ -55,7 +55,7 @@ describe('BulkUpdater', () => {
],
}),
});
const data = await loadListData(`${pathname}mock/query-indexes.json`, stubFetch);
const data = await loadListData(`${pathname}mock/query-indexes.json`, stubFetch, 0);

expect(data).to.be.an('array');
expect(data).to.deep.equal(['/test/path1', '/test/path1']);
Expand Down
Loading

0 comments on commit 77b142a

Please sign in to comment.