Skip to content
This repository has been archived by the owner on Feb 13, 2025. It is now read-only.

Commit

Permalink
MWPW-144868 Card Metadata Refinement
Browse files Browse the repository at this point in the history
  • Loading branch information
Brandon32 committed Mar 20, 2024
1 parent 1346fe0 commit b3e0bba
Show file tree
Hide file tree
Showing 11 changed files with 201 additions and 45 deletions.
54 changes: 35 additions & 19 deletions blog-caas/migration.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,23 @@ const dateString = ExcelReporter.getDateString();
const missingTags = {};

const config = {
list: `${pathname}list.txt`,
list: [
'https://main--bacom-blog--adobecom.hlx.live/de/blog/query-index.json',
'https://main--bacom-blog--adobecom.hlx.live/fr/blog/query-index.json',
'https://main--bacom-blog--adobecom.hlx.live/au/blog/query-index.json',
'https://main--bacom-blog--adobecom.hlx.live/uk/blog/query-index.json',
'https://main--bacom-blog--adobecom.hlx.live/blog/query-index.json',
'https://main--bacom-blog--adobecom.hlx.live/jp/blog/query-index.json',
'https://main--bacom-blog--adobecom.hlx.live/kr/blog/query-index.json',
],
siteUrl: 'https://main--bacom-blog--adobecom.hlx.live',
prodSiteUrl: 'https://business.adobe.com',
reporter: new ExcelReporter(`${pathname}reports/caas-${dateString}.xlsx`, false),
outputDir: `${pathname}output`,
mdDir: `${pathname}md`,
mdCacheMs: 1 * 24 * 60 * 60 * 1000, // 1 day(s)
fetchWaitMs: 20,
validateMigration: !DRY_RUN,
};

function loadCaasMappings() {
Expand Down Expand Up @@ -78,7 +87,7 @@ function mapTags(metadata, entry) {
* @returns {Object} - The card metadata object.
*/
export function getCardMetadata(mdast, entry) {
const cardMetadata = { ContentType: 'blog' };
const cardMetadata = { };
const pageTitle = select('heading[depth="1"] text', mdast);
const pageImage = select('image', mdast);
const metadataBlock = selectBlock(mdast, 'metadata');
Expand All @@ -99,8 +108,13 @@ export function getCardMetadata(mdast, entry) {
cardMetadata.Title = metadata.Title;
}
if (metadata['Publication Date']) {
const [date] = new Date(metadata['Publication Date']).toISOString().split('T');
cardMetadata.CardDate = date;
try {
const publicationDate = new Date(metadata['Publication Date']);
const [date] = publicationDate.toISOString().split('T');
cardMetadata.CardDate = date;
} catch (error) {
config.reporter?.log('Card Metadata', 'Error', 'Error parsing publication date', { Value: metadata['Publication Date'], entry });
}
}
if (metadata.Description) cardMetadata.CardDescription = metadata.Description;

Expand Down Expand Up @@ -138,13 +152,12 @@ export function createBlock(name, fields) {
*/
export function createCardMetadataBlock(cardMetadata) {
const fields = [
[u('text', 'Title'), u('text', cardMetadata.Title)],
[u('text', 'CardDate'), u('text', cardMetadata.CardDate)],
[u('text', 'Title'), u('text', cardMetadata.Title ?? '')],
[u('text', 'CardDate'), u('text', cardMetadata.CardDate ?? '')],
[u('text', 'CardImage'), cardMetadata.CardImage],
[u('text', 'CardImageAltText'), u('text', cardMetadata.CardImageAltText)],
[u('text', 'CardDescription'), u('text', cardMetadata.CardDescription)],
[u('text', 'ContentType'), u('text', cardMetadata.ContentType)],
[u('text', 'primaryTag'), u('text', cardMetadata.PrimaryTag)],
[u('text', 'CardImageAltText'), u('text', cardMetadata.CardImageAltText ?? '')],
[u('text', 'CardDescription'), u('text', cardMetadata.CardDescription ?? '')],
[u('text', 'primaryTag'), u('text', cardMetadata.PrimaryTag ?? '')],
[u('text', 'Tags'), u('text', cardMetadata.Tags.join(', '))],
];

Expand All @@ -162,7 +175,7 @@ export function createCardMetadataBlock(cardMetadata) {
* @returns {boolean} - True if the metadata is valid, false otherwise.
*/
export function validateCardMetadata(cardMetadata, reporter, entry) {
const requiredFields = ['Title', 'CardDescription', 'CardDate', 'Tags', 'CardImage'];
const requiredFields = ['Title', 'CardDate', 'Tags', 'CardImage'];
let valid = true;

const missingFields = requiredFields.filter((field) => !cardMetadata[field]);
Expand All @@ -172,7 +185,7 @@ export function validateCardMetadata(cardMetadata, reporter, entry) {
}

if (cardMetadata.CardDate && !/^\d{4}-\d{2}-\d{2}$/.test(cardMetadata.CardDate)) {
reporter?.log('Card Metadata', 'Error', 'Card Date should be in YYYY-MM-DD format.', { Value: cardMetadata.CardDate, entry });
reporter?.log('Card Metadata', 'Error', 'Card Date should be in YYYY-MM-DD format.', { Value: cardMetadata.CardDate, entry, CardDate: cardMetadata.CardDate });
valid = false;
}

Expand All @@ -182,15 +195,16 @@ export function validateCardMetadata(cardMetadata, reporter, entry) {

const { Title, CardDescription } = cardMetadata;
if (Title && Title.length > MAX_CARD_TITLE_LENGTH) {
reporter?.log('Card Metadata', 'Error', `Card Title should be a maximum of ${MAX_CARD_TITLE_LENGTH} characters.`, { Value: Title, entry });
valid = false;
reporter?.log('Card Metadata', 'Warning', `Card Title should be a maximum of ${MAX_CARD_TITLE_LENGTH} characters.`, { Value: Title, entry });
}

if (CardDescription && CardDescription.length > MAX_CARD_DESCRIPTION_LENGTH) {
reporter?.log('Card Metadata', 'Error', `Card Description should be a maximum of ${MAX_CARD_DESCRIPTION_LENGTH} characters.`, { Value: CardDescription, entry });
valid = false;
reporter?.log('Card Metadata', 'Warning', `Card Description should be a maximum of ${MAX_CARD_DESCRIPTION_LENGTH} characters.`, { Value: CardDescription, entry });
} else if (CardDescription && CardDescription.length > WARNING_CARD_DESCRIPTION_LENGTH) {
reporter?.log('Card Metadata', 'Warning', `Card Description exceeds ${WARNING_CARD_DESCRIPTION_LENGTH} characters.`, { Value: CardDescription, entry });
reporter?.log('Card Metadata', 'Info', `Card Description exceeds ${WARNING_CARD_DESCRIPTION_LENGTH} characters.`, { Value: CardDescription, entry });
}
if (!CardDescription) {
reporter?.log('Missing Description', 'Info', 'Card Description is required', { entry, CardDate: cardMetadata.CardDate });
}

return valid;
Expand All @@ -210,7 +224,9 @@ export async function migrate(document) {
}

const cardMetadata = getCardMetadata(mdast, entry);
validateCardMetadata(cardMetadata, config.reporter, entry);
const isValid = validateCardMetadata(cardMetadata, config.reporter, entry);
if (!isValid) return false;

const cardMetadataBlock = createCardMetadataBlock(cardMetadata);
mdast.children.push(cardMetadataBlock);
if (DRY_RUN) {
Expand All @@ -232,7 +248,7 @@ export async function migrate(document) {
*/
export async function init(list) {
const entryList = await loadListData(list || config.list);
config.list = entryList;
config.list = entryList.filter((entry) => entry && !entry.match(/^\/\w{0,2}\/?blog\/$/));

return config;
}
Expand Down
8 changes: 6 additions & 2 deletions bulk-update/bulk-update.js
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,17 @@ export async function loadListData(source, fetchFunction = fetch, fetchWaitMs =
*/
export default async function main(config, migrate, reporter = null) {
config.reporter = reporter || config.reporter;
const { length } = config.list;

try {
for (const [i, entry] of config.list.entries()) {
console.log(`Processing entry ${i + 1} of ${config.list.length} ${entry}`);
const percentage = Math.round(((i + 1) / length) * 10000) / 100;
console.log(`Processing entry ${i + 1} of ${length} (${percentage}%) ${entry}`);
const document = await loadDocument(entry, config);
await migrate(document);
await validateMigration(document, config);
if (config.validateMigration) {
await validateMigration(document, config);
}
}
} catch (e) {
console.error('Bulk Update Error:', e);
Expand Down
2 changes: 2 additions & 0 deletions bulk-update/document-manager/document-manager.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import fs from 'fs';
import { fetch, timeoutSignal, AbortError } from '@adobe/fetch';
import { mdast2docx } from '@adobe/helix-md2docx';
import parseMarkdown from '@adobe/helix-html-pipeline/src/steps/parse-markdown.js';
import validateMdast from '../validation/mdast.js';

const delay = (milliseconds) => new Promise((resolve) => { setTimeout(resolve, milliseconds); });
const { pathname } = new URL('.', import.meta.url);
Expand Down Expand Up @@ -177,6 +178,7 @@ export async function loadDocument(entry, config, fetchFunction = fetch) {
async function saveDocx(mdast, output) {
const outputFolder = output.split('/').slice(0, -1).join('/');
fs.mkdirSync(outputFolder, { recursive: true });
validateMdast(mdast);

const stylesXML = fs.readFileSync(`${pathname}styles.xml`, 'utf8');
const buffer = await mdast2docx(mdast, { stylesXML });
Expand Down
3 changes: 2 additions & 1 deletion bulk-update/index.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import { saveDocument } from './document-manager/document-manager.js';
import { loadDocument, saveDocument } from './document-manager/document-manager.js';

Check warning on line 1 in bulk-update/index.js

View workflow job for this annotation

GitHub Actions / Running eslint

[eslint] reported by reviewdog 🐶 File ignored because of a matching ignore pattern. Use "--no-ignore" to override. Raw Output: {"ruleId":null,"fatal":false,"severity":1,"message":"File ignored because of a matching ignore pattern. Use \"--no-ignore\" to override.","nodeType":null}
import ConsoleReporter from './reporter/console-reporter.js';
import ExcelReporter from './reporter/excel-reporter.js';
import BulkUpdate, { loadListData } from './bulk-update.js';

export {
BulkUpdate,
loadDocument,
saveDocument,
ConsoleReporter,
ExcelReporter,
Expand Down
4 changes: 2 additions & 2 deletions bulk-update/migration-tools/select.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ export const getBlockInfo = (str = '') => {
const blockInfo = {};
const regex = /([\w\s-]+)\s*(?:\(([^)]*)\))?/;
const match = regex.exec(str.toLowerCase());
const [, blockName, optionsRaw] = match.map((t) => (t ? t.trim() : undefined));
const [, blockName, optionsRaw] = match?.map((t) => (t ? t.trim() : undefined)) ?? [];

blockInfo.blockName = blockName.replace(/\s+/g, '-');
blockInfo.blockName = blockName ? blockName.replace(/\s+/g, '-') : '';
blockInfo.options = optionsRaw ? optionsRaw.split(',').map((option) => option.trim()) : [];
blockInfo.variant = blockInfo.options.length > 0 ? `${blockInfo.blockName} (${blockInfo.options.join(', ')})` : blockInfo.blockName;

Expand Down
31 changes: 31 additions & 0 deletions bulk-update/validation/mdast.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { visitParents } from 'unist-util-visit-parents';
import { select } from 'unist-util-select';
import { getBlockInfo } from '../migration-tools/select.js';

/**
 * Describes each ancestor of a node, for use in validation error messages.
 * Non-table ancestors are reported by their node type. A 'gridTable' ancestor
 * is reported as its type followed by the block variant parsed from the first
 * text node found inside the table.
 *
 * @param {Array} ancestors - The ancestor nodes to describe.
 * @returns {Array} - One label per ancestor, either `<type>` or `gridTable '<variant>'`.
 */
export const mapAncestors = (ancestors) => ancestors.map((ancestor) => {
  if (ancestor.type !== 'gridTable') {
    return ancestor.type;
  }
  // The first text node in the table supplies the block name and options.
  const cell = select('text', ancestor);
  // The table may contain no text node at all, or one whose value is null or
  // undefined. Fall back to an empty string so building the label never throws
  // and hides the validation error this label is meant to describe.
  const { variant } = getBlockInfo(cell?.value ?? '');

  return `${ancestor.type} '${variant}'`;
});

/**
 * Walks every 'text' node of the given tree and throws on the first one whose
 * value is falsy (missing, null or an empty string). The error message lists
 * the node's ancestor chain followed by the serialized node.
 *
 * @param {Object} mdast - The tree to check.
 * @throws {Error} When a text node without a value is found.
 */
export default function validateMdast(mdast) {
  const assertHasValue = (textNode, parents) => {
    if (textNode.value) return;

    const trail = mapAncestors(parents).join(' > ');
    throw new Error(`Invalid text node ${trail} > ${textNode.type}: ${JSON.stringify(textNode)}`);
  };

  visitParents(mdast, 'text', assertHasValue);
}
3 changes: 1 addition & 2 deletions migration-example/custom-migration.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { loadDocument, saveDocument } from '../bulk-update/document-manager/document-manager.js';
import ConsoleReporter from '../bulk-update/reporter/console-reporter.js';
import { ConsoleReporter, loadDocument, saveDocument } from '../bulk-update/index.js';

/**
* Example Migration, run using `node migration-example/custom-migration.js`
Expand Down
67 changes: 48 additions & 19 deletions migration-example/migration.js
Original file line number Diff line number Diff line change
@@ -1,45 +1,74 @@
import ExcelReporter from '../bulk-update/reporter/excel-reporter.js';
import { saveDocument } from '../bulk-update/document-manager/document-manager.js';
/**
* Example Migration using the bulk-update library.
*/

import { BulkUpdate, ExcelReporter, saveDocument } from '../bulk-update/index.js';

const { pathname } = new URL('.', import.meta.url);
const config = {
list: ['/'], // The list of entries to migrate
siteUrl: 'https://main--bacom--adobecom.hlx.live', // The site URL
reporter: new ExcelReporter(`${pathname}reports/example.xlsx`), // The logging type
outputDir: `${pathname}output`, // The output directory for the docx files
list: ['/'], // The list of entries to migrate.
siteUrl: 'https://main--bacom--adobecom.hlx.live', // The site URL.
reporter: new ExcelReporter(`${pathname}reports/example.xlsx`, true), // The logging type, save location and autosave.
outputDir: `${pathname}output`, // The output directory for the docx files.
mdDir: `${pathname}md`, // The directory for storing the fetched markdown.
mdCacheMs: 0, // The markdown cache time in milliseconds.
};

/**
* Example Migration, run using `npm run bulk-update 'migration-example'`
* Adds a "Hello World" heading to the given mdast.
*
* @param {Object} mdast - The mdast object to modify.
*/
function addHelloWorld(mdast, entry) {
  // Build the level-1 "Hello World" heading from its parts.
  const label = { type: 'text', value: 'Hello World' };
  const heading = { type: 'heading', depth: 1, children: [label] };

  // Insert the heading in front of the existing top-level nodes.
  mdast.children.splice(0, 0, heading);

  // Record the change for this entry on the "hello world" tab with a success status.
  config.reporter.log('hello world', 'success', 'Added Hello World', { entry });
}

/**
* Run using `npm run bulk-update 'migration-example'`
*
* @returns {Object} - The configuration object for the migration.
*/
export function init() {
// Any file path filtering of the list can be done here.
// This example applies no filtering and returns the module-level config as is.
return config;
}

/**
* Example Migration
*
* @param {Object} document - The document to be migrated.
* @param {string} document.entry - The entry path of the document.
* @param {Object} document.mdast - The Markdown AST of the document.
*/
export async function migrate(document) {
const { mdast } = document;
const { mdast, entry } = document;
// Additional filtering based on content can be done here.

mdast.children.unshift({
type: 'heading',
depth: 1,
children: [
{
type: 'text',
value: 'Hello World',
},
],
});
// Helper functions, add a heading to the document.
addHelloWorld(mdast, entry);

config.reporter.log('hello world', 'success', 'Added Hello World', document.entry);
// Save the document after migrating.
await saveDocument(document, config);
}

/**
 * Runs the migration when this file is executed directly,
 * e.g. `node migration-example/migration.js`.
 * Nothing is run when the module is only imported.
 */
const { pathToFileURL } = await import('node:url');
// import.meta.url is a percent-encoded file URL, so a hand-built
// `file://${path}` string does not match for paths containing spaces or other
// encoded characters, nor for Windows paths. Convert the script path to a
// file URL before comparing.
const entryScript = process.argv[1];
if (entryScript && import.meta.url === pathToFileURL(entryScript).href) {
  await BulkUpdate(config, migrate);
  process.exit();
}
19 changes: 19 additions & 0 deletions migration-example/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Hello World Migration

## Description

This migration adds a level-1 "Hello World" heading to the top of each document.
This is an example of using the bulk update library located at `bulk-update/index.js` to perform bulk updates.

## Usage

Run the migration script using the command `npm run bulk-update 'migration-example'` or `node migration-example/custom-migration.js`.

## Overview

The `migration-example/migration.js` script is responsible for setting up the configuration object and performing the actual migration.
The `init` function returns the configuration object for the migration, and the `migrate` function performs the actual migration.

## Custom Migrations

For complete control over the migration process, you can create a custom migration script. The `migration-example/custom-migration.js` script is an example of this.
1 change: 1 addition & 0 deletions test/bulk-update/migration-tools/select.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ describe('Select', () => {
['Block Name', { blockName: 'block-name', options: [], variant: 'block-name' }],
['Block Name(option1)', { blockName: 'block-name', options: ['option1'], variant: 'block-name (option1)' }],
['Block Name(option1, Option2)', { blockName: 'block-name', options: ['option1', 'option2'], variant: 'block-name (option1, option2)' }],
['', { blockName: '', options: [], variant: '' }],
];

tests.forEach(([input, expectedOutput]) => {
Expand Down
Loading

0 comments on commit b3e0bba

Please sign in to comment.