Skip to content
This repository has been archived by the owner on Feb 13, 2025. It is now read-only.

MWPW-147240: Link validator #39

Merged
merged 9 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions test/validation/link-validator.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import fs from 'fs';
import { expect } from '@esm-bundle/chai';
import {
getLinksLists,
compareLinkLists,
LINKS_MATCH,
LINKS_DO_NOT_MATCH,
LENGTHS_DO_NOT_MATCH,
validateMigratedPageLinks,
} from '../../validation/link-validator.js';
import { getMdast } from '../../bulk-update/document-manager/document-manager.js';
import { ExcelReporter } from '../../bulk-update/index.js';

// Exercises the link validator against markdown fixtures in test/validation/mocks.
describe('Validator', () => {
  const sourceMd = fs.readFileSync('test/validation/mocks/adobe-experience-manager-source.md', 'utf-8');
  const updatedMd = fs.readFileSync('test/validation/mocks/adobe-experience-manager-updated.md', 'utf-8');
  const mismatchMd = fs.readFileSync('test/validation/mocks/adobe-experience-manager-updated-mismatched.md', 'utf-8');
  const shuffledMd = fs.readFileSync('test/validation/mocks/adobe-experience-manager-shuffled.md', 'utf-8');

  /**
   * Parses both markdown strings, extracts their link nodes and compares them.
   *
   * @param {string} sourceMarkdown markdown of the source document
   * @param {string} updatedMarkdown markdown of the updated document
   * @returns {Promise<Array>} the reporter message produced by compareLinkLists
   */
  const compareMocks = async (sourceMarkdown, updatedMarkdown) => {
    const sourceMdast = await getMdast(sourceMarkdown);
    const updatedMdast = await getMdast(updatedMarkdown);
    const { sourceLinks, updatedLinks } = await getLinksLists(sourceMdast, updatedMdast);

    return compareLinkLists(sourceLinks, updatedLinks);
  };

  it('Returns "all links match" based on link match', async () => {
    const message = await compareMocks(sourceMd, updatedMd);

    expect(message[1]).to.equal(LINKS_MATCH);
  });

  it('Returns "link mismatch mapping" based on link mismatch', async () => {
    const message = await compareMocks(sourceMd, mismatchMd);
    // The mismatched fixture differs from the source at link indexes 5 through 9.
    const { log } = message[3];

    expect(message[1]).to.equal(LINKS_DO_NOT_MATCH);
    expect(log['hashMatch-5']).to.be.false;
    expect(log['pathMatch-6']).to.be.false;
    expect(log['searchMatch-7']).to.be.false;
    expect(log['hostMatch-8']).to.be.false;
    expect(log['textMatch-9']).to.be.false;
  });

  it('Returns "source and updated list do not have the same length" when the files have different link numbers', async () => {
    const message = await compareMocks(sourceMd, shuffledMd);

    expect(message[2]).to.equal(LENGTHS_DO_NOT_MATCH);
  });

  it('validates the migration', async () => {
    const pathToListShort = 'test/validation/mocks/list.json';
    const mdPath = 'test/validation/mocks/md';
    const { pathname } = new URL('.', import.meta.url);
    const dateString = ExcelReporter.getDateString();
    const myReporter = new ExcelReporter(`${pathname}validation-${dateString}.xlsx`, false);

    await validateMigratedPageLinks(pathToListShort, mdPath, myReporter);
    const report = myReporter.getReport();

    expect(Object.keys(report.logs).length).to.equal(2);
    expect(report.logs['Compare Links'].length).to.equal(3);
    expect(report.logs['Deep Compare Links'].length).to.equal(2);
    // Uncomment to troubleshoot report
    // myReporter.saveReport();
  });
});
235 changes: 235 additions & 0 deletions test/validation/mocks/adobe-experience-manager-shuffled.md

Large diffs are not rendered by default.

235 changes: 235 additions & 0 deletions test/validation/mocks/adobe-experience-manager-source.md

Large diffs are not rendered by default.

243 changes: 243 additions & 0 deletions test/validation/mocks/adobe-experience-manager-updated-mismatched.md

Large diffs are not rendered by default.

243 changes: 243 additions & 0 deletions test/validation/mocks/adobe-experience-manager-updated.md

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions test/validation/mocks/list.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[
"/advertising",
"/affiliate-marketing-guide",
"/affinity-diagram-guide",
"/agile-daily-stand-up",
"/agile-development"
]
21 changes: 21 additions & 0 deletions validation/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Post Bulk Update Link Validation

Validation tool to test the integrity of links pre and post bulk update and user preview.

The tool takes the list of paths provided and uses it to check the source and updated md
folders that should contain md files pulled down from query-log.json files.

The validator differs from the link validation done during bulk updating by checking two different
folders that should be representative of md files from different stages of the bulk update process.
To ensure link integrity, the "updated" md folder should contain MDs that are either post update, post
Sharepoint opened, or both.

## Usage

Run the validation script directly, passing the path to the list file and the path to the md directory:

```bash
node validation/link-validator.js {path to list.json} {path to md directory}

example: node validation/link-validator.js ./blog-test/output/list.json ./blog-test/md
```
172 changes: 172 additions & 0 deletions validation/link-validator.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/* eslint-disable no-continue */
import fs from 'fs';
import { fileURLToPath, pathToFileURL } from 'url';
import { selectAll } from 'unist-util-select';
import { ExcelReporter, loadListData } from '../bulk-update/index.js';
import { getMdast } from '../bulk-update/document-manager/document-manager.js';

// Status returned by compareLinkLists when every source link matches its updated counterpart.
export const LINKS_MATCH = 'all links match';
// Status returned by deepCompare alongside its per-link comparison log.
export const LINKS_DO_NOT_MATCH = 'links mismatch mapping';
// Value returned by compareLinkLists when the two link lists differ in length.
export const LENGTHS_DO_NOT_MATCH = 'source and updated list do not have the same length';

/**
 * Collects the `link` nodes of the source tree and of the updated tree.
 *
 * @param {mdast} sourceMdast tree parsed from the source document
 * @param {mdast} updatedMdast tree parsed from the updated document
 * @returns {Promise<Object>} `{ sourceLinks, updatedLinks }`, each being the result of
 * selecting every `link` node from the matching tree
 */
export async function getLinksLists(sourceMdast, updatedMdast) {
  const selector = 'link';
  const sourceLinks = selectAll(selector, sourceMdast);
  const updatedLinks = selectAll(selector, updatedMdast);

  return { sourceLinks, updatedLinks };
}

/**
 * Parses a link target as a fully qualified URL.
 *
 * @param {string} url link target taken from a link node
 * @returns {URL|null} the parsed URL, or null when the target cannot be parsed on its own
 * (for example a relative path)
 */
function parseAbsoluteUrl(url) {
  try {
    return new URL(url);
  } catch (e) {
    return null;
  }
}

/**
 * Reads the text value of a link's first child without throwing on missing nodes.
 *
 * @param {Object} link link node, possibly undefined
 * @returns {string|undefined} the first child's `value`, or undefined when there is none
 */
function getLinkText(link) {
  return link?.children?.[0]?.value;
}

/**
 * Compares the source and updated link lists pair by pair (same index) and records,
 * for every pair, the raw values and which parts of the link match.
 *
 * @param {Array} sourceLinks list of mdast link nodes from the source document
 * @param {Array} updateLinks list of mdast link nodes from the updated document
 * @param {string} path path of the page being compared, echoed in the result
 * @returns {Array} reporter message: `['Deep Compare Links', LINKS_DO_NOT_MATCH, path, { log }]`
 * where `log` maps `<field>-<index>` keys to the recorded values
 */
export function deepCompare(sourceLinks, updateLinks, path) {
  console.log(`Deep comparing links on source and updated files at this path: ${path}`);
  const linkLog = {};

  sourceLinks.forEach((link, index) => {
    const updateLink = updateLinks[index];
    // Optional access keeps a missing counterpart (shorter updated list) from throwing.
    const sourceHref = link?.url;
    const updateHref = updateLink?.url;
    const sourceText = getLinkText(link);
    const updateText = getLinkText(updateLink);
    const sourceUrl = parseAbsoluteUrl(sourceHref);
    const updateUrl = parseAbsoluteUrl(updateHref);

    if (!sourceUrl) {
      linkLog[`relative-source-link-${index}`] = sourceHref;
      linkLog[`relative-source-text-${index}`] = sourceText;
    }
    if (!updateUrl) {
      // Record the updated link here, not the source one.
      linkLog[`relative-update-link-${index}`] = updateHref;
      linkLog[`relative-update-text-${index}`] = updateText;
    }

    // Partial matches are not fully qualified urls
    if (!sourceUrl || !updateUrl) {
      linkLog[`partialUrlMatch-${index}`] = sourceHref === updateHref;
    }

    linkLog[`sourceLink-${index}`] = sourceHref;
    linkLog[`updatedLink-${index}`] = updateHref;
    linkLog[`sourceText-${index}`] = sourceText;
    linkLog[`updatedText-${index}`] = updateText;
    linkLog[`linksMatch-${index}`] = sourceHref === updateHref;
    // URL parts are only comparable when the source parsed; an unparsable updated link
    // yields undefined parts, so the comparison reports false.
    linkLog[`hashMatch-${index}`] = sourceUrl ? sourceUrl.hash === updateUrl?.hash : '';
    linkLog[`hostMatch-${index}`] = sourceUrl ? sourceUrl.host === updateUrl?.host : '';
    linkLog[`pathMatch-${index}`] = sourceUrl ? sourceUrl.pathname === updateUrl?.pathname : '';
    linkLog[`searchMatch-${index}`] = sourceUrl ? sourceUrl.search === updateUrl?.search : '';
    linkLog[`textMatch-${index}`] = sourceText === updateText;
  });

  return ['Deep Compare Links', LINKS_DO_NOT_MATCH, path, { log: linkLog }];
}

/**
 * First-pass comparison of two link lists. Lists of different length are reported
 * immediately; otherwise each pair at the same index is compared by url and by the
 * text value of its first child. Any difference hands over to deepCompare.
 *
 * @param {Array} sourceLinks link nodes from the source document
 * @param {Array} updatedLinks link nodes from the updated document
 * @param {string} path path of the page being compared
 * @returns {Array} a reporter message: the length-mismatch message, the deepCompare
 * result, or `['Compare Links', LINKS_MATCH, path]`
 */
export function compareLinkLists(sourceLinks, updatedLinks, path) {
  console.log(`Comparing source and update files at this path: ${path}`);

  // Lists of different sizes cannot be paired up index by index.
  const sameLength = sourceLinks.length === updatedLinks.length;
  if (!sameLength) {
    return ['Compare Links', 'list length', LENGTHS_DO_NOT_MATCH];
  }

  // Every pair is visited; a single differing pair is enough to flag the page.
  let foundDifference = false;
  sourceLinks.forEach((before, position) => {
    const after = updatedLinks[position];
    const pairIsIdentical = before.url === after.url
      && before?.children[0]?.value === after?.children[0]?.value;
    if (pairIsIdentical === false) {
      foundDifference = true;
    }
  });

  return foundDifference
    ? deepCompare(sourceLinks, updatedLinks, path)
    : ['Compare Links', LINKS_MATCH, path];
}

// Single reporter status for unreadable files so both cases are grouped together.
const MISSING_FILE_STATUS = 'File does not exist at provided path';

/**
 * Builds the markdown file location for a page inside one stage folder.
 *
 * @param {string} mdPath root folder holding the `source` and `updated` folders
 * @param {string} stage stage folder name (`source` or `updated`)
 * @param {string} pagePath page path from the list; a trailing slash maps to `index.md`
 * @returns {string} path to the markdown file
 */
function buildMdFilePath(mdPath, stage, pagePath) {
  const fileName = pagePath.endsWith('/') ? `${pagePath}index.md` : `${pagePath}.md`;
  return `${mdPath}/${stage}${fileName}`;
}

/**
 * Reads a markdown file; on failure logs the problem to the console and the reporter.
 *
 * @param {string} filePath path to the markdown file
 * @param {ExcelReporter} reporter reporter receiving the error entry
 * @returns {string|null} file content, or null when the file could not be read
 */
function readMdOrReport(filePath, reporter) {
  try {
    return fs.readFileSync(filePath, 'utf-8');
  } catch (e) {
    // Any read failure is reported as a missing file, as before.
    console.log(`File does not exist at provided path: ${filePath}`);
    reporter.log('Error', MISSING_FILE_STATUS, filePath);
    return null;
  }
}

/**
 * Validates the links of every listed page by comparing its source markdown with its
 * updated markdown, and logs one reporter entry per page. Pages whose source or updated
 * file cannot be read are reported as errors and skipped.
 *
 * @param {*} list value handed to loadListData to obtain the page paths
 * (the CLI passes the path to a list file)
 * @param {string} mdPath folder containing the `source` and `updated` markdown folders
 * @param {ExcelReporter} reporter reporter that receives the results
 * @returns {Promise<void>}
 */
export async function validateMigratedPageLinks(list, mdPath, reporter) {
  const listData = await loadListData(list);

  for (const pagePath of listData) {
    // The updated file is only read once the source file is known to be readable.
    const sourceMd = readMdOrReport(buildMdFilePath(mdPath, 'source', pagePath), reporter);
    if (sourceMd === null) continue;

    const updatedMd = readMdOrReport(buildMdFilePath(mdPath, 'updated', pagePath), reporter);
    if (updatedMd === null) continue;

    const sourceMdast = await getMdast(sourceMd);
    const updatedMdast = await getMdast(updatedMd);
    const { sourceLinks, updatedLinks } = await getLinksLists(sourceMdast, updatedMdast);
    const message = compareLinkLists(sourceLinks, updatedLinks, pagePath);

    // Only deepCompare results carry a fourth element with the per-link log.
    reporter.log(message[0], message[1], message[2], message[3]?.log);
  }
}

/**
 * Creates the reporter, runs the link validation, then generates totals and saves
 * the report as `validation-<date>.xlsx` next to this module.
 *
 * @param {*} listPath value forwarded to validateMigratedPageLinks as the list
 * (the CLI passes the path to a list file)
 * @param {string} mdPath folder containing the markdown files to validate
 * @returns {Promise<void>}
 */
export async function main(listPath, mdPath) {
  // fileURLToPath decodes percent-escapes and yields a native path (also on Windows),
  // which URL.pathname does not. The directory URL keeps its trailing separator.
  const moduleDir = fileURLToPath(new URL('.', import.meta.url));
  const dateString = ExcelReporter.getDateString();
  const myReporter = new ExcelReporter(`${moduleDir}validation-${dateString}.xlsx`, false);

  await validateMigratedPageLinks(listPath, mdPath, myReporter);
  myReporter.generateTotals();
  myReporter.saveReport();
}

/**
 * Entry point used by the command line: forwards both arguments to main and
 * resolves once main has finished.
 *
 * @param {*} list value forwarded to main as the list
 * @param {string} mdPath value forwarded to main as the markdown folder
 * @returns {Promise<void>}
 */
export async function init(list, mdPath) {
  const pendingRun = main(list, mdPath);
  await pendingRun;
}

// Run as a command line tool only when this module is the script Node was started with.
// Example arguments: './blog-test/output/list.json' './blog-test/md'
// pathToFileURL produces the same encoding as import.meta.url (percent-escapes, Windows
// drive letters), which a hand-built `file://` string does not.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const [list, mdPath] = process.argv.slice(2);

  await init(list, mdPath);
  process.exit(0);
}
Loading