Skip to content
This repository has been archived by the owner on Feb 13, 2025. It is now read-only.

Commit

Permalink
MWPW-147512 Markdown Downloader
Browse files Browse the repository at this point in the history
  • Loading branch information
Brandon32 committed May 7, 2024
1 parent 5abf00c commit 4b81064
Show file tree
Hide file tree
Showing 6 changed files with 292 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
},
{
"type": "node-terminal",
"name": "Test File",
"name": "Test Current File",
"request": "launch",
"command": "npm run test ${file}",
"cwd": "${workspaceFolder}"
Expand Down
19 changes: 15 additions & 4 deletions bulk-update/bulk-update.js
Original file line number Diff line number Diff line change
Expand Up @@ -87,19 +87,30 @@ export async function loadListData(source, fetchFunction = fetch, fetchWaitMs =
}

/**
* Generates the staged-content URL by localizing the stage path based on the entry path.
* Generates the staged-content path by localizing the stage path based on the entry path.
*
* @param {string} siteUrl - The base URL of the site.
* @param {string} entry - The entry path.
* @param {string} stagePath - The path to the stage.
* @param {string[]} locales - An array of supported locales.
* @returns {string} The staged URL.
*/
export function localizeStageUrl(siteUrl, entry, stagePath = '', locales = []) {
export function localizeStagePath(entry, stagePath = '', locales = []) {
  // Pick the first non-empty locale whose "/<locale>/" prefix starts the entry.
  const matchedLocale = locales.find((locale) => locale && entry.startsWith(`/${locale}/`));

  let stagedPath;
  if (matchedLocale) {
    // Localized entry: the stage path goes right after the locale segment.
    stagedPath = entry.replace(`/${matchedLocale}/`, `/${matchedLocale}${stagePath}/`);
  } else {
    // No locale prefix: the stage path is simply prepended.
    stagedPath = `${stagePath}${entry}`;
  }

  // Collapse runs of slashes produced by joining the segments.
  return stagedPath.replace(/\/+/g, '/');
}

return `${siteUrl}${localizedPath}`;
/**
* Generates the staged-content URL by localizing the stage path based on the entry path.
*
* @param {string} siteUrl - The base URL of the site.
* @param {string} entry - The entry path.
* @param {string} stagePath - The path to the stage.
* @param {string[]} locales - An array of supported locales.
* @returns {string} The staged URL.
*/
export function localizeStageUrl(siteUrl, entry, stagePath = '', locales = []) {
  // The staged URL is the site base followed by the localized staged path.
  const stagedPath = localizeStagePath(entry, stagePath, locales);
  return siteUrl + stagedPath;
}

/**
Expand Down
15 changes: 7 additions & 8 deletions bulk-update/document-manager/document-manager.js
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,14 @@ function loadMarkdownFromFile(markdownFile, mdCacheMs) {
}

/**
* Saves the provided markdown content to a file.
* Saves the provided content to a file.
*
* @param {string} markdownFile - The path of the markdown file to save.
* @param {string} markdown - The markdown content to be saved.
* @param {string} file - The path of the file to save.
* @param {string} content - The content to be saved.
*/
function saveMarkdownToFile(markdownFile, markdown) {
const folder = markdownFile.split('/').slice(0, -1).join('/');
fs.mkdirSync(folder, { recursive: true });
fs.writeFileSync(markdownFile, markdown);
export function saveToFile(file, content) {
  // Create the parent folder (and any missing ancestors) before writing.
  const parentDir = path.dirname(file);
  fs.mkdirSync(parentDir, { recursive: true });

  fs.writeFileSync(file, content);
}

/**
Expand Down Expand Up @@ -164,7 +163,7 @@ export async function loadDocument(entry, config, fetchFunction = fetch) {
document.markdown = await fetchMarkdown(`${document.url}.md`, reporter, fetchWaitMs, fetchFunction);

if (document.markdown) {
if (mdDir) saveMarkdownToFile(document.markdownFile, document.markdown);
if (mdDir) saveToFile(document.markdownFile, document.markdown);
document.mdast = getMdast(document.markdown, reporter);
reporter.log('load', 'success', 'Fetched entry', { entry });

Expand Down
172 changes: 172 additions & 0 deletions download-markdown/download-markdown.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import path from 'path';
import fs from 'fs';
import { pathToFileURL } from 'url';
import { fetch, timeoutSignal, AbortError } from '@adobe/fetch';
import { saveToFile, entryToPath } from '../bulk-update/document-manager/document-manager.js';
import { localizeStageUrl } from '../bulk-update/bulk-update.js';

/** Returns a promise that resolves (with no value) after the given number of milliseconds. */
function delay(milliseconds) {
  return new Promise((resolve) => {
    setTimeout(resolve, milliseconds);
  });
}

// When true, downloadMD skips an entry whose `<entry>.md` file already exists in the output folder.
const ALLOW_SKIP = true; // Allow skipping files that already exist
// Delay (ms) applied before each fetch when the document URL contains 'hlx.page'.
const PAGE_DELAY = 500; // 500ms delay for fetching from hlx.page
// Delay (ms) applied before each fetch for any other URL.
const LIVE_DELAY = 0; // 0ms delay for fetching from live site

/**
 * Loads and parses a JSON file located inside the given directory.
 *
 * @param {string} file - The name (or relative path) of the JSON file.
 * @param {string} directory - The directory the file is resolved against.
 * @returns {object|null} The parsed JSON content, or null (after logging an error)
 * when the file does not exist.
 */
function readJsonFile(file, directory) {
  const filePath = path.join(directory, file);

  if (fs.existsSync(filePath)) {
    const raw = fs.readFileSync(filePath, 'utf8');
    return JSON.parse(raw);
  }

  console.error(`File not found: ${filePath}`);
  return null;
}

/**
 * Fetches markdown content from the specified URL.
 *
 * Waits `fetchWaitMs` before issuing the request and aborts the request if it has not
 * completed within 5 seconds. Failures are logged and reported as an empty string
 * instead of being thrown.
 *
 * @param {string} url - The URL of the markdown file to fetch.
 * @param {number} fetchWaitMs - The delay in milliseconds before making the fetch request.
 * @param {function} fetchFn - The fetch function to use.
 * @returns {Promise<string>} The fetched markdown, or '' when the response is not ok,
 * the request times out, or the fetch throws.
 */
export async function fetchMarkdown(url, fetchWaitMs, fetchFn = fetch) {
  const timeoutMs = 5000;
  const timeoutLabel = `${timeoutMs / 1000}s`;
  let signal;

  try {
    console.log(`Fetching markdown ${url}, delay ${fetchWaitMs}ms, timeout ${timeoutLabel}`);
    await delay(fetchWaitMs); // Throttle requests to avoid rate limiting.
    signal = timeoutSignal(timeoutMs);
    const response = await fetchFn(url, { signal });

    if (!response.ok) {
      console.warn('Failed to fetch markdown.', response.status, response.statusText);
      return '';
    }

    // Await inside the try so a failure while reading the body is handled below.
    return await response.text();
  } catch (e) {
    if (e instanceof AbortError) {
      // Report the timeout that was actually configured.
      console.warn(`Fetch timed out after ${timeoutLabel}`);
    } else {
      console.warn('Markdown not found at url', e.message);
    }
    return '';
  } finally {
    // Cancel the pending timeout on every path, including when the fetch throws,
    // so the timer cannot outlive the request.
    if (signal) signal.clear();
  }
}

/**
 * Downloads the markdown for one document and writes it to `<folderPath>/<entry>.md`.
 *
 * When skipping is enabled and the target file already exists, nothing is fetched and
 * the entry is reported as successful.
 *
 * @param {string} documentUrl - The document URL, without the `.md` extension.
 * @param {string} folderPath - Folder where the downloaded markdown file will be saved.
 * @param {string} entry - The name of the downloaded markdown file (without the file extension).
 * @param {Function} [fetchFn=fetch] - The fetch function to use for making HTTP requests.
 * @returns {Promise<boolean>} - true if the file was saved or skipped, false if no
 * markdown could be fetched.
 */
export async function downloadMD(documentUrl, folderPath, entry, fetchFn = fetch) {
  const markdownFile = path.join(folderPath, `${entry}.md`);

  if (ALLOW_SKIP && fs.existsSync(markdownFile)) {
    console.log(`Skipping ${entry}.md`);
    return true;
  }

  // URLs containing 'hlx.page' get the longer delay before fetching.
  const isPageUrl = documentUrl.includes('hlx.page');
  const waitMs = isPageUrl ? PAGE_DELAY : LIVE_DELAY;
  const markdown = await fetchMarkdown(`${documentUrl}.md`, waitMs, fetchFn);

  if (markdown) {
    console.log(`Saving ${markdownFile}`);
    saveToFile(markdownFile, markdown);
    return true;
  }

  return false;
}

/**
 * Downloads the markdown for every staged URL, one after another, into a folder.
 *
 * @param {Iterable<[string, string]>} stagedUrls - `[entry, url]` pairs, such as a Map
 * or an array of two-element arrays.
 * @param {string} folderPath - The path of the folder where the markdown files will be saved.
 * @param {Function} [fetchFn=fetch] - The fetch function to use for downloading the files.
 * @returns {Promise<string[]>} - A list of entries that failed to download.
 */
export async function downloadMDs(stagedUrls, folderPath, fetchFn = fetch) {
  const failedEntries = [];

  // Entries are processed sequentially; each download is awaited before the next starts.
  for (const pair of stagedUrls) {
    const [entry, stageUrl] = pair;
    const downloaded = await downloadMD(stageUrl, folderPath, entry, fetchFn);

    if (!downloaded) {
      console.warn(`No markdown found for ${entry}`);
      failedEntries.push(entry);
    }
  }

  return failedEntries;
}

/**
 * Builds the staged URL for every entry in the list and downloads the markdown
 * for each of them into the given folder.
 *
 * @param {string} folder - The folder path where the markdown files will be saved.
 * @param {Array} list - The list of entries to be downloaded.
 * @param {Array} locales - The locales to be used for localizing the staged URLs.
 * @param {string} siteURL - The base URL of the website.
 * @param {string} stagePath - The path to the staging environment.
 * @param {Function} [fetchFn=fetch] - The fetch function to use for downloading the files.
 * @returns {Promise<string[]>} A promise that resolves to the entries that failed to download.
 */
export function downloadMarkdown(folder, list, locales, siteURL, stagePath, fetchFn = fetch) {
  // Pair each entry path with the staged URL it should be fetched from.
  const toStagedPair = (entry) => {
    const entryPath = entryToPath(entry);
    const stagedUrl = localizeStageUrl(siteURL, entryPath, stagePath, locales);
    return [entryPath, stagedUrl];
  };
  const stagedUrls = list.map(toStagedPair);

  fs.mkdirSync(folder, { recursive: true });
  return downloadMDs(stagedUrls, folder, fetchFn);
}

/**
 * Command-line entry routine: reads the entry list and locales from the migration
 * directory, downloads the markdown for every entry and logs the entries that failed.
 *
 * Exits the process with code 1 when the list, the locales, the site URL or the
 * stage path is missing.
 *
 * @param {string} migrationDir - The directory path for the migration.
 * @param {string} outputDir - The directory path for the output markdown files.
 * @param {string} siteUrl - The base URL of the website.
 * @param {string} stagePath - The path to the staging environment.
 * @returns {Promise<void>} A promise that resolves when the download process is complete.
 */
async function init(migrationDir, outputDir, siteUrl, stagePath) {
  const [list, locales] = ['output/list.json', 'locales.json']
    .map((file) => readJsonFile(file, migrationDir));

  if (!(list && locales)) {
    console.error('Missing list or locales');
    process.exit(1);
  }

  if (!(siteUrl && stagePath)) {
    console.error('Missing siteUrl or stagePath');
    process.exit(1);
  }

  // Markdown is written below <migrationDir>/md/<outputDir>.
  const markdownFolder = path.join(migrationDir, 'md', outputDir);
  const failed = await downloadMarkdown(markdownFolder, list, locales, siteUrl, stagePath);

  console.log('Download complete');
  if (failed.length > 0) {
    console.warn('Failed entries:', failed);
  }
}

// example usage: node tools/download-markdown/download-markdown.js 'blog-test' 'uploaded' 'https://main--bacom-blog--adobecom.hlx.page' '/drafts/staged-content'
// Run the CLI only when this module is the script Node was started with.
// pathToFileURL yields the same encoded file URL form as import.meta.url
// (percent-encoded characters, Windows drive letters); a hand-built `file://`
// string does not, which made the check fail for such paths.
const [, entryScript] = process.argv;
if (entryScript && import.meta.url === pathToFileURL(entryScript).href) {
  const [folder, outputDir, siteUrl, stagePath] = process.argv.slice(2);

  await init(folder, outputDir, siteUrl, stagePath);
  process.exit(0);
}
24 changes: 23 additions & 1 deletion test/bulk-update/bulk-update.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { expect } from '@esm-bundle/chai';
import { stub } from 'sinon';
import BulkUpdate, { loadListData, localizeStageUrl } from '../../bulk-update/bulk-update.js';
import BulkUpdate, { loadListData, localizeStageUrl, localizeStagePath } from '../../bulk-update/bulk-update.js';
import BaseReporter from '../../bulk-update/reporter/reporter.js';

const { pathname } = new URL('.', import.meta.url);
Expand Down Expand Up @@ -143,4 +143,26 @@ describe('BulkUpdater', () => {
expect(result).to.equal(expectedUrl);
});
});
describe('localizeStagePath', () => {
  // Entry without a locale prefix: the stage path is prepended.
  it('generates the correct staged path without locales', () => {
    const result = localizeStagePath('/test/path', '/stage-content');

    expect(result).to.equal('/stage-content/test/path');
  });

  // Entry with a known locale prefix: the stage path follows the locale segment.
  it('generates the correct staged path', () => {
    const locales = ['fr', 'de'];
    const result = localizeStagePath('/fr/test/path', '/staged-content', locales);

    expect(result).to.equal('/fr/staged-content/test/path');
  });
});
});
74 changes: 74 additions & 0 deletions test/download-markdown/download-markdown.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import fs from 'fs';
import { expect } from '@esm-bundle/chai';
import { downloadMarkdown, downloadMDs, downloadMD, fetchMarkdown } from '../../download-markdown/download-markdown.js';

const { pathname } = new URL('.', import.meta.url);

// Fetch stub: the two staged markdown paths answer with sample content,
// every other path answers with a non-ok response.
const fetchFn = async (url) => {
  const stagedPaths = ['/staged/file.md', '/fr/staged/file.md'];
  const { pathname: requestedPath } = new URL(url);

  if (!stagedPaths.includes(requestedPath)) {
    return { ok: false };
  }

  const markdownContent = '# Sample Markdown Content';
  return { ok: true, text: () => markdownContent };
};

describe('download-markdown', () => {
  const outputDir = `${pathname}output`;
  const removeOutput = () => fs.rmSync(outputDir, { recursive: true, force: true });

  // Start every test from an empty output folder. downloadMD skips entries whose
  // markdown file already exists, so files left by an earlier test or run would
  // let a test pass without downloading anything.
  beforeEach(removeOutput);
  // Do not leave generated files behind in the test folder.
  after(removeOutput);

  describe('fetchMarkdown', () => {
    it('fetch the content of a markdown file from a specified URL', async () => {
      const url = 'https://business.adobe.com/staged/file.md';
      const fetchWaitMs = 0;

      const markdownContent = await fetchMarkdown(url, fetchWaitMs, fetchFn);
      expect(markdownContent).to.be.a('string');
      expect(markdownContent).to.not.be.empty;
    });
  });

  describe('downloadMD', () => {
    it('download a single markdown file from a specified URL and save it to a folder', async () => {
      const entry = 'file';
      const url = `https://business.adobe.com/staged/${entry}`;

      const success = await downloadMD(url, outputDir, entry, fetchFn);
      expect(success).to.be.true;
      expect(fs.existsSync(`${outputDir}/file.md`)).to.be.true;
    });
  });

  describe('downloadMDs', () => {
    it('download multiple markdown files from a list of staged URLs and save them to a specified folder', async () => {
      const stagedUrls = [
        ['file', 'https://business.adobe.com/staged/file'],
        ['fr/file', 'https://business.adobe.com/fr/staged/file']];

      const failed = await downloadMDs(stagedUrls, outputDir, fetchFn);
      expect(failed).to.be.empty;
      expect(fs.existsSync(`${outputDir}/file.md`)).to.be.true;
      expect(fs.existsSync(`${outputDir}/fr/file.md`)).to.be.true;
    });
  });

  describe('downloadMarkdown', () => {
    it('download markdown files from a list of URLs and save them to a specified folder', async () => {
      const list = ['/file', '/fr/file'];
      const locales = ['fr'];
      const siteURL = 'https://business.adobe.com';
      const stagePath = '/staged';

      const failed = await downloadMarkdown(outputDir, list, locales, siteURL, stagePath, fetchFn);
      expect(failed).to.be.empty;
      expect(fs.existsSync(`${outputDir}/file.md`)).to.be.true;
      expect(fs.existsSync(`${outputDir}/fr/file.md`)).to.be.true;
    });
  });
});

0 comments on commit 4b81064

Please sign in to comment.