This repository has been archived by the owner on Feb 13, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 6 changed files with 292 additions and 14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
import path from 'path';
import fs from 'fs';
import { pathToFileURL } from 'url';
import { fetch, timeoutSignal, AbortError } from '@adobe/fetch';
import { saveToFile, entryToPath } from '../bulk-update/document-manager/document-manager.js';
import { localizeStageUrl } from '../bulk-update/bulk-update.js';
|
||
/**
 * Resolves after the given number of milliseconds.
 * @param {number} milliseconds - How long to wait before resolving.
 * @returns {Promise<void>} A promise that settles once the timer fires.
 */
const delay = (milliseconds) => new Promise((done) => {
  setTimeout(done, milliseconds);
});

// Skip the download when the target .md file already exists.
const ALLOW_SKIP = true;
// Wait applied before fetching from hlx.page, in milliseconds.
const PAGE_DELAY = 500;
// Wait applied before fetching from the live site, in milliseconds.
const LIVE_DELAY = 0;
|
||
/**
 * Reads and parses a JSON file from the specified directory.
 *
 * @param {string} file - The name (or relative path) of the JSON file.
 * @param {string} directory - The directory where the file is located.
 * @returns {object|null} The parsed JSON value, or null when the file does not exist.
 * @throws {SyntaxError} When the file is not valid JSON; the message names the file.
 */
function readJsonFile(file, directory) {
  const filePath = path.join(directory, file);
  if (!fs.existsSync(filePath)) {
    console.error(`File not found: ${filePath}`);
    return null;
  }

  const contents = fs.readFileSync(filePath, 'utf8');
  try {
    return JSON.parse(contents);
  } catch (e) {
    // A bare JSON.parse error does not say which file is broken; add the path
    // while keeping the SyntaxError type and the original error as the cause.
    throw new SyntaxError(`Invalid JSON in ${filePath}: ${e.message}`, { cause: e });
  }
}
|
||
/**
 * Fetches markdown content from the specified URL.
 *
 * Waits `fetchWaitMs` before issuing the request, then fetches with a 5 second
 * timeout signal. Every failure (non-OK response, timeout, or any other thrown
 * error) is logged as a warning and reported as an empty string.
 *
 * @param {string} url - The URL of the markdown file to fetch.
 * @param {number} fetchWaitMs - The delay in milliseconds before making the fetch request.
 * @param {function} fetchFn - The fetch function to use.
 * @returns {Promise<string>} The fetched markdown, or '' when the fetch fails.
 */
export async function fetchMarkdown(url, fetchWaitMs, fetchFn = fetch) {
  const timeoutMs = 5000;
  const timeoutLabel = `${timeoutMs / 1000}s`;
  let signal;

  try {
    console.log(`Fetching markdown ${url}, delay ${fetchWaitMs}ms, timeout ${timeoutLabel}`);
    // Caller-chosen wait before the request (used to throttle hlx.page fetches).
    await delay(fetchWaitMs);
    signal = timeoutSignal(timeoutMs);
    const response = await fetchFn(url, { signal });

    if (!response.ok) {
      console.warn('Failed to fetch markdown.', response.status, response.statusText);
      return '';
    }
    // `return await` keeps a rejected body read inside this try/catch.
    return await response.text();
  } catch (e) {
    if (e instanceof AbortError) {
      console.warn(`Fetch timed out after ${timeoutLabel}`);
    } else {
      console.warn('Markdown not found at url', e.message);
    }
    return '';
  } finally {
    // Clear the timeout signal on every exit path, including thrown errors.
    signal?.clear();
  }
}
|
||
/**
 * Fetches `${documentUrl}.md` and stores it as `${entry}.md` inside `folderPath`.
 *
 * When skipping is allowed and the target file already exists, nothing is
 * fetched and the call counts as a success.
 *
 * @param {string} documentUrl - The document URL, without the `.md` suffix.
 * @param {string} folderPath - Folder where the markdown file is written.
 * @param {string} entry - Name of the markdown file, without the file extension.
 * @param {Function} [fetchFn=fetch] - The fetch function to use for HTTP requests.
 * @returns {Promise<boolean>} true when the file exists or was saved, false when
 * no markdown content was obtained.
 */
export async function downloadMD(documentUrl, folderPath, entry, fetchFn = fetch) {
  const markdownFile = path.join(folderPath, `${entry}.md`);

  if (ALLOW_SKIP && fs.existsSync(markdownFile)) {
    console.log(`Skipping ${entry}.md`);
    return true;
  }

  // hlx.page requests use the page delay; everything else uses the live delay.
  let waitMs = LIVE_DELAY;
  if (documentUrl.includes('hlx.page')) {
    waitMs = PAGE_DELAY;
  }

  const content = await fetchMarkdown(`${documentUrl}.md`, waitMs, fetchFn);
  if (content) {
    console.log(`Saving ${markdownFile}`);
    saveToFile(markdownFile, content);
    return true;
  }

  return false;
}
|
||
/**
 * Downloads the markdown for each staged URL, one after another, into `folderPath`.
 *
 * @param {Iterable<[string, string]>} stagedUrls - Pairs of entry name and URL
 * (for example a Map or an array of two-element arrays).
 * @param {string} folderPath - The folder where the markdown files are saved.
 * @param {Function} [fetchFn=fetch] - The fetch function to use for downloading the files.
 * @returns {Promise<string[]>} The entry names whose download did not succeed.
 */
export async function downloadMDs(stagedUrls, folderPath, fetchFn = fetch) {
  const notDownloaded = [];

  for (const [name, url] of stagedUrls) {
    // Awaited inside the loop on purpose: downloads run strictly in sequence.
    const saved = await downloadMD(url, folderPath, name, fetchFn);
    if (!saved) {
      console.warn(`No markdown found for ${name}`);
      notDownloaded.push(name);
    }
  }

  return notDownloaded;
}
|
||
/**
 * Builds the staged URL for every list entry, makes sure the output folder
 * exists, and downloads the markdown files into it.
 *
 * @param {string} folder - The folder path where the markdown files will be saved.
 * @param {Array} list - The list of entries to be downloaded.
 * @param {Array} locales - The locales to be used for localizing the staged URLs.
 * @param {string} siteURL - The base URL of the website.
 * @param {string} stagePath - The path to the staging environment.
 * @param {Function} [fetchFn=fetch] - The fetch function to use for downloading the files.
 * @returns {Promise<string[]>} The entries that failed to download.
 */
export function downloadMarkdown(folder, list, locales, siteURL, stagePath, fetchFn = fetch) {
  // Pair each entry path with its localized staged URL.
  const toStagedPair = (entry) => {
    const entryPath = entryToPath(entry);
    const stagedUrl = localizeStageUrl(siteURL, entryPath, stagePath, locales);
    return [entryPath, stagedUrl];
  };
  const stagedUrls = list.map((entry) => toStagedPair(entry));

  fs.mkdirSync(folder, { recursive: true });
  return downloadMDs(stagedUrls, folder, fetchFn);
}
|
||
/**
 * Initializes the download process for markdown files.
 *
 * Reads `output/list.json` and `locales.json` from the migration directory and
 * downloads the listed entries into `<migrationDir>/md/<outputDir>`. Exits the
 * process with code 1 when a required argument or input file is missing.
 *
 * @param {string} migrationDir - The directory path for the migration.
 * @param {string} outputDir - The directory path for the output markdown files.
 * @param {string} siteUrl - The base URL of the website.
 * @param {string} stagePath - The path to the staging environment.
 * @returns {Promise<void>} A promise that resolves when the download process is complete.
 */
async function init(migrationDir, outputDir, siteUrl, stagePath) {
  // path.join throws a TypeError on non-string segments, so reject missing
  // directory arguments up front with a readable message.
  if (typeof migrationDir !== 'string' || typeof outputDir !== 'string') {
    console.error('Missing migrationDir or outputDir');
    process.exit(1);
  }

  const list = readJsonFile('output/list.json', migrationDir);
  const locales = readJsonFile('locales.json', migrationDir);

  if (!list || !locales) {
    console.error('Missing list or locales');
    process.exit(1);
  }

  if (!siteUrl || !stagePath) {
    console.error('Missing siteUrl or stagePath');
    process.exit(1);
  }

  const markdownFolder = path.join(migrationDir, 'md', outputDir);
  const failed = await downloadMarkdown(markdownFolder, list, locales, siteUrl, stagePath);

  console.log('Download complete');
  if (failed.length) {
    console.warn('Failed entries:', failed);
  }
}
|
||
// example usage: node tools/download-markdown/download-markdown.js 'blog-test' 'uploaded' 'https://main--bacom-blog--adobecom.hlx.page' '/drafts/staged-content'
// Run only when this file is the script being executed. pathToFileURL produces
// a properly encoded file URL, so the comparison with import.meta.url also
// holds for paths with spaces or special characters and for Windows paths.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const [folder, outputDir, siteUrl, stagePath] = process.argv.slice(2);

  await init(folder, outputDir, siteUrl, stagePath);
  process.exit(0);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import fs from 'fs'; | ||
import { expect } from '@esm-bundle/chai'; | ||
import { downloadMarkdown, downloadMDs, downloadMD, fetchMarkdown } from '../../download-markdown/download-markdown.js'; | ||
|
||
// URL path of the directory that contains this test file (ends with '/').
const testDirUrl = new URL('.', import.meta.url);
const { pathname } = testDirUrl;
|
||
/**
 * Stub fetch used by the tests: the two staged sample paths respond OK with
 * fixed markdown content, every other path responds with `ok: false`.
 *
 * @param {string} url - Absolute URL being requested.
 * @returns {Promise<object>} A minimal response-like object.
 */
const fetchFn = async (url) => {
  const { pathname: requestPath } = new URL(url);
  const stagedPaths = ['/staged/file.md', '/fr/staged/file.md'];

  if (stagedPaths.includes(requestPath)) {
    return { ok: true, text: () => '# Sample Markdown Content' };
  }
  return { ok: false };
};
|
||
// Test suite for the download-markdown helpers; all requests go through the stub fetchFn.
// NOTE(review): nothing here removes files written to outputDir, and downloadMD
// skips entries whose .md file already exists, so later cases (and re-runs) may
// pass without calling fetchFn — confirm whether cleanup between tests is wanted.
describe('download-markdown', () => {
  const outputDir = `${pathname}output`;
  // True when the given path (relative to outputDir) exists on disk.
  const savedInOutput = (relativePath) => fs.existsSync(`${outputDir}/${relativePath}`);

  describe('fetchMarkdown', () => {
    it('fetch the content of a markdown file from a specified URL', async () => {
      const noWait = 0;
      const content = await fetchMarkdown('https://business.adobe.com/staged/file.md', noWait, fetchFn);

      expect(content).to.be.a('string');
      expect(content).to.not.be.empty;
    });
  });

  describe('downloadMD', () => {
    it('download a single markdown file from a specified URL and save it to a folder', async () => {
      const entry = 'file';
      const documentUrl = `https://business.adobe.com/staged/${entry}`;

      const downloaded = await downloadMD(documentUrl, outputDir, entry, fetchFn);

      expect(downloaded).to.be.true;
      expect(savedInOutput('file.md')).to.be.true;
    });
  });

  describe('downloadMDs', () => {
    it('download multiple markdown files from a list of staged URLs and save them to a specified folder', async () => {
      const stagedUrls = [
        ['file', 'https://business.adobe.com/staged/file'],
        ['fr/file', 'https://business.adobe.com/fr/staged/file'],
      ];

      const failedEntries = await downloadMDs(stagedUrls, outputDir, fetchFn);

      expect(failedEntries).to.be.empty;
      expect(savedInOutput('file.md')).to.be.true;
      expect(savedInOutput('fr/file.md')).to.be.true;
    });
  });

  describe('downloadMarkdown', () => {
    it('download markdown files from a list of URLs and save them to a specified folder', async () => {
      const entries = ['/file', '/fr/file'];
      const locales = ['fr'];
      const siteURL = 'https://business.adobe.com';
      const stagePath = '/staged';

      const failedEntries = await downloadMarkdown(
        outputDir,
        entries,
        locales,
        siteURL,
        stagePath,
        fetchFn,
      );

      expect(failedEntries).to.be.empty;
      expect(savedInOutput('file.md')).to.be.true;
      expect(savedInOutput('fr/file.md')).to.be.true;
    });
  });
});