Skip to content

Commit 68e05f4

Browse files
committed
feat(check-html-links): handle as internal if starts with a provided absolute base url
1 parent 660f64c commit 68e05f4

File tree

15 files changed

+172
-41
lines changed

15 files changed

+172
-41
lines changed

.changeset/fair-falcons-shave.md

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
---
2+
'@rocket/cli': patch
3+
---
4+
5+
`rocket lint` can now validate external links.
6+
7+
Example:
8+
9+
```
10+
rocket lint --validate-externals
11+
```

.changeset/great-bobcats-destroy.md

+12-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
---
2-
'check-html-links': minor
2+
'check-html-links': patch
33
---
44

5-
add external links validation using the flag `--validate-externals`.
5+
Add external links validation via the flag `--validate-externals`.
6+
7+
You can (and usually should) provide the optional `--absolute-base-url` flag so that URLs starting with it are handled as internal.
8+
9+
```bash
10+
# check external urls
11+
npx check-html-links _site --validate-externals
12+
13+
# check external urls but treat links like https://rocket.modern-web.dev/about/ as internal
14+
npx check-html-links _site --validate-externals --absolute-base-url https://rocket.modern-web.dev
15+
```

packages/check-html-links/src/CheckHtmlLinksCli.js

+36-12
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import path from 'path';
66
import chalk from 'chalk';
77

88
import commandLineArgs from 'command-line-args';
9-
import { validateFiles } from './validateFolder.js';
9+
import { prepareFiles, validateFiles } from './validateFolder.js';
1010
import { formatErrors } from './formatErrors.js';
1111
import { listFiles } from './listFiles.js';
1212

@@ -20,6 +20,7 @@ export class CheckHtmlLinksCli {
2020
{ name: 'root-dir', type: String, defaultOption: true },
2121
{ name: 'continue-on-error', type: Boolean },
2222
{ name: 'validate-externals', type: Boolean },
23+
{ name: 'absolute-base-url', type: String },
2324
];
2425
const options = commandLineArgs(mainDefinitions, {
2526
stopAtFirstUnknown: true,
@@ -31,6 +32,7 @@ export class CheckHtmlLinksCli {
3132
rootDir: options['root-dir'],
3233
ignoreLinkPatterns: options['ignore-link-pattern'],
3334
validateExternals: options['validate-externals'],
35+
absoluteBaseUrl: options['absolute-base-url'],
3436
};
3537
}
3638

@@ -45,26 +47,48 @@ export class CheckHtmlLinksCli {
4547
}
4648

4749
async run() {
48-
const { ignoreLinkPatterns, rootDir: userRootDir, validateExternals } = this.options;
50+
const {
51+
ignoreLinkPatterns,
52+
rootDir: userRootDir,
53+
validateExternals,
54+
absoluteBaseUrl,
55+
} = this.options;
4956
const rootDir = userRootDir ? path.resolve(userRootDir) : process.cwd();
5057
const performanceStart = process.hrtime();
5158

52-
console.log('👀 Checking if all internal links work...');
5359
const files = await listFiles('**/*.html', rootDir);
5460

61+
console.log('Check HTML Links');
62+
5563
const filesOutput =
5664
files.length == 0
57-
? '🧐 No files to check. Did you select the correct folder?'
58-
: `🔥 Found a total of ${chalk.green.bold(files.length)} files to check!`;
65+
? ' 🧐 No files to check. Did you select the correct folder?'
66+
: ` 📖 Found ${chalk.green.bold(files.length)} files containing`;
5967
console.log(filesOutput);
6068

61-
const { errors, numberLinks } = await validateFiles(files, rootDir, {
62-
ignoreLinkPatterns,
69+
const { numberLinks, checkLocalFiles, checkExternalLinks } = await prepareFiles(
70+
files,
71+
rootDir,
72+
{
73+
ignoreLinkPatterns,
74+
validateExternals,
75+
absoluteBaseUrl,
76+
},
77+
);
78+
79+
console.log(` 🔗 ${chalk.green.bold(numberLinks)} internal links`);
80+
if (validateExternals) {
81+
console.log(` 🌐 ${chalk.green.bold(checkExternalLinks.length)} external links`);
82+
}
83+
84+
console.log(' 👀 Checking...');
85+
86+
const { errors } = await validateFiles({
87+
checkLocalFiles,
6388
validateExternals,
89+
checkExternalLinks,
6490
});
6591

66-
console.log(`🔗 Found a total of ${chalk.green.bold(numberLinks)} links to validate!\n`);
67-
6892
const performance = process.hrtime(performanceStart);
6993
/** @type {string[]} */
7094
let output = [];
@@ -75,15 +99,15 @@ export class CheckHtmlLinksCli {
7599
referenceCount += error.usage.length;
76100
}
77101
output = [
78-
`❌ Found ${chalk.red.bold(
102+
` ❌ Found ${chalk.red.bold(
79103
errors.length.toString(),
80104
)} missing reference targets (used by ${referenceCount} links) while checking ${
81105
files.length
82106
} files:`,
83107
...formatErrors(errors)
84108
.split('\n')
85109
.map(line => ` ${line}`),
86-
`Checking links duration: ${performance[0]}s ${performance[1] / 1000000}ms`,
110+
` 🕑 Checking links duration: ${performance[0]}s ${performance[1] / 1000000}ms`,
87111
];
88112
message = output.join('\n');
89113
if (this.options.printOnError === true) {
@@ -94,7 +118,7 @@ export class CheckHtmlLinksCli {
94118
}
95119
} else {
96120
console.log(
97-
`✅ All internal links are valid. (executed in ${performance[0]}s ${
121+
` ✅ All tested links are valid. (executed in ${performance[0]}s ${
98122
performance[1] / 1000000
99123
}ms)`,
100124
);

packages/check-html-links/src/checkLinks.js

+8-4
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,14 @@ const memorizeCheckup = (url, result) => {
2222
* @param {string} method
2323
* @returns
2424
*/
25-
const fetchWrap = async (url, method = 'GET') =>
26-
fetch(url, { method })
27-
.then(response => response.ok)
28-
.catch(() => false);
25+
const fetchWrap = async (url, method = 'GET') => {
26+
return Promise.race([
27+
fetch(url, { method })
28+
.then(response => response.ok)
29+
.catch(() => false),
30+
new Promise(resolve => setTimeout(resolve, 10000, false)),
31+
]);
32+
};
2933

3034
/**
3135
*

packages/check-html-links/src/formatErrors.js

+4-4
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ export function formatErrors(errors, relativeFrom = process.cwd()) {
1515
const filePath = path.relative(relativeFrom, error.filePath);
1616
if (error.onlyAnchorMissing === true) {
1717
output.push(
18-
`${number}. missing ${chalk.red.bold(
18+
` ${number}. missing ${chalk.red.bold(
1919
`id="${error.usage[0].anchor}"`,
2020
)} in ${chalk.cyanBright(filePath)}`,
2121
);
@@ -24,7 +24,7 @@ export function formatErrors(errors, relativeFrom = process.cwd()) {
2424
const title =
2525
firstAttribute === 'src' || firstAttribute === 'srcset' ? 'file' : 'reference target';
2626

27-
output.push(`${number}. missing ${title} ${chalk.red.bold(filePath)}`);
27+
output.push(` ${number}. missing ${title} ${chalk.red.bold(filePath)}`);
2828
}
2929
const usageLength = error.usage.length;
3030

@@ -34,11 +34,11 @@ export function formatErrors(errors, relativeFrom = process.cwd()) {
3434
const clickAbleLink = chalk.cyanBright(`${usagePath}:${usage.line + 1}:${usage.character}`);
3535
const attributeStart = chalk.gray(`${usage.attribute}="`);
3636
const attributeEnd = chalk.gray('"');
37-
output.push(` from ${clickAbleLink} via ${attributeStart}${usage.value}${attributeEnd}`);
37+
output.push(` from ${clickAbleLink} via ${attributeStart}${usage.value}${attributeEnd}`);
3838
}
3939
if (usageLength > 3) {
4040
const more = chalk.red((usageLength - 3).toString());
41-
output.push(` ... ${more} more references to this target`);
41+
output.push(` ... ${more} more references to this target`);
4242
}
4343
output.push('');
4444
}

packages/check-html-links/src/validateFolder.js

+33-9
Original file line numberDiff line numberDiff line change
@@ -223,11 +223,16 @@ function isNonHttpSchema(url) {
223223
* @param {object} options
224224
* @param {string} options.htmlFilePath
225225
* @param {string} options.rootDir
226+
* @param {string} options.absoluteBaseUrl
226227
* @param {function(string): boolean} options.ignoreUsage
227228
*/
228-
async function resolveLinks(links, { htmlFilePath, rootDir, ignoreUsage }) {
229+
async function resolveLinks(links, { htmlFilePath, rootDir, ignoreUsage, absoluteBaseUrl }) {
229230
for (const hrefObj of links) {
230-
const { value, anchor } = getValueAndAnchor(hrefObj.value);
231+
const { value: rawValue, anchor } = getValueAndAnchor(hrefObj.value);
232+
233+
const value = rawValue.startsWith(absoluteBaseUrl)
234+
? rawValue.substring(absoluteBaseUrl.length)
235+
: rawValue;
231236

232237
const usageObj = {
233238
attribute: hrefObj.attribute,
@@ -252,8 +257,6 @@ async function resolveLinks(links, { htmlFilePath, rootDir, ignoreUsage }) {
252257
} else if (valueFile === '' && anchor !== '') {
253258
addLocalFile(htmlFilePath, anchor, usageObj);
254259
} else if (value.startsWith('//') || value.startsWith('http')) {
255-
// TODO: handle external urls
256-
// external url - we do not handle that (yet)
257260
addExternalLink(htmlFilePath, usageObj);
258261
} else if (value.startsWith('/')) {
259262
const filePath = path.join(rootDir, valueFile);
@@ -328,7 +331,7 @@ async function validateExternalLinks(checkExternalLinks) {
328331
* @param {string} rootDir
329332
* @param {Options} opts?
330333
*/
331-
export async function validateFiles(files, rootDir, opts) {
334+
export async function prepareFiles(files, rootDir, opts) {
332335
await parserReferences.prepareWasm(saxWasmBuffer);
333336
await parserIds.prepareWasm(saxWasmBuffer);
334337

@@ -350,14 +353,27 @@ export async function validateFiles(files, rootDir, opts) {
350353
for (const htmlFilePath of files) {
351354
const { links } = await extractReferences(htmlFilePath);
352355
numberLinks += links.length;
353-
await resolveLinks(links, { htmlFilePath, rootDir, ignoreUsage });
356+
await resolveLinks(links, {
357+
htmlFilePath,
358+
rootDir,
359+
ignoreUsage,
360+
absoluteBaseUrl: opts?.absoluteBaseUrl,
361+
});
354362
}
363+
return { checkLocalFiles, checkExternalLinks, numberLinks };
364+
}
365+
366+
/**
367+
* @param {*} param0
368+
* @returns
369+
*/
370+
export async function validateFiles({ checkLocalFiles, validateExternals, checkExternalLinks }) {
355371
await validateLocalFiles(checkLocalFiles);
356-
if (opts?.validateExternals) {
372+
if (validateExternals) {
357373
await validateExternalLinks(checkExternalLinks);
358374
}
359375

360-
return { errors: errors, numberLinks: numberLinks };
376+
return { errors };
361377
}
362378

363379
/**
@@ -367,6 +383,14 @@ export async function validateFiles(files, rootDir, opts) {
367383
export async function validateFolder(inRootDir, opts) {
368384
const rootDir = path.resolve(inRootDir);
369385
const files = await listFiles('**/*.html', rootDir);
370-
const { errors } = await validateFiles(files, rootDir, opts);
386+
387+
const { checkLocalFiles, checkExternalLinks } = await prepareFiles(files, rootDir, opts);
388+
389+
const { errors } = await validateFiles({
390+
checkLocalFiles,
391+
validateExternals: opts?.validateExternals,
392+
checkExternalLinks,
393+
});
394+
371395
return errors;
372396
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
About
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<a href="about.html">About</a>
2+
<a href="http://localhost/about.html">About Absolute</a>

packages/check-html-links/test-node/formatErrors.test.js

+10-10
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,19 @@ describe('formatErrors', () => {
2020
const result = await executeAndFormat('fixtures/test-case');
2121
expect(result.trim().split('\n')).to.deep.equal([
2222
'1. missing id="my-teams" in fixtures/test-case/price/index.html',
23-
' from fixtures/test-case/history/index.html:1:9 via href="/price/#my-teams"',
23+
' from fixtures/test-case/history/index.html:1:9 via href="/price/#my-teams"',
2424
'',
25-
'2. missing file fixtures/test-case/about/images/team.png',
26-
' from fixtures/test-case/about/index.html:3:10 via src="./images/team.png"',
25+
' 2. missing file fixtures/test-case/about/images/team.png',
26+
' from fixtures/test-case/about/index.html:3:10 via src="./images/team.png"',
2727
'',
28-
'3. missing reference target fixtures/test-case/aboot',
29-
' from fixtures/test-case/about/index.html:6:11 via href="/aboot"',
30-
' from fixtures/test-case/history/index.html:4:11 via href="/aboot"',
31-
' from fixtures/test-case/index.html:4:11 via href="/aboot"',
32-
' ... 2 more references to this target',
28+
' 3. missing reference target fixtures/test-case/aboot',
29+
' from fixtures/test-case/about/index.html:6:11 via href="/aboot"',
30+
' from fixtures/test-case/history/index.html:4:11 via href="/aboot"',
31+
' from fixtures/test-case/index.html:4:11 via href="/aboot"',
32+
' ... 2 more references to this target',
3333
'',
34-
'4. missing reference target fixtures/test-case/prce',
35-
' from fixtures/test-case/index.html:1:9 via href="./prce"',
34+
' 4. missing reference target fixtures/test-case/prce',
35+
' from fixtures/test-case/index.html:1:9 via href="./prce"',
3636
]);
3737
});
3838
});

packages/check-html-links/test-node/validateFolder.test.js

+30
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,36 @@ describe('validateFolder', () => {
104104
]);
105105
});
106106

107+
it('validates links with own absolute base url as internal', async () => {
108+
const { errors, cleanup } = await execute('fixtures/internal-own-absolute-base-path', {
109+
validateExternals: true,
110+
absoluteBaseUrl: 'http://localhost',
111+
});
112+
expect(cleanup(errors)).to.deep.equal([]);
113+
});
114+
115+
it('validates all full urls if there is no absoluteBaseUrl provided', async () => {
116+
const { errors, cleanup } = await execute('fixtures/internal-own-absolute-base-path', {
117+
validateExternals: true,
118+
});
119+
expect(cleanup(errors)).to.deep.equal([
120+
{
121+
filePath: 'fixtures/internal-own-absolute-base-path/index.html',
122+
onlyAnchorMissing: false,
123+
usage: [
124+
{
125+
anchor: '',
126+
attribute: 'href',
127+
character: 9,
128+
file: 'fixtures/internal-own-absolute-base-path/index.html',
129+
line: 1,
130+
value: 'http://localhost/about.html',
131+
},
132+
],
133+
},
134+
]);
135+
});
136+
107137
it('groups multiple usage of the same missing file', async () => {
108138
const { errors, cleanup } = await execute('fixtures/internal-links-to-same-file');
109139
expect(cleanup(errors)).to.deep.equal([

packages/check-html-links/types/main.d.ts

+2
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,12 @@ export interface Error {
2929
export interface Options {
3030
ignoreLinkPatterns: string[] | null;
3131
validateExternals: boolean;
32+
absoluteBaseUrl: string;
3233
}
3334

3435
export interface CheckHtmlLinksCliOptions extends Options {
3536
printOnError: boolean;
3637
rootDir: string;
3738
continueOnError: boolean;
39+
absoluteBaseUrl: string;
3840
}

packages/cli/src/RocketLint.js

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ export class RocketLint {
2424
.command('lint')
2525
.option('-i, --input-dir <path>', 'path to where to search for source files')
2626
.option('-b, --build-html', 'do a quick html only build and then check links')
27+
.option('-e, --validate-externals', 'validate external links')
2728
.action(async options => {
2829
const { cliOptions, ...lintOptions } = options;
2930
cli.setOptions({
@@ -77,6 +78,7 @@ export class RocketLint {
7778
rootDir: folderToCheck,
7879
printOnError: true,
7980
continueOnError: false,
81+
absoluteBaseUrl: this.cli.options.absoluteBaseUrl,
8082
...userCheckHtmlLinksOptions,
8183
});
8284

site/pages/10--docs/30--guides/50--go-live.rocket.md

+12
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,18 @@ rocket lint --build-html
6868
6969
Note: We can do this as 2-4 generally do not impact links/references (as long as the optimization scripts do not have related bugs).
7070
71+
### Check external links
72+
73+
`rocket lint` can also check external links.
74+
This sends a HEAD/GET request to every external link, so if everything is green, the website only links to websites that are still active.
75+
However, it also means that a lot of network requests are made, which will take a while.
76+
77+
Use with care.
78+
79+
```bash
80+
rocket lint --validate-externals
81+
```
82+
7183
## Add a Not Found Page
7284
7385
When a user enters a URL that does not exist, a "famous" 404 Page Not Found error occurs.

0 commit comments

Comments
 (0)