Skip to content

Commit 22e5b3a

Browse files
committed
feat(safeHtml): add safeHtml API to remove attr/tags
1 parent 4419f97 commit 22e5b3a

File tree

6 files changed

+183
-3
lines changed

6 files changed

+183
-3
lines changed

README.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# html5parser
22

3-
A simple and fast html5 parser, the result could be manipulated like
3+
A very tiny and fast HTML5 AST parser; the result can be manipulated like
44
ECMAScript ESTree, especially about the attributes.
55

66
## Introduction
@@ -76,6 +76,20 @@ export function tokenize(input: string): IToken[];
7676

7777
// Utils API, walk the ast tree
7878
export function walk(ast: INode[], options: IWalkOptions): void;
79+
80+
// Return sanitized HTML: tags and attributes that are not on the whitelist are removed
81+
export function safeHtml(
82+
html: string,
83+
options?: Partial<SafeHtmlOptions>,
84+
): string;
85+
86+
// The default values of these options are defined in ./src/safeHtml.ts
87+
export interface SafeHtmlOptions {
88+
allowedTags: string[];
89+
allowedAttrs: string[];
90+
tagAllowedAttrs: Record<string, string[]>;
91+
allowedUrl: RegExp;
92+
}
7993
```
8094

8195
## Abstract Syntax Tree Spec

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "html5parser",
33
"description": "A fast, accurate AST parser for HTML5",
4-
"version": "1.1.2",
4+
"version": "1.2.0",
55
"author": "acrazing <[email protected]>",
66
"keywords": [
77
"html5",

src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,5 @@ export * from './types';
1313
export * from './parse';
1414
export * from './tokenize';
1515
export * from './utils';
16+
export * from './config';
17+
export * from './safeHtml';

src/safeHtml.spec.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* @since 2020-09-09 23:37:28
3+
* @author acrazing <[email protected]>
4+
*/
5+
6+
import { safeHtml } from './safeHtml';
7+
8+
const htmlInput = `
9+
<div>
10+
<h1>H1</h1>
11+
<h2>H2</h2>
12+
\t<script>Script</script>
13+
\t<style>Style</style>
14+
<p class="class" style="padding: 0">
15+
<span>Span</span>
16+
<table>
17+
<tr><td>TD</td></tr>
18+
</table>
19+
<img src="hello world" id="omit" />
20+
</p>
21+
<a href="/download.html" target="_blank" about="about">Download<span>child</span></a>
22+
<a href="javascript:" target="_blank" about="about">Javascript<span>child</span></a>
23+
</div>
24+
`;
25+
26+
const htmlOutput = `
27+
<div>
28+
<h1>H1</h1>
29+
<h2>H2</h2>
30+
\t
31+
\t
32+
<p style="padding: 0">
33+
<span>Span</span>
34+
<table>
35+
<tr><td>TD</td></tr>
36+
</table>
37+
<img src="hello world">
38+
</p>
39+
<a href="/download.html" target="_blank">Download<span>child</span></a>
40+
<a target="_blank">Javascript<span>child</span></a>
41+
</div>
42+
`;
43+
44+
describe('safeHtml', () => {
45+
it('should stringify safe html as expected', () => {
46+
expect(safeHtml(htmlInput)).toEqual(htmlOutput);
47+
});
48+
});

src/safeHtml.ts

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* @since 2020-09-09 22:53:14
3+
* @author acrazing <[email protected]>
4+
*/
5+
6+
import { selfCloseTags } from './config';
7+
import { parse } from './parse';
8+
import { INode, SyntaxKind } from './types';
9+
10+
/**
 * Whitelist configuration consumed by `safeHtml`.
 *
 * Everything that is not explicitly listed here is removed from the output.
 */
export interface SafeHtmlOptions {
  /** Tag names that are kept; any other tag is dropped together with its content. */
  allowedTags: string[];
  /** Attribute names that are kept on every allowed tag. */
  allowedAttrs: string[];
  /** Additional attribute names that are kept only on the given tag (keyed by tag name). */
  tagAllowedAttrs: Record<string, string[]>;
  /** Pattern that the value of a `src` / `href` attribute must match to be kept. */
  allowedUrl: RegExp;
}
16+
17+
/**
 * Default whitelist used by `safeHtml` when the caller does not override a
 * field.
 *
 * - `allowedTags`: basic block, heading, list, table and inline tags.
 * - `allowedAttrs`: `style` is kept on every allowed tag.
 * - `tagAllowedAttrs`: `href` / `target` on `<a>`, `src` on `<img>`.
 * - `allowedUrl`: accepts `mailto:`, `tel:`, `http(s):` and `ftp:` URLs, plus
 *   values that contain no colon at all or whose text before the first colon
 *   contains a character other than `a-z`, `0-9`, `.`, `+`, `-`.
 */
export const safeHtmlDefaultOptions: SafeHtmlOptions = {
  allowedTags: [
    // block containers and headings
    'div',
    'p',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    // lists
    'ol',
    'ul',
    'li',
    // tables
    'table',
    'thead',
    'tbody',
    'tr',
    'th',
    'td',
    // inline content
    'span',
    'a',
    'img',
  ],
  allowedAttrs: ['style'],
  tagAllowedAttrs: {
    a: ['href', 'target'],
    img: ['src'],
  },
  allowedUrl: /^(?:mailto|tel|https?|ftp|[^:]*[^a-z0-9.+-][^:]*):|^[^:]*$/i,
};
47+
48+
/**
 * Sanitize an HTML string against a whitelist of tags, attributes and URLs.
 *
 * @param input - HTML source to sanitize.
 * @param options - Overrides for `safeHtmlDefaultOptions`. `allowedTags`,
 *   `allowedAttrs` and `allowedUrl` replace the default when provided;
 *   `tagAllowedAttrs` is merged key-by-key over the default map.
 * @returns The re-serialized HTML containing only whitelisted content.
 */
export function safeHtml(
  input: string,
  options: Partial<SafeHtmlOptions> = {},
): string {
  const defaults = safeHtmlDefaultOptions;
  // Resolve each field with `??` instead of spreading `options`: with a spread,
  // an explicitly `undefined` field (valid for `Partial<>`) would overwrite the
  // default and crash later on `undefined.indexOf(...)`.
  const config: SafeHtmlOptions = {
    allowedTags: options.allowedTags ?? defaults.allowedTags,
    allowedAttrs: options.allowedAttrs ?? defaults.allowedAttrs,
    tagAllowedAttrs: {
      ...defaults.tagAllowedAttrs,
      ...options.tagAllowedAttrs,
    },
    allowedUrl: options.allowedUrl ?? defaults.allowedUrl,
  };
  return stringify(parse(input), config, input);
}
63+
64+
/**
 * Reduce a raw attribute value to the text a browser would resolve as a URL,
 * so the whitelist check cannot be bypassed by obfuscation.
 *
 * Decodes numeric character references (with or without the trailing `;`) and
 * the named references `&colon;`, `&Tab;`, `&NewLine;`, then removes every
 * tab / CR / LF and trims leading and trailing C0 control characters and spaces.
 */
function normalizeUrlForCheck(raw: string): string {
  const decoded = raw.replace(
    /&(?:#[xX]([0-9a-fA-F]+);?|#([0-9]+);?|(colon|Tab|NewLine);)/g,
    (
      _match: string,
      hex: string | undefined,
      dec: string | undefined,
      named: string | undefined,
    ): string => {
      if (named !== undefined) {
        return named === 'colon' ? ':' : named === 'Tab' ? '\t' : '\n';
      }
      const code =
        hex !== undefined ? parseInt(hex, 16) : parseInt(dec ?? '', 10);
      // Only BMP code points matter for scheme detection; anything else
      // (including 0 and out-of-range values) becomes U+FFFD.
      return code > 0 && code <= 0xffff ? String.fromCharCode(code) : '\ufffd';
    },
  );
  return decoded
    .replace(/[\t\n\r]/g, '')
    .replace(/^[\u0000-\u0020]+|[\u0000-\u0020]+$/g, '');
}

/**
 * Decide whether one attribute survives sanitizing.
 *
 * @param name - Attribute name as written in the source.
 * @param value - Attribute value without quotes, or `undefined` when the
 *   attribute has no value.
 * @param tagAttrs - Extra attribute names whitelisted for the current tag.
 * @param config - Active sanitizer configuration.
 */
function isAttributeAllowed(
  name: string,
  value: string | undefined,
  tagAttrs: string[] | undefined,
  config: SafeHtmlOptions,
): boolean {
  const whitelisted =
    config.allowedAttrs.indexOf(name) > -1 ||
    (tagAttrs !== undefined && tagAttrs.indexOf(name) > -1);
  if (!whitelisted) {
    return false;
  }
  if (value === undefined) {
    return true;
  }
  // Attribute names are case-insensitive in HTML, so a whitelisted `HREF`
  // must be URL-checked exactly like `href`.
  const lowerName = name.toLowerCase();
  if (lowerName !== 'src' && lowerName !== 'href') {
    return true;
  }
  // A caller-supplied pattern may carry the `g` / `y` flag; reset its cursor so
  // consecutive tests do not depend on each other.
  config.allowedUrl.lastIndex = 0;
  return config.allowedUrl.test(normalizeUrlForCheck(value));
}

/**
 * Serialize an AST back to HTML, keeping only whitelisted tags and attributes.
 *
 * Text nodes are emitted unchanged. A tag that is not in `config.allowedTags`
 * is dropped together with its content. Kept attributes are copied verbatim
 * from `input` using their source offsets.
 *
 * @param ast - Nodes to serialize.
 * @param config - Active sanitizer configuration.
 * @param input - The source text the AST offsets refer to.
 * @returns The sanitized HTML for `ast`.
 * @throws Error when a self-closing tag has a body, or when any other allowed
 *   tag lacks a body or a closing tag.
 */
function stringify(
  ast: INode[],
  config: SafeHtmlOptions,
  input: string,
): string {
  return ast
    .map((node) => {
      if (node.type === SyntaxKind.Text) {
        return node.value;
      }
      if (config.allowedTags.indexOf(node.name) === -1) {
        return '';
      }
      if (selfCloseTags[node.name]) {
        if (node.body !== void 0) {
          throw new Error(
            `self closed tag "${node.name}" should not have body`,
          );
        }
      } else if (!node.body || !node.close) {
        throw new Error(`tag "${node.name}" should have body and close`);
      }
      // Own-property lookup: a tag named e.g. "constructor" must not resolve to
      // an inherited Object.prototype member.
      const tagAttrs = Object.prototype.hasOwnProperty.call(
        config.tagAllowedAttrs,
        node.name,
      )
        ? config.tagAllowedAttrs[node.name]
        : undefined;
      const attrs = node.attributes
        .filter((a) =>
          isAttributeAllowed(
            a.name.value,
            a.value ? a.value.value : undefined,
            tagAttrs,
            config,
          ),
        )
        .map((a) => input.substring(a.start, a.end))
        .join(' ');
      const head = '<' + node.rawName + (attrs ? ' ' + attrs : '') + '>';
      if (!node.body) {
        return head;
      }
      return head + stringify(node.body, config, input) + `</${node.rawName}>`;
    })
    .join('');
}

tsconfig.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"compilerOptions": {
33
"target": "es5",
44
"module": "esnext",
5-
"lib": ["esnext", "dom"],
5+
"lib": ["es5", "dom"],
66
"allowJs": false,
77
"jsx": "react",
88
"declaration": false,

0 commit comments

Comments
 (0)