Skip to content

Commit f2eb530

Browse files
authored
Sanitizing html after markdown and tool/inject-html directive rendering (#2833)
* Sanitizing html after markdown and tool/inject-html directive rendering * Making sanitizing hidden and off by default * dart run grinder build * Tests of HTML sanitizing * Fix number of classes * Moved html sanitizer tests * Removed the script removing hack
1 parent b4fa868 commit f2eb530

12 files changed

+367
-15
lines changed

Diff for: CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 4.1.0-dev
2+
* Experimental feature: HTML output from markdown rendering, `{@tool}` and
3+
`{@inject-html}` is sanitized when hidden option `--sanitize-html` is passed.
4+
15
## 4.0.0
26
* BREAKING CHANGE: Refactors to support NNBD and adapt to new analyzer
37
changes are technically semver breaking. If you make extensive use of

Diff for: dartdoc_options.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
dartdoc:
22
linkToSource:
33
root: '.'
4-
uriTemplate: 'https://github.com/dart-lang/dartdoc/blob/v4.0.0/%f%#L%l%'
4+
uriTemplate: 'https://github.com/dart-lang/dartdoc/blob/v4.1.0-dev/%f%#L%l%'

Diff for: lib/src/dartdoc_options.dart

+6
Original file line numberDiff line numberDiff line change
@@ -1311,6 +1311,8 @@ class DartdocOptionContext extends DartdocOptionContextBase
13111311

13121312
bool get injectHtml => optionSet['injectHtml'].valueAt(context);
13131313

1314+
bool get sanitizeHtml => optionSet['sanitizeHtml'].valueAt(context);
1315+
13141316
bool get excludeFooterVersion =>
13151317
optionSet['excludeFooterVersion'].valueAt(context);
13161318

@@ -1460,6 +1462,10 @@ Future<List<DartdocOption>> createDartdocOptions(
14601462
DartdocOptionArgOnly<bool>('injectHtml', false, resourceProvider,
14611463
help: 'Allow the use of the {@inject-html} directive to inject raw '
14621464
'HTML into dartdoc output.'),
1465+
DartdocOptionArgOnly<bool>('sanitizeHtml', false, resourceProvider,
1466+
hide: true,
1467+
help: 'Sanitize HTML generated from markdown, {@tool} and '
1468+
'{@inject-html} directives.'),
14631469
DartdocOptionArgOnly<String>(
14641470
'input', resourceProvider.pathContext.current, resourceProvider,
14651471
optionIs: OptionKind.dir,

Diff for: lib/src/generator/templates.runtime_renderers.dart

+1
Original file line numberDiff line numberDiff line change
@@ -15260,6 +15260,7 @@ const _invisibleGetters = {
1526015260
'includeExternal',
1526115261
'includeSource',
1526215262
'injectHtml',
15263+
'sanitizeHtml',
1526315264
'excludeFooterVersion',
1526415265
'tools',
1526515266
'inputDir',

Diff for: lib/src/model/documentation.dart

+3-2
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,9 @@ class Documentation {
5151
}
5252
_hasExtendedDocs = parseResult.hasExtendedDocs;
5353

54-
var renderResult =
55-
_renderer.render(parseResult.nodes, processFullDocs: processFullDocs);
54+
var renderResult = _renderer.render(parseResult.nodes,
55+
processFullDocs: processFullDocs,
56+
sanitizeHtml: _element.config.sanitizeHtml);
5657

5758
if (processFullDocs) {
5859
_asHtml = renderResult.asHtml;

Diff for: lib/src/render/documentation_renderer.dart

+267-9
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,17 @@
22
// for details. All rights reserved. Use of this source code is governed by a
33
// BSD-style license that can be found in the LICENSE file.
44

5-
import 'package:html/parser.dart' show parse;
5+
import 'package:html/dom.dart' as dom;
6+
import 'package:html/parser.dart' show parseFragment;
7+
68
import 'package:markdown/markdown.dart' as md;
79
import 'package:meta/meta.dart';
810

911
abstract class DocumentationRenderer {
1012
DocumentationRenderResult render(
1113
List<md.Node> nodes, {
1214
@required bool processFullDocs,
15+
@required bool sanitizeHtml,
1316
});
1417
}
1518

@@ -20,16 +23,16 @@ class DocumentationRendererHtml implements DocumentationRenderer {
2023
DocumentationRenderResult render(
2124
List<md.Node> nodes, {
2225
@required bool processFullDocs,
26+
@required bool sanitizeHtml,
2327
}) {
2428
if (nodes.isEmpty) {
2529
return DocumentationRenderResult.empty;
2630
}
31+
2732
var rawHtml = md.HtmlRenderer().render(nodes);
28-
var asHtmlDocument = parse(rawHtml);
29-
for (var s in asHtmlDocument.querySelectorAll('script')) {
30-
s.remove();
31-
}
32-
for (var pre in asHtmlDocument.querySelectorAll('pre')) {
33+
var asHtmlFragment = parseFragment(rawHtml);
34+
35+
for (var pre in asHtmlFragment.querySelectorAll('pre')) {
3336
if (pre.children.length > 1 && pre.children.first.localName != 'code') {
3437
continue;
3538
}
@@ -44,16 +47,21 @@ class DocumentationRendererHtml implements DocumentationRenderer {
4447
// Assume the user intended Dart if there are no other classes present.
4548
if (!specifiesLanguage) pre.classes.add('language-dart');
4649
}
50+
51+
if (sanitizeHtml) {
52+
_sanitize(asHtmlFragment);
53+
}
54+
4755
var asHtml = '';
4856

4957
if (processFullDocs) {
5058
// `trim` fixes an issue with line ending differences between Mac and
5159
// Windows.
52-
asHtml = asHtmlDocument.body.innerHtml?.trim();
60+
asHtml = asHtmlFragment.outerHtml.trim();
5361
}
54-
var asOneLiner = asHtmlDocument.body.children.isEmpty
62+
var asOneLiner = asHtmlFragment.children.isEmpty
5563
? ''
56-
: asHtmlDocument.body.children.first.innerHtml;
64+
: asHtmlFragment.children.first.innerHtml;
5765

5866
return DocumentationRenderResult(asHtml: asHtml, asOneLiner: asOneLiner);
5967
}
@@ -68,3 +76,253 @@ class DocumentationRenderResult {
6876
const DocumentationRenderResult(
6977
{@required this.asHtml, @required this.asOneLiner});
7078
}
79+
80+
/// Whether [className] may be kept on an element in sanitized HTML.
///
/// Only the `deprecated` marker class and syntax-highlighting classes of the
/// form `language-*` are retained.
bool _allowClassName(String className) {
  if (className.startsWith('language-')) return true;
  return className == 'deprecated';
}
82+
83+
/// Returns the `rel` values that should be set on a link pointing at [uri].
///
/// Links that carry a host (absolute or protocol-relative URLs) are treated
/// as user-generated content and get `ugc`. Host-less references (relative
/// paths, fragments, `mailto:` addresses) get no `rel` values.
///
/// A [uri] that cannot be parsed yields no `rel` values instead of throwing:
/// [Uri.tryParse] returns `null` for malformed input, so the result is
/// null-checked before its host is inspected.
Iterable<String> _addLinkRel(String uri) {
  final u = Uri.tryParse(uri);
  if (u != null && u.host.isNotEmpty) {
    // TODO(jonasfj): Consider allowing non-ugc links for trusted sites.
    return <String>['ugc'];
  }
  return <String>[];
}
91+
92+
/// Recursively sanitizes [node] and its descendants in place.
///
/// For element nodes:
///  * an element whose upper-cased tag name is not in [_allowedElements] is
///    removed together with its whole subtree;
///  * class names rejected by [_allowClassName] are dropped, and the `class`
///    attribute itself is removed once no class remains;
///  * every other attribute is kept only if [_isAttributeAllowed] accepts it;
///  * an `<a>` that still has an `href` gets its `rel` attribute overwritten
///    with the values from [_addLinkRel], when there are any.
///
/// Non-element nodes are left untouched; only their children are visited.
void _sanitize(dom.Node node) {
  if (node is dom.Element) {
    final tag = node.localName.toUpperCase();
    if (!_allowedElements.contains(tag)) {
      node.remove();
      return;
    }

    node.attributes.removeWhere((name, value) {
      final attr = name.toString();
      if (attr != 'class') {
        return !_isAttributeAllowed(tag, attr, value);
      }
      // Filter individual class names; drop the attribute when none survive.
      node.classes.removeWhere((c) => !_allowClassName(c));
      return node.classes.isEmpty;
    });

    // Only anchors whose href survived attribute filtering get a `rel`.
    final href = tag == 'A' ? node.attributes['href'] : null;
    if (href != null) {
      final rels = _addLinkRel(href);
      if (rels != null && rels.isNotEmpty) {
        node.attributes['rel'] = rels.join(' ');
      }
    }
  }

  if (!node.hasChildNodes()) return;

  // Visit children last-to-first over a snapshot, so a child that removes
  // itself from the live node list cannot cause a sibling to be skipped.
  for (final child in node.nodes.reversed.toList(growable: false)) {
    _sanitize(child);
  }
}
125+
126+
/// Whether attribute [attrName] with [value] may stay on a [tagName] element.
///
/// Attributes listed in [_alwaysAllowedAttributes] are accepted on any
/// element. Any other attribute is accepted only when
/// [_elementAttributeValidators] has a validator registered for that
/// tag/attribute pair (e.g. `href`, `src`, `cite`) and the validator accepts
/// [value]. [tagName] is looked up as given; the tables use upper-case keys.
bool _isAttributeAllowed(String tagName, String attrName, String value) {
  if (_alwaysAllowedAttributes.contains(attrName)) return true;

  final validatorsForTag = _elementAttributeValidators[tagName];
  final validator =
      validatorsForTag == null ? null : validatorsForTag[attrName];
  return validator != null && validator(value);
}
142+
143+
/// Upper-case tag names of the HTML elements that survive sanitizing.
///
/// Inspired by the set of HTML tags allowed in GFM.
///
/// Declared `const` so this security allowlist cannot be mutated at runtime.
const _allowedElements = <String>{
  'H1',
  'H2',
  'H3',
  'H4',
  'H5',
  'H6',
  'H7',
  'H8',
  'BR',
  'B',
  'I',
  'STRONG',
  'EM',
  'A',
  'PRE',
  'CODE',
  'IMG',
  'TT',
  'DIV',
  'INS',
  'DEL',
  'SUP',
  'SUB',
  'P',
  'OL',
  'UL',
  'TABLE',
  'THEAD',
  'TBODY',
  'TFOOT',
  'BLOCKQUOTE',
  'DL',
  'DT',
  'DD',
  'KBD',
  'Q',
  'SAMP',
  'VAR',
  'HR',
  'RUBY',
  'RT',
  'RP',
  'LI',
  'TR',
  'TD',
  'TH',
  'S',
  'STRIKE',
  'SUMMARY',
  'DETAILS',
  'CAPTION',
  'FIGURE',
  'FIGCAPTION',
  'ABBR',
  'BDO',
  'CITE',
  'DFN',
  'MARK',
  'SMALL',
  'SPAN',
  'TIME',
  'WBR',
};
208+
209+
/// Attribute names that are kept on every allowed element without any
/// validation of their value.
///
/// Inspired by the set of HTML attributes allowed in GFM.
///
/// Declared `const` so this security allowlist cannot be mutated at runtime.
const _alwaysAllowedAttributes = <String>{
  'abbr',
  'accept',
  'accept-charset',
  'accesskey',
  'action',
  'align',
  'alt',
  'aria-describedby',
  'aria-hidden',
  'aria-label',
  'aria-labelledby',
  'axis',
  'border',
  'cellpadding',
  'cellspacing',
  'char',
  'charoff',
  'charset',
  'checked',
  'clear',
  'cols',
  'colspan',
  'color',
  'compact',
  'coords',
  'datetime',
  'dir',
  'disabled',
  'enctype',
  'for',
  'frame',
  'headers',
  'height',
  'hreflang',
  'hspace',
  'ismap',
  'label',
  'lang',
  'maxlength',
  'media',
  'method',
  'multiple',
  'name',
  'nohref',
  'noshade',
  'nowrap',
  'open',
  'prompt',
  'readonly',
  'rel',
  'rev',
  'rows',
  'rowspan',
  'rules',
  'scope',
  'selected',
  'shape',
  'size',
  'span',
  'start',
  'summary',
  'tabindex',
  'target',
  'title',
  'type',
  'usemap',
  'valign',
  'value',
  'vspace',
  'width',
  'itemprop',
};
283+
284+
/// Attribute validator that accepts every value.
bool _alwaysAllowed(String _) {
  return true;
}
285+
286+
/// Whether [url] is acceptable as a link target (`href`).
///
/// Accepts scheme-less (relative) references and the `https`, `http` and
/// `mailto` schemes. Anything else, including input that does not parse as a
/// URI, is rejected.
bool _validLink(String url) {
  final parsed = Uri.tryParse(url);
  if (parsed == null) return false;
  if (!parsed.hasScheme) return true;
  return const ['https', 'http', 'mailto'].any(parsed.isScheme);
}
297+
298+
/// Whether [url] is acceptable as a resource reference (`src`, `longdesc`,
/// `cite`).
///
/// Accepts scheme-less (relative) references and the `https` and `http`
/// schemes. Anything else, including input that does not parse as a URI, is
/// rejected.
bool _validUrl(String url) {
  final parsed = Uri.tryParse(url);
  if (parsed == null) return false;
  if (!parsed.hasScheme) return true;
  return const ['https', 'http'].any(parsed.isScheme);
}
306+
307+
/// Validator table for elements whose only specially-validated attribute is
/// `cite`; shared by several entries of [_elementAttributeValidators].
const _citeAttributeValidator = <String, bool Function(String)>{
  'cite': _validUrl,
};

/// Attribute validators for attributes that are allowed only on specific
/// elements, keyed by upper-case tag name and then by attribute name.
///
/// An attribute that is not always allowed is kept only when a validator is
/// registered here for its element and that validator accepts the value.
///
/// Both tables are `const` so the sanitizer's rules cannot be mutated at
/// runtime.
const _elementAttributeValidators =
    <String, Map<String, bool Function(String)>>{
  'A': {
    'href': _validLink,
  },
  'IMG': {
    'src': _validUrl,
    'longdesc': _validUrl,
  },
  'DIV': {
    'itemscope': _alwaysAllowed,
    'itemtype': _alwaysAllowed,
  },
  'BLOCKQUOTE': _citeAttributeValidator,
  'DEL': _citeAttributeValidator,
  'INS': _citeAttributeValidator,
  'Q': _citeAttributeValidator,
};

Diff for: lib/src/version.dart

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
// Generated code. Do not modify.
2-
const packageVersion = '4.0.0';
2+
const packageVersion = '4.1.0-dev';

Diff for: pubspec.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: dartdoc
2-
# Run `grind build` after updating.
3-
version: 4.0.0
2+
# Run `dart run grinder build` after updating.
3+
version: 4.1.0-dev
44
description: A non-interactive HTML documentation generator for Dart source code.
55
homepage: https://github.com/dart-lang/dartdoc
66
environment:

0 commit comments

Comments
 (0)