Skip to content

Commit 2e2084f

Browse files
committed
fix(parser): use raw tag close #6
1 parent 0f94d76 commit 2e2084f

File tree

8 files changed

+2020
-2002
lines changed

8 files changed

+2020
-2002
lines changed

package.json

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,19 +42,19 @@
4242
},
4343
"homepage": "https://github.com/acrazing/html5parser#readme",
4444
"devDependencies": {
45-
"@types/fs-extra": "^8.0.1",
46-
"@types/jest": "^24.0.25",
47-
"@types/node": "^13.1.4",
48-
"@types/node-fetch": "^2.5.4",
49-
"fs-extra": "^8.1.0",
50-
"husky": "^3.1.0",
51-
"jest": "^24.9.0",
52-
"lint-staged": "^9.5.0",
53-
"node-fetch": "^2.6.0",
45+
"@types/fs-extra": "^9.0.1",
46+
"@types/jest": "^26.0.13",
47+
"@types/node": "^14.6.4",
48+
"@types/node-fetch": "^2.5.7",
49+
"fs-extra": "^9.0.1",
50+
"husky": "^4.3.0",
51+
"jest": "^26.4.2",
52+
"lint-staged": "^10.3.0",
53+
"node-fetch": "^2.6.1",
5454
"npm-run-all": "^4.1.5",
55-
"prettier": "^1.19.1",
56-
"ts-jest": "^24.2.0",
57-
"typescript": "^3.7.4"
55+
"prettier": "^2.1.1",
56+
"ts-jest": "^26.3.0",
57+
"typescript": "^4.0.2"
5858
},
5959
"jest": {
6060
"moduleFileExtensions": [
@@ -110,6 +110,6 @@
110110
"endOfLine": "lf"
111111
},
112112
"dependencies": {
113-
"tslib": "^1.10.0"
113+
"tslib": "^2.0.1"
114114
}
115115
}

src/config.ts

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,10 @@ function createMap<T>(
1212
keys: string,
1313
value: T,
1414
): { [key: number]: T; [key: string]: T } {
15-
return keys.split(',').reduce(
16-
(pre, now) => {
17-
pre[now] = value;
18-
return pre;
19-
},
20-
{} as any,
21-
);
15+
return keys.split(',').reduce((pre, now) => {
16+
pre[now] = value;
17+
return pre;
18+
}, Object.create(null));
2219
}
2320

2421
export const selfCloseTags = createMap<true>(

src/parse.spec.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ import {
1919
SyntaxKind,
2020
} from './types';
2121

22-
let index = 0;
22+
export let index = 0;
2323

24-
function text(input: string, start = index): IText {
24+
export function text(input: string, start = index): IText {
2525
return {
2626
type: SyntaxKind.Text,
2727
start: start,
@@ -30,7 +30,7 @@ function text(input: string, start = index): IText {
3030
};
3131
}
3232

33-
function tag(
33+
export function tag(
3434
input: string,
3535
name: string,
3636
open: IText,
@@ -375,7 +375,7 @@ const scenes: Array<{
375375
text('<DIV>', 0),
376376
[],
377377
[],
378-
text('</div>', 5),
378+
text('</DIV>', 5),
379379
0,
380380
'DIV',
381381
),

src/parse.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ function parseOpenTag() {
255255
function parseCloseTag() {
256256
let _context = tagChain;
257257
while (true) {
258-
if (!_context || token.value.startsWith(_context.tag.name)) {
258+
if (!_context || token.value.trim() === _context.tag.name) {
259259
break;
260260
}
261261
_context = _context.parent;
@@ -266,7 +266,7 @@ function parseCloseTag() {
266266
_context.tag.close = createLiteral(
267267
token.start - 2,
268268
token.end + 1,
269-
`</${token.value}>`,
269+
buffer.substring(token.start - 2, token.end + 1),
270270
);
271271
_context.tag.end = _context.tag.close.end;
272272
_context = _context.parent;

src/test/issue_6.spec.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* @since 2020-09-09 22:04:54
3+
* @author acrazing <[email protected]>
4+
*/
5+
6+
import { parse } from '../parse';
7+
import { tag, text } from '../parse.spec';
8+
import { tokenize, TokenKind } from '../tokenize';
9+
import { token, tokenIndex } from '../tokenize.spec';
10+
11+
describe('issue #6', () => {
12+
it('should tokenize upper case tag to lower', () => {
13+
expect(tokenize('<Test></Test>')).toEqual([
14+
token('test', TokenKind.OpenTag, 1),
15+
token('', TokenKind.OpenTagEnd),
16+
token('test', TokenKind.CloseTag, tokenIndex + 3),
17+
]);
18+
});
19+
it('should parse upper case as expected', () => {
20+
expect(parse('<Test></Test>')).toEqual([
21+
tag(
22+
'<Test></Test>',
23+
'test',
24+
text('<Test>', 0),
25+
[],
26+
[],
27+
text('</Test>'),
28+
0,
29+
'Test',
30+
),
31+
]);
32+
});
33+
});

src/tokenize.spec.ts

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,20 @@ interface ICase {
1717
tokens: IToken[];
1818
}
1919

20-
let index = 0;
20+
export let tokenIndex = 0;
2121

22-
function token(
22+
export function token(
2323
value: string,
2424
type: TokenKind = TokenKind.Literal,
25-
start = index,
25+
start = tokenIndex,
2626
) {
2727
const v = {
2828
start: start,
2929
end: start + value.length,
3030
value,
3131
type,
3232
};
33-
index = v.end;
33+
tokenIndex = v.end;
3434
return v;
3535
}
3636

@@ -74,7 +74,7 @@ const cases: ICase[] = [
7474
token(' ', TokenKind.Whitespace),
7575
token('"g7\'"', TokenKind.AttrValueDq),
7676
token('', TokenKind.OpenTagEnd),
77-
token('div', TokenKind.CloseTag, index + 3),
77+
token('div', TokenKind.CloseTag, tokenIndex + 3),
7878
],
7979
},
8080
{
@@ -123,8 +123,8 @@ const cases: ICase[] = [
123123
token('"st"', TokenKind.AttrValueDq),
124124
token('u', TokenKind.AttrValueNq),
125125
token('', TokenKind.OpenTagEnd),
126-
token('M', void 0, index + 1),
127-
token('div', TokenKind.CloseTag, index + 2),
126+
token('M', void 0, tokenIndex + 1),
127+
token('div', TokenKind.CloseTag, tokenIndex + 2),
128128
],
129129
},
130130
{
@@ -163,7 +163,7 @@ const cases: ICase[] = [
163163
token('', TokenKind.OpenTag, 1),
164164
token('? hello world ?'),
165165
token('', TokenKind.OpenTagEnd),
166-
token('!', TokenKind.OpenTag, index + 2),
166+
token('!', TokenKind.OpenTag, tokenIndex + 2),
167167
token('- hello world -'),
168168
token('', TokenKind.OpenTagEnd),
169169
],
@@ -175,31 +175,31 @@ const cases: ICase[] = [
175175
tokens: [
176176
token('a1', TokenKind.OpenTag, 1),
177177
token('', TokenKind.OpenTagEnd),
178-
token('b2', TokenKind.OpenTag, index + 2),
178+
token('b2', TokenKind.OpenTag, tokenIndex + 2),
179179
token('/', TokenKind.OpenTagEnd),
180-
token('c3', TokenKind.OpenTag, index + 2),
180+
token('c3', TokenKind.OpenTag, tokenIndex + 2),
181181
token(' ', TokenKind.Whitespace),
182182
token('/', TokenKind.OpenTagEnd),
183-
token('d4', TokenKind.OpenTag, index + 2),
183+
token('d4', TokenKind.OpenTag, tokenIndex + 2),
184184
token(' ', TokenKind.Whitespace),
185185
token('/', TokenKind.AttrValueNq),
186186
token(' ', TokenKind.Whitespace),
187187
token('', TokenKind.OpenTagEnd),
188-
token('e5', TokenKind.OpenTag, index + 2),
188+
token('e5', TokenKind.OpenTag, tokenIndex + 2),
189189
token(' ', TokenKind.Whitespace),
190190
token('f6', TokenKind.AttrValueNq),
191191
token('/', TokenKind.OpenTagEnd),
192-
token('g7', TokenKind.OpenTag, index + 2),
192+
token('g7', TokenKind.OpenTag, tokenIndex + 2),
193193
token(' ', TokenKind.Whitespace),
194194
token('/', TokenKind.AttrValueNq),
195195
token('h8', TokenKind.AttrValueNq),
196196
token('', TokenKind.OpenTagEnd),
197-
token('i9', TokenKind.OpenTag, index + 2),
197+
token('i9', TokenKind.OpenTag, tokenIndex + 2),
198198
token(' ', TokenKind.Whitespace),
199199
token('/', TokenKind.AttrValueNq),
200200
token('j10', TokenKind.AttrValueNq),
201201
token('/', TokenKind.OpenTagEnd),
202-
token('k11', TokenKind.OpenTag, index + 2),
202+
token('k11', TokenKind.OpenTag, tokenIndex + 2),
203203
token('/', TokenKind.AttrValueNq),
204204
token('/', TokenKind.OpenTagEnd),
205205
],
@@ -209,7 +209,7 @@ const cases: ICase[] = [
209209
input: '</div></ div >',
210210
tokens: [
211211
token('div', TokenKind.CloseTag, 2),
212-
token(' div ', TokenKind.CloseTag, index + 3),
212+
token(' div ', TokenKind.CloseTag, tokenIndex + 3),
213213
],
214214
},
215215
{
@@ -231,9 +231,9 @@ const cases: ICase[] = [
231231
tokens: [
232232
token('script', TokenKind.OpenTag, 1),
233233
token('', TokenKind.OpenTagEnd),
234-
token('</div>', TokenKind.Literal, index + 1),
234+
token('</div>', TokenKind.Literal, tokenIndex + 1),
235235
token('</script'),
236-
token('script ', TokenKind.CloseTag, index + 2),
236+
token('script ', TokenKind.CloseTag, tokenIndex + 2),
237237
],
238238
},
239239
{
@@ -242,9 +242,9 @@ const cases: ICase[] = [
242242
tokens: [
243243
token('style', TokenKind.OpenTag, 1),
244244
token('', TokenKind.OpenTagEnd),
245-
token('</div>', TokenKind.Literal, index + 1),
245+
token('</div>', TokenKind.Literal, tokenIndex + 1),
246246
token('</style'),
247-
token('style ', TokenKind.CloseTag, index + 2),
247+
token('style ', TokenKind.CloseTag, tokenIndex + 2),
248248
],
249249
},
250250
];

src/types.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,11 @@ export interface IAttribute extends IBaseNode {
3535

3636
export interface ITag extends IBaseNode {
3737
type: SyntaxKind.Tag;
38+
// original open tag, <Div id="id">
3839
open: IText;
40+
// lower case tag name, div
3941
name: string;
42+
// original case tag name, Div
4043
rawName: string;
4144
attributes: IAttribute[];
4245
// the attribute map, if `options.setAttributeMap` is `true`
@@ -47,6 +50,7 @@ export interface ITag extends IBaseNode {
4750
| Array<ITag | IText> // with close tag
4851
| undefined // self closed
4952
| null; // EOF before open tag end
53+
// original close tag, </DIV >
5054
close:
5155
| IText // with close tag
5256
| undefined // self closed

0 commit comments

Comments
 (0)