Skip to content

Commit d92d3c3

Browse files
fix: fixed small bug with case insensitive range matching
1 parent c0f279b commit d92d3c3

File tree

7 files changed

+84
-30
lines changed

7 files changed

+84
-30
lines changed

assembly/__spec_tests__/generated.spec.ts

+15-6
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,11 @@ it("line: 152 - matches ([\\da-f:]+)$ against 'abc'", () => {
776776
expect(match.matches[0]).toBe("abc".substring(0, 3));
777777
expect(match.matches[1]).toBe("abc".substring(0, 3));
778778
});
779-
xit("line: 153 - aspect [Actual]: <Match>null vs [Expected]: Not <Match>null issue", () => {});
779+
it("line: 153 - matches ([\\da-f:]+)$ against 'fed'", () => {
780+
const match = exec("([\\da-f:]+)$", "fed", "is");
781+
expect(match.matches[0]).toBe("fed".substring(0, 3));
782+
expect(match.matches[1]).toBe("fed".substring(0, 3));
783+
});
780784
it("line: 154 - matches ([\\da-f:]+)$ against 'E'", () => {
781785
const match = exec("([\\da-f:]+)$", "E", "is");
782786
expect(match.matches[0]).toBe("E".substring(0, 1));
@@ -1044,8 +1048,8 @@ xit("line: 199 - non capturing groups not supported", () => {});
10441048
xit("line: 200 - non capturing groups not supported", () => {});
10451049
xit("line: 201 - non capturing groups not supported", () => {});
10461050
xit("line: 202 - non capturing groups not supported", () => {});
1047-
xit("line: 203 - aspect [Actual]: <Match>null vs [Expected]: Not <Match>null issue", () => {});
1048-
xit("line: 204 - aspect [Actual]: <Match>null vs [Expected]: Not <Match>null issue", () => {});
1051+
xit("line: 203 - test appears to be incorrect?", () => {});
1052+
xit("line: 204 - test appears to be incorrect?", () => {});
10491053
it("line: 205 - matches ^ a\\ b[c ]d $ against 'abcd'", () => {
10501054
expectNotMatch("^ a\\ b[c ]d $", ["abcd"]);
10511055
});
@@ -1352,7 +1356,7 @@ it("line: 1083 - matches ^[ab]{1,3}(ab*?|b) against 'The quick brown fox'", () =
13521356
xit("line: 1084 - back references are not supported", () => {});
13531357
xit("line: 1085 - back references are not supported", () => {});
13541358
xit("line: 1086 - test encoding issue", () => {});
1355-
xit("line: 1087 - requires triage", () => {});
1359+
xit("line: 1087 - test requires a substring function", () => {});
13561360
xit("line: 1088 - requires triage", () => {});
13571361
it("line: 1089 - matches abc\\x0def\\x00pqr\\x000xyz\\x0000AB against 'abc456 abc\x0def\x00pqr\x000xyz\x0000ABCDE'", () => {
13581362
const match = exec(
@@ -1506,8 +1510,13 @@ it("line: 1144 - matches ^[W-c]+$ against 'WXY_^abc'", () => {
15061510
const match = exec("^[W-c]+$", "WXY_^abc", "s");
15071511
expect(match.matches[0]).toBe("WXY_^abc".substring(0, 8));
15081512
});
1509-
xit("line: 1145 - as-pect test issue", () => {});
1510-
xit("line: 1146 - as-pect test issue", () => {});
1513+
it("line: 1145 - matches ^[W-c]+$ against 'wxy'", () => {
1514+
expectNotMatch("^[W-c]+$", ["wxy"]);
1515+
});
1516+
it("line: 1146 - matches ^[W-c]+$ against 'WXY_^abc'", () => {
1517+
const match = exec("^[W-c]+$", "WXY_^abc", "is");
1518+
expect(match.matches[0]).toBe("WXY_^abc".substring(0, 8));
1519+
});
15111520
xit("line: 1147 - requires triage", () => {});
15121521
xit("line: 1148 - requires triage", () => {});
15131522
xit("line: 1149 - requires triage", () => {});

assembly/__tests__/character-sets.spec.ts

+9
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,12 @@ it("treats - as a literal in negated sets", () => {
4343
expectNotMatch("[^-abc]", ["-", "a", "b", "c"]);
4444
expectMatch("[^-abc]", ["1", "A"]);
4545
});
46+
47+
it("supports case insensitive matching", () => {
48+
// simple ranges
49+
expectMatch("[a-c]", ["A", "C", "a", "c"], "i");
50+
expectNotMatch("[a-c]", ["D", "d"], "i");
51+
// complex
52+
expectMatch("[W-c]", ["W", "w", "C", "c"], "i");
53+
expectNotMatch("[W-c]", ["V", "v", "D", "d"], "i");
54+
});

assembly/__tests__/utils.ts

+12-4
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,24 @@
11
import { RegExp, Match } from "..";
22

3-
export function expectMatch(regex: string, arr: string[]): void {
4-
let regexp = new RegExp(regex);
3+
export function expectMatch(
4+
regex: string,
5+
arr: string[],
6+
flags: string = ""
7+
): void {
8+
let regexp = new RegExp(regex, flags);
59
for (let i = 0; i < arr.length; i++) {
610
const value = arr[i];
711
const match = exec(regexp, value);
812
expect(match.matches[0]).toStrictEqual(value);
913
}
1014
}
1115

12-
export function expectNotMatch(regex: string, arr: string[]): void {
13-
let regexp = new RegExp(regex);
16+
export function expectNotMatch(
17+
regex: string,
18+
arr: string[],
19+
flags: string = ""
20+
): void {
21+
let regexp = new RegExp(regex, flags);
1422
for (let i = 0; i < arr.length; i++) {
1523
const match = regexp.exec(arr[i]);
1624
expect(match).toBeNull(

assembly/nfa/matcher.ts

+27-7
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {
88
NodeType,
99
} from "../parser/node";
1010
import { Flags } from "../regexp";
11+
import { Range } from "../util";
1112

1213
const enum MatcherType {
1314
Character,
@@ -36,7 +37,10 @@ export class Matcher {
3637
node: CharacterRangeNode,
3738
flags: Flags
3839
): CharacterRangeMatcher {
39-
return new CharacterRangeMatcher(node.from, node.to, flags.ignoreCase);
40+
return new CharacterRangeMatcher(
41+
new Range(node.from, node.to),
42+
flags.ignoreCase
43+
);
4044
}
4145

4246
static fromCharacterSetNode(
@@ -89,20 +93,36 @@ export class CharacterMatcher extends Matcher {
8993
}
9094
}
9195

96+
const LOWERCASE_LETTERS = new Range(Char.a, Char.z);
97+
const UPPERCASE_LETTERS = new Range(Char.A, Char.Z);
98+
const UPPER_LOWER_OFFSET = Char.a - Char.A;
99+
92100
export class CharacterRangeMatcher extends Matcher {
93-
constructor(private from: u32, private to: u32, private ignoreCase: bool) {
101+
private ranges: Range[];
102+
103+
constructor(private range: Range, ignoreCase: bool) {
94104
super(MatcherType.CharacterRange);
105+
this.ranges = [range];
106+
95107
if (ignoreCase) {
96-
this.from |= 0x20;
97-
this.to |= 0x20;
108+
const lowerIntersect = range.intersection(LOWERCASE_LETTERS);
109+
if (lowerIntersect) {
110+
this.ranges.push(lowerIntersect.offset(-UPPER_LOWER_OFFSET));
111+
}
112+
const upperIntersect = range.intersection(UPPERCASE_LETTERS);
113+
if (upperIntersect) {
114+
this.ranges.push(upperIntersect.offset(UPPER_LOWER_OFFSET));
115+
}
98116
}
99117
}
100118

101119
matches(code: u32): bool {
102-
if (this.ignoreCase) {
103-
code |= 0x20;
120+
for (let i = 0, len = this.ranges.length; i < len; i++) {
121+
if (code >= u32(this.ranges[i].from) && code <= u32(this.ranges[i].to)) {
122+
return true;
123+
}
104124
}
105-
return code >= this.from && code <= this.to;
125+
return false;
106126
}
107127
}
108128

assembly/util.ts

+14
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,17 @@ export function replaceAtIndex<T>(arr: T[], index: u32, item: T): T[] {
1111
unchecked((res[index] = item));
1212
return res;
1313
}
14+
15+
/**
 * A range of integer values (here: character codes) with inclusive bounds,
 * i.e. both `from` and `to` are members of the range.
 */
export class Range {
  /**
   * @param from the first value contained in the range
   * @param to the last value contained in the range
   */
  constructor(public from: i32, public to: i32) {}

  /**
   * Computes the overlap between this range and `other`.
   *
   * @param other the range to intersect with
   * @returns a new range covering the values contained in both ranges, or
   * `null` when the two ranges share no value
   */
  intersection(other: Range): Range | null {
    const lower = i32(Math.max(this.from, other.from));
    const upper = i32(Math.min(this.to, other.to));
    // Both bounds are inclusive, so `lower == upper` is a valid overlap of
    // exactly one value (e.g. [Z-a] overlaps [A-Z] at 'Z' only). Using a
    // strict `<` here would drop that single shared value.
    return lower <= upper ? new Range(lower, upper) : null;
  }

  /**
   * Returns a copy of this range with both bounds shifted by `value`.
   *
   * @param value the amount added to `from` and `to` (may be negative)
   * @returns the shifted range; this range is left unchanged
   */
  offset(value: i32): Range {
    return new Range(this.from + value, this.to + value);
  }
}

spec/test-generator.js

+5-7
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,11 @@ const knownIssues = {
2121
1288,
2222
],
2323
"test contains an octal escape sequence": [1102],
24+
// the test results measure captured groups using character length / locations
25+
// see: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/length
26+
// this is tricky to reproduce
27+
"test requires a substring function": [1087],
2428
"requires triage": [
25-
1087,
2629
1363,
2730
1369,
2831
1163,
@@ -32,15 +35,10 @@ const knownIssues = {
3235
1413,
3336
...range(1301, 1308),
3437
],
35-
"as-pect test issue": [1145, 1146],
3638
"test indicates a malformed regex, whereas it appears OK in JS": [1189],
3739
"test regex contains syntax not supported in JS": [82, 1158, 281],
3840
"the test behaviour differs between PCRE and JS": [290],
39-
"aspect [Actual]: <Match>null vs [Expected]: Not <Match>null issue": [
40-
153,
41-
203,
42-
204,
43-
],
41+
"test appears to be incorrect?": [203, 204],
4442
};
4543

4644
const hasKnownIssue = (index) => {

ts/index.ts

+2-6
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@ globalAny.log = console.log;
55

66
import { RegExp } from "../assembly/regexp";
77

8-
const regexObj = new RegExp("^(a){1,3}");
9-
const match = regexObj.exec("abc");
8+
const regexObj = new RegExp("[a-c]", "i");
9+
const match = regexObj.exec("A");
1010
console.log(JSON.stringify(match, null, 2));
11-
12-
const regexObj2 = new RegExp("(a|b)c|a(b|c)");
13-
const match2 = regexObj2.exec("ab");
14-
console.log(JSON.stringify(match2, null, 2));

0 commit comments

Comments
 (0)