Skip to content

Commit 1b72e04

Browse files
vpetrovykh authored and 1st1 committed
Bugfix for an apparent infinite loop in grammar. (#163)
This is a case of semantically equivalent regexes not being computationally equivalent. The issue also relates to how negated character classes (e.g. `[^abc]`) are treated. In the end I had to replace something of the form `(A | B)*` with `A* (BA*)*` to avoid grinding the regex engine to a halt (catastrophic backtracking). Issue #150.
1 parent cc2b832 commit 1b72e04

File tree

6 files changed

+238
-18
lines changed

6 files changed

+238
-18
lines changed

Diff for: grammars/MagicPython.cson

+4-6
Original file line numberDiff line numberDiff line change
@@ -942,7 +942,7 @@ repository:
942942
{{ | }}
943943
| (?:
944944
{
945-
\\w*? (\\.[[:alpha:]_]\\w*? | \\[[^\\]'"]+\\])*?
945+
\\w* (\\.[[:alpha:]_]\\w* | \\[[^\\]'"]+\\])*
946946
(![rsa])?
947947
( : \\w? [<>=^]? [-+ ]? \\#?
948948
\\d* ,? (\\.\\d+)? [bcdeEfFgGnosxX%]? )?
@@ -964,13 +964,11 @@ repository:
964964
(?x)
965965
(
966966
{
967-
\\w*? (\\.[[:alpha:]_]\\w*? | \\[[^\\]'"]+\\])*?
967+
\\w* (\\.[[:alpha:]_]\\w* | \\[[^\\]'"]+\\])*
968968
(![rsa])?
969969
(:)
970-
(
971-
[^'"{}\\n]+?
972-
|
973-
\\{ [^'"}\\n]*? \\}
970+
[^'"{}\\n]* (?:
971+
\\{ [^'"}\\n]*? \\} [^'"{}\\n]*
974972
)*
975973
}
976974
)

Diff for: grammars/MagicPython.tmLanguage

+4-6
Original file line numberDiff line numberDiff line change
@@ -1444,7 +1444,7 @@
14441444
{{ | }}
14451445
| (?:
14461446
{
1447-
\w*? (\.[[:alpha:]_]\w*? | \[[^\]'"]+\])*?
1447+
\w* (\.[[:alpha:]_]\w* | \[[^\]'"]+\])*
14481448
(![rsa])?
14491449
( : \w? [&lt;&gt;=^]? [-+ ]? \#?
14501450
\d* ,? (\.\d+)? [bcdeEfFgGnosxX%]? )?
@@ -1477,13 +1477,11 @@
14771477
<string>(?x)
14781478
(
14791479
{
1480-
\w*? (\.[[:alpha:]_]\w*? | \[[^\]'"]+\])*?
1480+
\w* (\.[[:alpha:]_]\w* | \[[^\]'"]+\])*
14811481
(![rsa])?
14821482
(:)
1483-
(
1484-
[^'"{}\n]+?
1485-
|
1486-
\{ [^'"}\n]*? \}
1483+
[^'"{}\n]* (?:
1484+
\{ [^'"}\n]*? \} [^'"{}\n]*
14871485
)*
14881486
}
14891487
)

Diff for: grammars/src/MagicPython.syntax.yaml

+4-6
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,7 @@ repository:
791791
{{ | }}
792792
| (?:
793793
{
794-
\w*? (\.[[:alpha:]_]\w*? | \[[^\]'"]+\])*?
794+
\w* (\.[[:alpha:]_]\w* | \[[^\]'"]+\])*
795795
(![rsa])?
796796
( : \w? [<>=^]? [-+ ]? \#?
797797
\d* ,? (\.\d+)? [bcdeEfFgGnosxX%]? )?
@@ -818,13 +818,11 @@ repository:
818818
(?x)
819819
(
820820
{
821-
\w*? (\.[[:alpha:]_]\w*? | \[[^\]'"]+\])*?
821+
\w* (\.[[:alpha:]_]\w* | \[[^\]'"]+\])*
822822
(![rsa])?
823823
(:)
824-
(
825-
[^'"{}\n]+?
826-
|
827-
\{ [^'"}\n]*? \}
824+
[^'"{}\n]* (?:
825+
\{ [^'"}\n]*? \} [^'"{}\n]*
828826
)*
829827
}
830828
)

Diff for: test/atom-spec/python-spec.js

+144
Original file line numberDiff line numberDiff line change
@@ -12853,6 +12853,150 @@ describe("Grammar Tests", function() {
1285312853
expect(tokens[3][13].scopes).toEqual(["source.python","comment.line.number-sign.python"]);
1285412854
});
1285512855

12856+
it("test/strings/bug1.py",
12857+
function() {
12858+
tokens = grammar.tokenizeLines("# issue 150\nrecord = {\n \"a\": {k: str(v) for k, v in foo if \"\"}\n}")
12859+
expect(tokens[0][0].value).toBe("#");
12860+
expect(tokens[0][0].scopes).toEqual(["source.python","comment.line.number-sign.python","punctuation.definition.comment.python"]);
12861+
expect(tokens[0][1].value).toBe(" issue 150");
12862+
expect(tokens[0][1].scopes).toEqual(["source.python","comment.line.number-sign.python"]);
12863+
expect(tokens[1][0].value).toBe("record");
12864+
expect(tokens[1][0].scopes).toEqual(["source.python"]);
12865+
expect(tokens[1][1].value).toBe(" ");
12866+
expect(tokens[1][1].scopes).toEqual(["source.python"]);
12867+
expect(tokens[1][2].value).toBe("=");
12868+
expect(tokens[1][2].scopes).toEqual(["source.python","keyword.operator.assignment.python"]);
12869+
expect(tokens[1][3].value).toBe(" ");
12870+
expect(tokens[1][3].scopes).toEqual(["source.python"]);
12871+
expect(tokens[1][4].value).toBe("{");
12872+
expect(tokens[1][4].scopes).toEqual(["source.python","punctuation.definition.dict.begin.python"]);
12873+
expect(tokens[2][0].value).toBe(" ");
12874+
expect(tokens[2][0].scopes).toEqual(["source.python"]);
12875+
expect(tokens[2][1].value).toBe("\"");
12876+
expect(tokens[2][1].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.begin.python"]);
12877+
expect(tokens[2][2].value).toBe("a");
12878+
expect(tokens[2][2].scopes).toEqual(["source.python","string.quoted.single.python"]);
12879+
expect(tokens[2][3].value).toBe("\"");
12880+
expect(tokens[2][3].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.end.python"]);
12881+
expect(tokens[2][4].value).toBe(":");
12882+
expect(tokens[2][4].scopes).toEqual(["source.python","punctuation.separator.dict.python"]);
12883+
expect(tokens[2][5].value).toBe(" ");
12884+
expect(tokens[2][5].scopes).toEqual(["source.python"]);
12885+
expect(tokens[2][6].value).toBe("{");
12886+
expect(tokens[2][6].scopes).toEqual(["source.python","punctuation.definition.dict.begin.python"]);
12887+
expect(tokens[2][7].value).toBe("k");
12888+
expect(tokens[2][7].scopes).toEqual(["source.python"]);
12889+
expect(tokens[2][8].value).toBe(":");
12890+
expect(tokens[2][8].scopes).toEqual(["source.python","punctuation.separator.dict.python"]);
12891+
expect(tokens[2][9].value).toBe(" ");
12892+
expect(tokens[2][9].scopes).toEqual(["source.python"]);
12893+
expect(tokens[2][10].value).toBe("str");
12894+
expect(tokens[2][10].scopes).toEqual(["source.python","meta.function-call.python","support.type.python"]);
12895+
expect(tokens[2][11].value).toBe("(");
12896+
expect(tokens[2][11].scopes).toEqual(["source.python","meta.function-call.python","punctuation.definition.arguments.begin.python"]);
12897+
expect(tokens[2][12].value).toBe("v");
12898+
expect(tokens[2][12].scopes).toEqual(["source.python","meta.function-call.python","meta.function-call.arguments.python"]);
12899+
expect(tokens[2][13].value).toBe(")");
12900+
expect(tokens[2][13].scopes).toEqual(["source.python","meta.function-call.python","punctuation.definition.arguments.end.python"]);
12901+
expect(tokens[2][14].value).toBe(" ");
12902+
expect(tokens[2][14].scopes).toEqual(["source.python"]);
12903+
expect(tokens[2][15].value).toBe("for");
12904+
expect(tokens[2][15].scopes).toEqual(["source.python","keyword.control.flow.python"]);
12905+
expect(tokens[2][16].value).toBe(" ");
12906+
expect(tokens[2][16].scopes).toEqual(["source.python"]);
12907+
expect(tokens[2][17].value).toBe("k");
12908+
expect(tokens[2][17].scopes).toEqual(["source.python"]);
12909+
expect(tokens[2][18].value).toBe(",");
12910+
expect(tokens[2][18].scopes).toEqual(["source.python","punctuation.separator.element.python"]);
12911+
expect(tokens[2][19].value).toBe(" ");
12912+
expect(tokens[2][19].scopes).toEqual(["source.python"]);
12913+
expect(tokens[2][20].value).toBe("v");
12914+
expect(tokens[2][20].scopes).toEqual(["source.python"]);
12915+
expect(tokens[2][21].value).toBe(" ");
12916+
expect(tokens[2][21].scopes).toEqual(["source.python"]);
12917+
expect(tokens[2][22].value).toBe("in");
12918+
expect(tokens[2][22].scopes).toEqual(["source.python","keyword.operator.logical.python"]);
12919+
expect(tokens[2][23].value).toBe(" ");
12920+
expect(tokens[2][23].scopes).toEqual(["source.python"]);
12921+
expect(tokens[2][24].value).toBe("foo");
12922+
expect(tokens[2][24].scopes).toEqual(["source.python"]);
12923+
expect(tokens[2][25].value).toBe(" ");
12924+
expect(tokens[2][25].scopes).toEqual(["source.python"]);
12925+
expect(tokens[2][26].value).toBe("if");
12926+
expect(tokens[2][26].scopes).toEqual(["source.python","keyword.control.flow.python"]);
12927+
expect(tokens[2][27].value).toBe(" ");
12928+
expect(tokens[2][27].scopes).toEqual(["source.python"]);
12929+
expect(tokens[2][28].value).toBe("\"");
12930+
expect(tokens[2][28].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.begin.python"]);
12931+
expect(tokens[2][29].value).toBe("\"");
12932+
expect(tokens[2][29].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.end.python"]);
12933+
expect(tokens[2][30].value).toBe("}");
12934+
expect(tokens[2][30].scopes).toEqual(["source.python","punctuation.definition.dict.end.python"]);
12935+
expect(tokens[3][0].value).toBe("}");
12936+
expect(tokens[3][0].scopes).toEqual(["source.python","punctuation.definition.dict.end.python"]);
12937+
});
12938+
12939+
it("test/strings/bug2.py",
12940+
function() {
12941+
tokens = grammar.tokenizeLines("# issue 150\ncmd = \"git-clang-format --style=\\\"{{BasedOnStyle: Google, ColumnLimit: 100, IndentWidth: 2, \" \\\n \"AlignConsecutiveAssignments: true}}\\\" {COMMIT_SHA} -- ./**/*.proto > {OUTPUT}\".format(")
12942+
expect(tokens[0][0].value).toBe("#");
12943+
expect(tokens[0][0].scopes).toEqual(["source.python","comment.line.number-sign.python","punctuation.definition.comment.python"]);
12944+
expect(tokens[0][1].value).toBe(" issue 150");
12945+
expect(tokens[0][1].scopes).toEqual(["source.python","comment.line.number-sign.python"]);
12946+
expect(tokens[1][0].value).toBe("cmd");
12947+
expect(tokens[1][0].scopes).toEqual(["source.python"]);
12948+
expect(tokens[1][1].value).toBe(" ");
12949+
expect(tokens[1][1].scopes).toEqual(["source.python"]);
12950+
expect(tokens[1][2].value).toBe("=");
12951+
expect(tokens[1][2].scopes).toEqual(["source.python","keyword.operator.assignment.python"]);
12952+
expect(tokens[1][3].value).toBe(" ");
12953+
expect(tokens[1][3].scopes).toEqual(["source.python"]);
12954+
expect(tokens[1][4].value).toBe("\"");
12955+
expect(tokens[1][4].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.begin.python"]);
12956+
expect(tokens[1][5].value).toBe("git-clang-format --style=");
12957+
expect(tokens[1][5].scopes).toEqual(["source.python","string.quoted.single.python"]);
12958+
expect(tokens[1][6].value).toBe("\\\"");
12959+
expect(tokens[1][6].scopes).toEqual(["source.python","string.quoted.single.python","constant.character.escape.python"]);
12960+
expect(tokens[1][7].value).toBe("{{");
12961+
expect(tokens[1][7].scopes).toEqual(["source.python","string.quoted.single.python","meta.format.brace.python","constant.character.format.placeholder.other.python"]);
12962+
expect(tokens[1][8].value).toBe("BasedOnStyle: Google, ColumnLimit: 100, IndentWidth: 2, ");
12963+
expect(tokens[1][8].scopes).toEqual(["source.python","string.quoted.single.python"]);
12964+
expect(tokens[1][9].value).toBe("\"");
12965+
expect(tokens[1][9].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.end.python"]);
12966+
expect(tokens[1][10].value).toBe(" ");
12967+
expect(tokens[1][10].scopes).toEqual(["source.python"]);
12968+
expect(tokens[1][11].value).toBe("\\");
12969+
expect(tokens[1][11].scopes).toEqual(["source.python","punctuation.separator.continuation.line.python"]);
12970+
expect(tokens[1][12].value).toBe("");
12971+
expect(tokens[1][12].scopes).toEqual(["source.python"]);
12972+
expect(tokens[2][0].value).toBe(" ");
12973+
expect(tokens[2][0].scopes).toEqual(["source.python"]);
12974+
expect(tokens[2][1].value).toBe("\"");
12975+
expect(tokens[2][1].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.begin.python"]);
12976+
expect(tokens[2][2].value).toBe("AlignConsecutiveAssignments: true");
12977+
expect(tokens[2][2].scopes).toEqual(["source.python","string.quoted.single.python"]);
12978+
expect(tokens[2][3].value).toBe("}}");
12979+
expect(tokens[2][3].scopes).toEqual(["source.python","string.quoted.single.python","meta.format.brace.python","constant.character.format.placeholder.other.python"]);
12980+
expect(tokens[2][4].value).toBe("\\\"");
12981+
expect(tokens[2][4].scopes).toEqual(["source.python","string.quoted.single.python","constant.character.escape.python"]);
12982+
expect(tokens[2][5].value).toBe(" ");
12983+
expect(tokens[2][5].scopes).toEqual(["source.python","string.quoted.single.python"]);
12984+
expect(tokens[2][6].value).toBe("{COMMIT_SHA}");
12985+
expect(tokens[2][6].scopes).toEqual(["source.python","string.quoted.single.python","meta.format.brace.python","constant.character.format.placeholder.other.python"]);
12986+
expect(tokens[2][7].value).toBe(" -- ./**/*.proto > ");
12987+
expect(tokens[2][7].scopes).toEqual(["source.python","string.quoted.single.python"]);
12988+
expect(tokens[2][8].value).toBe("{OUTPUT}");
12989+
expect(tokens[2][8].scopes).toEqual(["source.python","string.quoted.single.python","meta.format.brace.python","constant.character.format.placeholder.other.python"]);
12990+
expect(tokens[2][9].value).toBe("\"");
12991+
expect(tokens[2][9].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.end.python"]);
12992+
expect(tokens[2][10].value).toBe(".");
12993+
expect(tokens[2][10].scopes).toEqual(["source.python","punctuation.separator.period.python"]);
12994+
expect(tokens[2][11].value).toBe("format");
12995+
expect(tokens[2][11].scopes).toEqual(["source.python","meta.function-call.python","meta.function-call.generic.python"]);
12996+
expect(tokens[2][12].value).toBe("(");
12997+
expect(tokens[2][12].scopes).toEqual(["source.python","meta.function-call.python","punctuation.definition.arguments.begin.python"]);
12998+
});
12999+
1285613000
it("test/strings/bytes1.py",
1285713001
function() {
1285813002
tokens = grammar.tokenizeLines("a = b\"\"\"\nmultiline \"binary\" string \\\n\n \\xf1 \\u1234aaaa \\U1234aaaa\n\n \\N{BLACK SPADE SUIT}\n\"\"\"")

Diff for: test/strings/bug1.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# issue 150
2+
record = {
3+
"a": {k: str(v) for k, v in foo if ""}
4+
}
5+
6+
7+
8+
9+
# : comment.line.number-sign.python, punctuation.definition.comment.python, source.python
10+
issue 150 : comment.line.number-sign.python, source.python
11+
record : source.python
12+
: source.python
13+
= : keyword.operator.assignment.python, source.python
14+
: source.python
15+
{ : punctuation.definition.dict.begin.python, source.python
16+
: source.python
17+
" : punctuation.definition.string.begin.python, source.python, string.quoted.single.python
18+
a : source.python, string.quoted.single.python
19+
" : punctuation.definition.string.end.python, source.python, string.quoted.single.python
20+
: : punctuation.separator.dict.python, source.python
21+
: source.python
22+
{ : punctuation.definition.dict.begin.python, source.python
23+
k : source.python
24+
: : punctuation.separator.dict.python, source.python
25+
: source.python
26+
str : meta.function-call.python, source.python, support.type.python
27+
( : meta.function-call.python, punctuation.definition.arguments.begin.python, source.python
28+
v : meta.function-call.arguments.python, meta.function-call.python, source.python
29+
) : meta.function-call.python, punctuation.definition.arguments.end.python, source.python
30+
: source.python
31+
for : keyword.control.flow.python, source.python
32+
: source.python
33+
k : source.python
34+
, : punctuation.separator.element.python, source.python
35+
: source.python
36+
v : source.python
37+
: source.python
38+
in : keyword.operator.logical.python, source.python
39+
: source.python
40+
foo : source.python
41+
: source.python
42+
if : keyword.control.flow.python, source.python
43+
: source.python
44+
" : punctuation.definition.string.begin.python, source.python, string.quoted.single.python
45+
" : punctuation.definition.string.end.python, source.python, string.quoted.single.python
46+
} : punctuation.definition.dict.end.python, source.python
47+
} : punctuation.definition.dict.end.python, source.python

Diff for: test/strings/bug2.py

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# issue 150
2+
cmd = "git-clang-format --style=\"{{BasedOnStyle: Google, ColumnLimit: 100, IndentWidth: 2, " \
3+
"AlignConsecutiveAssignments: true}}\" {COMMIT_SHA} -- ./**/*.proto > {OUTPUT}".format(
4+
5+
6+
7+
8+
# : comment.line.number-sign.python, punctuation.definition.comment.python, source.python
9+
issue 150 : comment.line.number-sign.python, source.python
10+
cmd : source.python
11+
: source.python
12+
= : keyword.operator.assignment.python, source.python
13+
: source.python
14+
" : punctuation.definition.string.begin.python, source.python, string.quoted.single.python
15+
git-clang-format --style= : source.python, string.quoted.single.python
16+
\" : constant.character.escape.python, source.python, string.quoted.single.python
17+
{{ : constant.character.format.placeholder.other.python, meta.format.brace.python, source.python, string.quoted.single.python
18+
BasedOnStyle: Google, ColumnLimit: 100, IndentWidth: 2, : source.python, string.quoted.single.python
19+
" : punctuation.definition.string.end.python, source.python, string.quoted.single.python
20+
: source.python
21+
\ : punctuation.separator.continuation.line.python, source.python
22+
: source.python
23+
: source.python
24+
" : punctuation.definition.string.begin.python, source.python, string.quoted.single.python
25+
AlignConsecutiveAssignments: true : source.python, string.quoted.single.python
26+
}} : constant.character.format.placeholder.other.python, meta.format.brace.python, source.python, string.quoted.single.python
27+
\" : constant.character.escape.python, source.python, string.quoted.single.python
28+
: source.python, string.quoted.single.python
29+
{COMMIT_SHA} : constant.character.format.placeholder.other.python, meta.format.brace.python, source.python, string.quoted.single.python
30+
-- ./**/*.proto > : source.python, string.quoted.single.python
31+
{OUTPUT} : constant.character.format.placeholder.other.python, meta.format.brace.python, source.python, string.quoted.single.python
32+
" : punctuation.definition.string.end.python, source.python, string.quoted.single.python
33+
. : punctuation.separator.period.python, source.python
34+
format : meta.function-call.generic.python, meta.function-call.python, source.python
35+
( : meta.function-call.python, punctuation.definition.arguments.begin.python, source.python

0 commit comments

Comments
 (0)