Skip to content

Commit b237248

Browse files
committed
fix(ogenregex): print ECMAScript regexp literals
1 parent 660b2c5 commit b237248

File tree

2 files changed

+121
-43
lines changed

2 files changed

+121
-43
lines changed

ogenregex/ogenregex.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ var _ = []Regexp{
2222
}
2323

2424
type goRegexp struct {
25-
exp *regexp.Regexp
25+
orig string
26+
exp *regexp.Regexp
2627
}
2728

2829
func (r goRegexp) Match(s []byte) (bool, error) {
@@ -34,7 +35,7 @@ func (r goRegexp) MatchString(s string) (bool, error) {
3435
}
3536

3637
func (r goRegexp) String() string {
37-
return r.exp.String()
38+
return r.orig
3839
}
3940

4041
type regexp2Regexp struct {
@@ -67,7 +68,7 @@ type Regexp interface {
6768
func Compile(exp string) (Regexp, error) {
6869
if converted, ok := Convert(exp); ok {
6970
if re, err := regexp.Compile(converted); err == nil {
70-
return goRegexp{re}, nil
71+
return goRegexp{orig: exp, exp: re}, nil
7172
}
7273
}
7374
re, err := regexp2.Compile(exp, regexp2.ECMAScript|regexp2.Unicode)

ogenregex/ogenregex_test.go

Lines changed: 117 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -9,68 +9,68 @@ import (
99

1010
func TestCompile(t *testing.T) {
1111
type testCase struct {
12-
input string
13-
wantType Regexp
14-
wantString string
15-
wantErr bool
12+
input string
13+
wantType Regexp
14+
wantErr bool
1615
}
1716

1817
tests := []testCase{
1918
// Conversion is not required.
20-
{`\x20`, goRegexp{}, `\x20`, false},
21-
{`\v`, goRegexp{}, `\v`, false},
22-
{`\t`, goRegexp{}, `\t`, false},
23-
{`\n`, goRegexp{}, `\n`, false},
24-
{`\d`, goRegexp{}, `\d`, false},
25-
{`\w`, goRegexp{}, `\w`, false},
26-
{`\w{1}`, goRegexp{}, `\w{1}`, false},
27-
{`\w{1,}`, goRegexp{}, `\w{1,}`, false},
28-
{`\w{1,2}`, goRegexp{}, `\w{1,2}`, false},
29-
{`\b`, goRegexp{}, `\b`, false},
30-
{`\B`, goRegexp{}, `\B`, false},
31-
{`\.`, goRegexp{}, `\.`, false},
32-
{`\[`, goRegexp{}, `\[`, false},
33-
{`\]`, goRegexp{}, `\]`, false},
34-
{`\(`, goRegexp{}, `\(`, false},
35-
{`\)`, goRegexp{}, `\)`, false},
36-
{`\{`, goRegexp{}, `\{`, false},
37-
{`\}`, goRegexp{}, `\}`, false},
38-
{`\\`, goRegexp{}, `\\`, false},
39-
{`\$`, goRegexp{}, `\$`, false},
19+
{`\0`, goRegexp{}, false},
20+
{`\x20`, goRegexp{}, false},
21+
{`\v`, goRegexp{}, false},
22+
{`\t`, goRegexp{}, false},
23+
{`\n`, goRegexp{}, false},
24+
{`\d`, goRegexp{}, false},
25+
{`\w`, goRegexp{}, false},
26+
{`\w{1}`, goRegexp{}, false},
27+
{`\w{1,}`, goRegexp{}, false},
28+
{`\w{1,2}`, goRegexp{}, false},
29+
{`\b`, goRegexp{}, false},
30+
{`\B`, goRegexp{}, false},
31+
{`\.`, goRegexp{}, false},
32+
{`\[`, goRegexp{}, false},
33+
{`\]`, goRegexp{}, false},
34+
{`\(`, goRegexp{}, false},
35+
{`\)`, goRegexp{}, false},
36+
{`\{`, goRegexp{}, false},
37+
{`\}`, goRegexp{}, false},
38+
{`\\`, goRegexp{}, false},
39+
{`\$`, goRegexp{}, false},
4040

4141
// Simplification.
42-
{`\u000a`, goRegexp{}, `\x{000a}`, false},
43-
{`\u{000a}`, goRegexp{}, `\x{000a}`, false},
42+
{`\u000a`, goRegexp{}, false},
43+
{`\u{000a}`, goRegexp{}, false},
4444
// "\z" just unnecessarily escapes the 'z'.
45-
{`\z`, goRegexp{}, `z`, false},
45+
{`\z`, goRegexp{}, false},
4646

4747
// Conversion is required.
4848
//
4949
// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes#types.
5050
//
5151
// In ECMA-262, \c + [a-zA-Z] is a control character.
52-
{`\ca`, goRegexp{}, `\x01`, false},
53-
{`\cA`, goRegexp{}, `\x01`, false},
54-
{`\cb`, goRegexp{}, `\x02`, false},
55-
{`\cB`, goRegexp{}, `\x02`, false},
52+
{`\ca`, goRegexp{}, false},
53+
{`\cA`, goRegexp{}, false},
54+
{`\cb`, goRegexp{}, false},
55+
{`\cB`, goRegexp{}, false},
5656
// In ECMA-262, \b in a character class is a backspace.
57-
{`[\b]`, goRegexp{}, `[\x08]`, false},
57+
{`[\b]`, goRegexp{}, false},
5858
// ECMA-262 dot matches any single character except line terminators: \n, \r, \u2028 or \u2029.
59-
{`.*`, goRegexp{}, re2Dot + `*`, false},
59+
{`.*`, goRegexp{}, false},
6060
// Whitespace characters in ECMA-262 differ from those in RE2.
6161
//
6262
// Whitespace characters in ECMA-262:
6363
// [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]
64-
{`\s`, goRegexp{}, `[` + whitespaceChars + `]`, false},
65-
{`\S`, goRegexp{}, `[^` + whitespaceChars + `]`, false},
66-
{`[\s]`, goRegexp{}, `[` + whitespaceChars + `]`, false},
64+
{`\s`, goRegexp{}, false},
65+
{`\S`, goRegexp{}, false},
66+
{`[\s]`, goRegexp{}, false},
6767

6868
// Use regexp2.
69-
{`^(?!examples/)`, regexp2Regexp{}, `^(?!examples/)`, false},
69+
{`^(?!examples/)`, regexp2Regexp{}, false},
7070

7171
// Error.
72-
{")", nil, ``, true},
73-
{"(?`)", nil, ``, true},
72+
{")", nil, true},
73+
{"(?`)", nil, true},
7474
}
7575
for i, tt := range tests {
7676
tt := tt
@@ -87,7 +87,84 @@ func TestCompile(t *testing.T) {
8787
a.NoError(err)
8888
a.NotPanics(func() { MustCompile(tt.input) })
8989
a.IsType(tt.wantType, got)
90-
a.Equal(tt.wantString, got.String())
90+
a.Equal(tt.input, got.String())
91+
})
92+
}
93+
}
94+
95+
func TestConvert(t *testing.T) {
96+
type testCase struct {
97+
input string
98+
wantString string
99+
wantOk bool
100+
}
101+
102+
tests := []testCase{
103+
// Conversion is not required.
104+
{`\0`, `\0`, true},
105+
{`\x20`, `\x20`, true},
106+
{`\v`, `\v`, true},
107+
{`\t`, `\t`, true},
108+
{`\n`, `\n`, true},
109+
{`\d`, `\d`, true},
110+
{`\w`, `\w`, true},
111+
{`\w{1}`, `\w{1}`, true},
112+
{`\w{1,}`, `\w{1,}`, true},
113+
{`\w{1,2}`, `\w{1,2}`, true},
114+
{`\b`, `\b`, true},
115+
{`\B`, `\B`, true},
116+
{`\.`, `\.`, true},
117+
{`\[`, `\[`, true},
118+
{`\]`, `\]`, true},
119+
{`\(`, `\(`, true},
120+
{`\)`, `\)`, true},
121+
{`\{`, `\{`, true},
122+
{`\}`, `\}`, true},
123+
{`\\`, `\\`, true},
124+
{`\$`, `\$`, true},
125+
126+
// Simplification.
127+
{`\u000a`, `\x{000a}`, true},
128+
{`\u{000a}`, `\x{000a}`, true},
129+
// "\z" just unnecessarily escapes the 'z'.
130+
{`\z`, `z`, true},
131+
132+
// Conversion is required.
133+
//
134+
// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes#types.
135+
//
136+
// In ECMA-262, \c + [a-zA-Z] is a control character.
137+
{`\ca`, `\x01`, true},
138+
{`\cA`, `\x01`, true},
139+
{`\cb`, `\x02`, true},
140+
{`\cB`, `\x02`, true},
141+
// In ECMA-262, \b in a character class is a backspace.
142+
{`[\b]`, `[\x08]`, true},
143+
// ECMA-262 dot matches any single character except line terminators: \n, \r, \u2028 or \u2029.
144+
{`.*`, re2Dot + `*`, true},
145+
// Whitespace characters in ECMA-262 differ from those in RE2.
146+
//
147+
// Whitespace characters in ECMA-262:
148+
// [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]
149+
{`\s`, `[` + whitespaceChars + `]`, true},
150+
{`\S`, `[^` + whitespaceChars + `]`, true},
151+
{`[\s]`, `[` + whitespaceChars + `]`, true},
152+
153+
// Use regexp2.
154+
{`^(?!examples/)`, ``, false},
155+
156+
// Error.
157+
{")", ``, false},
158+
{"(?`)", ``, false},
159+
}
160+
for i, tt := range tests {
161+
tt := tt
162+
t.Run(fmt.Sprintf("Test%d", i+1), func(t *testing.T) {
163+
a := require.New(t)
164+
165+
got, ok := Convert(tt.input)
166+
a.Equal(tt.wantOk, ok, "%q", tt.input)
167+
a.Equal(tt.wantString, got)
91168
})
92169
}
93170
}

0 commit comments

Comments
 (0)