Skip to content

Commit

Permalink
fix(ogenregex): print ECMAScript regexp literals
Browse files Browse the repository at this point in the history
  • Loading branch information
tdakkota committed Jan 26, 2025
1 parent 660b2c5 commit b237248
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 43 deletions.
7 changes: 4 additions & 3 deletions ogenregex/ogenregex.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ var _ = []Regexp{
}

type goRegexp struct {
exp *regexp.Regexp
orig string
exp *regexp.Regexp
}

func (r goRegexp) Match(s []byte) (bool, error) {
Expand All @@ -34,7 +35,7 @@ func (r goRegexp) MatchString(s string) (bool, error) {
}

func (r goRegexp) String() string {
return r.exp.String()
return r.orig
}

type regexp2Regexp struct {
Expand Down Expand Up @@ -67,7 +68,7 @@ type Regexp interface {
func Compile(exp string) (Regexp, error) {
if converted, ok := Convert(exp); ok {
if re, err := regexp.Compile(converted); err == nil {
return goRegexp{re}, nil
return goRegexp{orig: exp, exp: re}, nil
}
}
re, err := regexp2.Compile(exp, regexp2.ECMAScript|regexp2.Unicode)
Expand Down
157 changes: 117 additions & 40 deletions ogenregex/ogenregex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,68 +9,68 @@ import (

func TestCompile(t *testing.T) {
type testCase struct {
input string
wantType Regexp
wantString string
wantErr bool
input string
wantType Regexp
wantErr bool
}

tests := []testCase{
// Conversion is not required.
{`\x20`, goRegexp{}, `\x20`, false},
{`\v`, goRegexp{}, `\v`, false},
{`\t`, goRegexp{}, `\t`, false},
{`\n`, goRegexp{}, `\n`, false},
{`\d`, goRegexp{}, `\d`, false},
{`\w`, goRegexp{}, `\w`, false},
{`\w{1}`, goRegexp{}, `\w{1}`, false},
{`\w{1,}`, goRegexp{}, `\w{1,}`, false},
{`\w{1,2}`, goRegexp{}, `\w{1,2}`, false},
{`\b`, goRegexp{}, `\b`, false},
{`\B`, goRegexp{}, `\B`, false},
{`\.`, goRegexp{}, `\.`, false},
{`\[`, goRegexp{}, `\[`, false},
{`\]`, goRegexp{}, `\]`, false},
{`\(`, goRegexp{}, `\(`, false},
{`\)`, goRegexp{}, `\)`, false},
{`\{`, goRegexp{}, `\{`, false},
{`\}`, goRegexp{}, `\}`, false},
{`\\`, goRegexp{}, `\\`, false},
{`\$`, goRegexp{}, `\$`, false},
{`\0`, goRegexp{}, false},
{`\x20`, goRegexp{}, false},
{`\v`, goRegexp{}, false},
{`\t`, goRegexp{}, false},
{`\n`, goRegexp{}, false},
{`\d`, goRegexp{}, false},
{`\w`, goRegexp{}, false},
{`\w{1}`, goRegexp{}, false},
{`\w{1,}`, goRegexp{}, false},
{`\w{1,2}`, goRegexp{}, false},
{`\b`, goRegexp{}, false},
{`\B`, goRegexp{}, false},
{`\.`, goRegexp{}, false},
{`\[`, goRegexp{}, false},
{`\]`, goRegexp{}, false},
{`\(`, goRegexp{}, false},
{`\)`, goRegexp{}, false},
{`\{`, goRegexp{}, false},
{`\}`, goRegexp{}, false},
{`\\`, goRegexp{}, false},
{`\$`, goRegexp{}, false},

// Simplification.
{`\u000a`, goRegexp{}, `\x{000a}`, false},
{`\u{000a}`, goRegexp{}, `\x{000a}`, false},
{`\u000a`, goRegexp{}, false},
{`\u{000a}`, goRegexp{}, false},
// "\z" just unnecessarily escapes the 'z'.
{`\z`, goRegexp{}, `z`, false},
{`\z`, goRegexp{}, false},

// Conversion is required.
//
// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes#types.
//
// In ECMA-262, \c + [a-zA-Z] is a control character.
{`\ca`, goRegexp{}, `\x01`, false},
{`\cA`, goRegexp{}, `\x01`, false},
{`\cb`, goRegexp{}, `\x02`, false},
{`\cB`, goRegexp{}, `\x02`, false},
{`\ca`, goRegexp{}, false},
{`\cA`, goRegexp{}, false},
{`\cb`, goRegexp{}, false},
{`\cB`, goRegexp{}, false},
// In ECMA-262, \b in a character class is a backspace.
{`[\b]`, goRegexp{}, `[\x08]`, false},
{`[\b]`, goRegexp{}, false},
// ECMA-262 dot matches any single character except line terminators: \n, \r, \u2028 or \u2029.
{`.*`, goRegexp{}, re2Dot + `*`, false},
{`.*`, goRegexp{}, false},
// Whitespace characters in ECMA-262 differ from those in RE2.
//
// Whitespace characters in ECMA-262:
// [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]
{`\s`, goRegexp{}, `[` + whitespaceChars + `]`, false},
{`\S`, goRegexp{}, `[^` + whitespaceChars + `]`, false},
{`[\s]`, goRegexp{}, `[` + whitespaceChars + `]`, false},
{`\s`, goRegexp{}, false},
{`\S`, goRegexp{}, false},
{`[\s]`, goRegexp{}, false},

// Use regexp2.
{`^(?!examples/)`, regexp2Regexp{}, `^(?!examples/)`, false},
{`^(?!examples/)`, regexp2Regexp{}, false},

// Error.
{")", nil, ``, true},
{"(?`)", nil, ``, true},
{")", nil, true},
{"(?`)", nil, true},
}
for i, tt := range tests {
tt := tt
Expand All @@ -87,7 +87,84 @@ func TestCompile(t *testing.T) {
a.NoError(err)
a.NotPanics(func() { MustCompile(tt.input) })
a.IsType(tt.wantType, got)
a.Equal(tt.wantString, got.String())
a.Equal(tt.input, got.String())
})
}
}

// TestConvert feeds a table of ECMAScript patterns to Convert and checks both
// results: the converted pattern string and the ok flag.
//
// Subtests are named "Test<N>" after the 1-based position of the case in the
// table, so the order of the cases below is significant.
func TestConvert(t *testing.T) {
	type convertCase struct {
		pattern string // Pattern handed to Convert.
		want    string // Expected converted pattern.
		ok      bool   // Expected ok flag.
	}

	// same builds a case where the pattern is expected to come back unchanged.
	same := func(p string) convertCase {
		return convertCase{pattern: p, want: p, ok: true}
	}
	// conv builds a case where the pattern is expected to be rewritten to want.
	conv := func(p, want string) convertCase {
		return convertCase{pattern: p, want: want, ok: true}
	}
	// reject builds a case where Convert is expected to return ("", false).
	reject := func(p string) convertCase {
		return convertCase{pattern: p}
	}

	cases := []convertCase{
		// No conversion needed: output equals input.
		same(`\0`),
		same(`\x20`),
		same(`\v`),
		same(`\t`),
		same(`\n`),
		same(`\d`),
		same(`\w`),
		same(`\w{1}`),
		same(`\w{1,}`),
		same(`\w{1,2}`),
		same(`\b`),
		same(`\B`),
		same(`\.`),
		same(`\[`),
		same(`\]`),
		same(`\(`),
		same(`\)`),
		same(`\{`),
		same(`\}`),
		same(`\\`),
		same(`\$`),

		// Simplification.
		conv(`\u000a`, `\x{000a}`),
		conv(`\u{000a}`, `\x{000a}`),
		// `\z` is a needlessly escaped 'z'.
		conv(`\z`, `z`),

		// Conversion needed.
		//
		// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes#types.
		//
		// In ECMA-262, \c followed by an ASCII letter is a control character.
		conv(`\ca`, `\x01`),
		conv(`\cA`, `\x01`),
		conv(`\cb`, `\x02`),
		conv(`\cB`, `\x02`),
		// Inside a character class, ECMA-262 treats \b as a backspace.
		conv(`[\b]`, `[\x08]`),
		// The ECMA-262 dot matches any single character except the line
		// terminators \n, \r, \u2028 and \u2029.
		conv(`.*`, re2Dot+`*`),
		// ECMA-262 and RE2 disagree on the whitespace set.
		//
		// ECMA-262 whitespace:
		// [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]
		conv(`\s`, `[`+whitespaceChars+`]`),
		conv(`\S`, `[^`+whitespaceChars+`]`),
		conv(`[\s]`, `[`+whitespaceChars+`]`),

		// Not convertible (a pattern meant for regexp2).
		reject(`^(?!examples/)`),

		// Malformed patterns.
		reject(")"),
		reject("(?`)"),
	}

	for idx, tc := range cases {
		tc := tc
		t.Run(fmt.Sprintf("Test%d", idx+1), func(t *testing.T) {
			got, ok := Convert(tc.pattern)
			require.Equal(t, tc.ok, ok, "%q", tc.pattern)
			require.Equal(t, tc.want, got)
		})
	}
}

0 comments on commit b237248

Please sign in to comment.