diff --git a/ogenregex/ogenregex.go b/ogenregex/ogenregex.go index 4cc0f7e56..5fc22c143 100644 --- a/ogenregex/ogenregex.go +++ b/ogenregex/ogenregex.go @@ -22,7 +22,8 @@ var _ = []Regexp{ } type goRegexp struct { - exp *regexp.Regexp + orig string + exp *regexp.Regexp } func (r goRegexp) Match(s []byte) (bool, error) { @@ -34,7 +35,7 @@ func (r goRegexp) MatchString(s string) (bool, error) { } func (r goRegexp) String() string { - return r.exp.String() + return r.orig } type regexp2Regexp struct { @@ -67,7 +68,7 @@ type Regexp interface { func Compile(exp string) (Regexp, error) { if converted, ok := Convert(exp); ok { if re, err := regexp.Compile(converted); err == nil { - return goRegexp{re}, nil + return goRegexp{orig: exp, exp: re}, nil } } re, err := regexp2.Compile(exp, regexp2.ECMAScript|regexp2.Unicode) diff --git a/ogenregex/ogenregex_test.go b/ogenregex/ogenregex_test.go index bb6d09b47..39a9c3fcd 100644 --- a/ogenregex/ogenregex_test.go +++ b/ogenregex/ogenregex_test.go @@ -9,68 +9,68 @@ import ( func TestCompile(t *testing.T) { type testCase struct { - input string - wantType Regexp - wantString string - wantErr bool + input string + wantType Regexp + wantErr bool } tests := []testCase{ // Conversion is not required. 
- {`\x20`, goRegexp{}, `\x20`, false}, - {`\v`, goRegexp{}, `\v`, false}, - {`\t`, goRegexp{}, `\t`, false}, - {`\n`, goRegexp{}, `\n`, false}, - {`\d`, goRegexp{}, `\d`, false}, - {`\w`, goRegexp{}, `\w`, false}, - {`\w{1}`, goRegexp{}, `\w{1}`, false}, - {`\w{1,}`, goRegexp{}, `\w{1,}`, false}, - {`\w{1,2}`, goRegexp{}, `\w{1,2}`, false}, - {`\b`, goRegexp{}, `\b`, false}, - {`\B`, goRegexp{}, `\B`, false}, - {`\.`, goRegexp{}, `\.`, false}, - {`\[`, goRegexp{}, `\[`, false}, - {`\]`, goRegexp{}, `\]`, false}, - {`\(`, goRegexp{}, `\(`, false}, - {`\)`, goRegexp{}, `\)`, false}, - {`\{`, goRegexp{}, `\{`, false}, - {`\}`, goRegexp{}, `\}`, false}, - {`\\`, goRegexp{}, `\\`, false}, - {`\$`, goRegexp{}, `\$`, false}, + {`\0`, goRegexp{}, false}, + {`\x20`, goRegexp{}, false}, + {`\v`, goRegexp{}, false}, + {`\t`, goRegexp{}, false}, + {`\n`, goRegexp{}, false}, + {`\d`, goRegexp{}, false}, + {`\w`, goRegexp{}, false}, + {`\w{1}`, goRegexp{}, false}, + {`\w{1,}`, goRegexp{}, false}, + {`\w{1,2}`, goRegexp{}, false}, + {`\b`, goRegexp{}, false}, + {`\B`, goRegexp{}, false}, + {`\.`, goRegexp{}, false}, + {`\[`, goRegexp{}, false}, + {`\]`, goRegexp{}, false}, + {`\(`, goRegexp{}, false}, + {`\)`, goRegexp{}, false}, + {`\{`, goRegexp{}, false}, + {`\}`, goRegexp{}, false}, + {`\\`, goRegexp{}, false}, + {`\$`, goRegexp{}, false}, // Simplification. - {`\u000a`, goRegexp{}, `\x{000a}`, false}, - {`\u{000a}`, goRegexp{}, `\x{000a}`, false}, + {`\u000a`, goRegexp{}, false}, + {`\u{000a}`, goRegexp{}, false}, // "\z" just unnecessarily escapes the 'z'. - {`\z`, goRegexp{}, `z`, false}, + {`\z`, goRegexp{}, false}, // Conversion is required. // // See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes#types. // // In ECMA-262, \c + [a-fA-F] is a control character. 
- {`\ca`, goRegexp{}, `\x01`, false}, - {`\cA`, goRegexp{}, `\x01`, false}, - {`\cb`, goRegexp{}, `\x02`, false}, - {`\cB`, goRegexp{}, `\x02`, false}, + {`\ca`, goRegexp{}, false}, + {`\cA`, goRegexp{}, false}, + {`\cb`, goRegexp{}, false}, + {`\cB`, goRegexp{}, false}, // In ECMA-262, \b in a character class is a backspace. - {`[\b]`, goRegexp{}, `[\x08]`, false}, + {`[\b]`, goRegexp{}, false}, // ECMA-262 dot matches any single character except line terminators: \n, \r, \u2028 or \u2029. - {`.*`, goRegexp{}, re2Dot + `*`, false}, + {`.*`, goRegexp{}, false}, // Whitespace characters in ECMA-262 differ from those in RE2. // // Whitespace characters in ECMA-262: // [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff] - {`\s`, goRegexp{}, `[` + whitespaceChars + `]`, false}, - {`\S`, goRegexp{}, `[^` + whitespaceChars + `]`, false}, - {`[\s]`, goRegexp{}, `[` + whitespaceChars + `]`, false}, + {`\s`, goRegexp{}, false}, + {`\S`, goRegexp{}, false}, + {`[\s]`, goRegexp{}, false}, // Use regexp2. - {`^(?!examples/)`, regexp2Regexp{}, `^(?!examples/)`, false}, + {`^(?!examples/)`, regexp2Regexp{}, false}, // Error. - {")", nil, ``, true}, - {"(?`)", nil, ``, true}, + {")", nil, true}, + {"(?`)", nil, true}, } for i, tt := range tests { tt := tt @@ -87,7 +87,84 @@ func TestCompile(t *testing.T) { a.NoError(err) a.NotPanics(func() { MustCompile(tt.input) }) a.IsType(tt.wantType, got) - a.Equal(tt.wantString, got.String()) + a.Equal(tt.input, got.String()) + }) + } +} + +func TestConvert(t *testing.T) { + type testCase struct { + input string + wantString string + wantOk bool + } + + tests := []testCase{ + // Conversion is not required. 
+ {`\0`, `\0`, true}, + {`\x20`, `\x20`, true}, + {`\v`, `\v`, true}, + {`\t`, `\t`, true}, + {`\n`, `\n`, true}, + {`\d`, `\d`, true}, + {`\w`, `\w`, true}, + {`\w{1}`, `\w{1}`, true}, + {`\w{1,}`, `\w{1,}`, true}, + {`\w{1,2}`, `\w{1,2}`, true}, + {`\b`, `\b`, true}, + {`\B`, `\B`, true}, + {`\.`, `\.`, true}, + {`\[`, `\[`, true}, + {`\]`, `\]`, true}, + {`\(`, `\(`, true}, + {`\)`, `\)`, true}, + {`\{`, `\{`, true}, + {`\}`, `\}`, true}, + {`\\`, `\\`, true}, + {`\$`, `\$`, true}, + + // Simplification. + {`\u000a`, `\x{000a}`, true}, + {`\u{000a}`, `\x{000a}`, true}, + // "\z" just unnecessarily escapes the 'z'. + {`\z`, `z`, true}, + + // Conversion is required. + // + // See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes#types. + // + // In ECMA-262, \c + [a-zA-Z] is a control character. + {`\ca`, `\x01`, true}, + {`\cA`, `\x01`, true}, + {`\cb`, `\x02`, true}, + {`\cB`, `\x02`, true}, + // In ECMA-262, \b in a character class is a backspace. + {`[\b]`, `[\x08]`, true}, + // ECMA-262 dot matches any single character except line terminators: \n, \r, \u2028 or \u2029. + {`.*`, re2Dot + `*`, true}, + // Whitespace characters in ECMA-262 differ from those in RE2. + // + // Whitespace characters in ECMA-262: + // [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff] + {`\s`, `[` + whitespaceChars + `]`, true}, + {`\S`, `[^` + whitespaceChars + `]`, true}, + {`[\s]`, `[` + whitespaceChars + `]`, true}, + + // Use regexp2. + {`^(?!examples/)`, ``, false}, + + // Error. + {")", ``, false}, + {"(?`)", ``, false}, + } + for i, tt := range tests { + tt := tt + t.Run(fmt.Sprintf("Test%d", i+1), func(t *testing.T) { + a := require.New(t) + + got, ok := Convert(tt.input) + a.Equal(tt.wantOk, ok, "%q", tt.input) + a.Equal(tt.wantString, got) }) } }