Skip to content

Commit 0d612af

Browse files
committed
Support nesting non-capturing regexp groups, and expose utility for 'NormalizePathname' behavior
1 parent 346047a commit 0d612af

File tree

5 files changed

+180
-50
lines changed

5 files changed

+180
-50
lines changed

README.md

+14-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ import pathToRegexp "github.com/soongo/path-to-regexp"
2121
// pathToRegexp.MustCompile(path, options) // like Compile but panics if the error is non-nil
2222
// pathToRegexp.Match(path, options) // options can be nil
2323
// pathToRegexp.MustMatch(path, options) // like Match but panics if the error is non-nil
24-
// pathToRegexp.Must(regexp, err) // wraps a call to a function returning (*regexp2.Regexp, error) and panics if the error is non-nil.
24+
// pathToRegexp.Must(regexp, err) // wraps a call to a function returning (*regexp2.Regexp, error) and panics if the error is non-nil
25+
// pathToRegexp.EncodeURI(str) // encodes characters in URI except `;/?:@&=+$,#`, like javascript's encodeURI
26+
// pathToRegexp.EncodeURIComponent(str) // encodes characters in URI, including `;/?:@&=+$,#`, like javascript's encodeURIComponent
27+
// pathToRegexp.NormalizePathname(str) // decodes the pathname, collapses repeated slashes and normalizes unicode characters
2528
```
2629

2730
- **path** A string, array or slice of strings, or a regular expression with type *github.com/dlclark/regexp2.Regexp.
@@ -217,6 +220,16 @@ fmt.Printf("%#v\n", match("/user/123"))
217220
match("/invalid") //=> nil
218221
```
219222

223+
### Normalize Pathname
224+
225+
The `NormalizePathname` function will return a normalized string for matching with `PathToRegexp`.
226+
227+
```go
228+
re := pathToRegexp.Must(pathToRegexp.PathToRegexp("/caf\u00E9", nil, nil))
229+
input := pathToRegexp.EncodeURI("/cafe\u0301")
230+
re.MatchString(pathToRegexp.NormalizePathname(input)) //=> true, nil
231+
```
232+
220233
### Parse
221234

222235
The `Parse` function will return a list of strings and tokens from a path string:

go.mod

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,7 @@ module github.com/soongo/path-to-regexp
22

33
go 1.13
44

5-
require github.com/dlclark/regexp2 v1.2.0
5+
require (
6+
github.com/dlclark/regexp2 v1.2.0
7+
golang.org/x/text v0.3.2
8+
)

go.sum

+3
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
github.com/dlclark/regexp2 v1.2.0 h1:8sAhBGEM0dRWogWqWyQeIJnxjWO6oIjl8FKqREDsGfk=
22
github.com/dlclark/regexp2 v1.2.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
3+
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
4+
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
5+
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

path_to_regexp.go

+89-40
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,14 @@ import (
1111
"reflect"
1212
"strconv"
1313
"strings"
14+
"unicode"
1415
"unsafe"
1516

17+
"golang.org/x/text/runes"
18+
1619
"github.com/dlclark/regexp2"
20+
"golang.org/x/text/transform"
21+
"golang.org/x/text/unicode/norm"
1722
)
1823

1924
// Token is parsed from path. For example, using `/user/:id`, `tokens` will
@@ -88,6 +93,53 @@ const defaultDelimiter = "/"
8893
var escapeRegexp = regexp2.MustCompile("([.+*?=^!:${}()[\\]|/\\\\])", regexp2.None)
8994
var tokenRegexp = regexp2.MustCompile("\\((?!\\?)", regexp2.None)
9095

96+
func normalize(str string) string {
97+
t := transform.Chain(norm.NFC, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
98+
normStr, _, _ := transform.String(t, str)
99+
return normStr
100+
}
101+
102+
// NormalizePathname normalizes a pathname for matching, replaces multiple slashes
103+
// with a single slash and normalizes unicode characters to "NFC". When using this method,
104+
// `decode` should be an identity function so you don't decode strings twice.
105+
func NormalizePathname(pathname string) string {
106+
r := regexp2.MustCompile("\\/+", regexp2.None)
107+
str, err := r.Replace(DecodeURIComponent(pathname, nil),
108+
"/", -1, -1)
109+
if err != nil {
110+
panic(err)
111+
}
112+
return normalize(str)
113+
}
114+
115+
// balanced scans str, starting at character position index, for the point at
// which the brackets opened from that position are closed again.
//
// Positions are counted in characters (the elements of strings.Split(str, "")),
// not in bytes. A backslash escapes the character that follows it, so both are
// skipped and never counted as brackets. Every occurrence of open increases
// the nesting depth and every occurrence of close decreases it.
//
// It returns the position just past the close that brings the depth back to
// zero, or -1 when there is no such close or index lies outside the string.
func balanced(open string, close string, str string, index int) int {
	// A negative index can never address a character; report "not found"
	// instead of panicking on an out-of-range slice access.
	if index < 0 {
		return -1
	}

	chars := strings.Split(str, "")
	depth := 0

	for i := index; i < len(chars); i++ {
		ch := chars[i]

		if ch == "\\" {
			// Skip the escaped character as well as the backslash.
			i++
			continue
		}

		if ch == close {
			depth--

			if depth == 0 {
				return i + 1
			}
		}

		if ch == open {
			depth++
		}
	}

	return -1
}
142+
91143
// Parse a string for the raw tokens.
92144
func Parse(str string, o *Options) []interface{} {
93145
tokens, tokenIndex, index, path, isEscaped := make([]interface{}, 0), 0, 0, "", false
@@ -136,49 +188,27 @@ func Parse(str string, o *Options) []interface{} {
136188
}
137189

138190
if index < length && arr[index] == "(" {
139-
prev, balanced, invalidGroup := index, 1, false
140-
if index+1 < length && arr[index+1] == "?" {
141-
panic("Path pattern must be a capturing group")
142-
}
143-
144-
for index++; index < length; index++ {
145-
if arr[index] == "\\" {
146-
pattern += strings.Join(arr[index:min(index+2, length)], "")
147-
index++
148-
continue
191+
end := balanced("(", ")", str, index)
192+
193+
// False positive on matching brackets.
194+
if end > -1 {
195+
pattern = strings.Join(arr[index+1:end-1], "")
196+
index = end
197+
if pattern[0] == '?' {
198+
panic("Path pattern must be a capturing group")
149199
}
150200

151-
if arr[index] == ")" {
152-
balanced--
153-
if balanced == 0 {
154-
index++
155-
break
201+
r := regexp2.MustCompile("\\((?=[^?])", regexp2.None)
202+
if ok, _ := r.MatchString(pattern); ok {
203+
validPattern, err := r.Replace(pattern, "(?:", -1, -1)
204+
if err != nil {
205+
panic(err)
156206
}
157-
}
158207

159-
pattern += string(arr[index])
160-
161-
if arr[index] == "(" {
162-
balanced++
163-
164-
// Better errors on nested capturing groups.
165-
if index+1 >= length || arr[index+1] != "?" {
166-
pattern += "?:"
167-
invalidGroup = true
168-
}
208+
panic(fmt.Sprintf("Capturing groups are not allowed in pattern, "+
209+
"use a non-capturing group: (%s)", validPattern))
169210
}
170211
}
171-
172-
if invalidGroup {
173-
panic(fmt.Sprintf("Capturing groups are not allowed in pattern, "+
174-
"use a non-capturing group: (%s)", pattern))
175-
}
176-
177-
// False positive.
178-
if balanced > 0 {
179-
index = prev
180-
pattern = ""
181-
}
182212
}
183213

184214
// Add regular characters to the path string.
@@ -271,7 +301,7 @@ func Match(path interface{}, o *Options) (func(string) *MatchResult, error) {
271301
return nil, err
272302
}
273303

274-
return regexpToFunction(re, tokens), nil
304+
return regexpToFunction(re, tokens, o), nil
275305
}
276306

277307
// MustMatch is like Match but panics if err occur in match function.
@@ -284,8 +314,13 @@ func MustMatch(path interface{}, o *Options) func(string) *MatchResult {
284314
}
285315

286316
// Create a path match function from `path-to-regexp` output.
287-
func regexpToFunction(re *regexp2.Regexp, tokens []Token) func(string) *MatchResult {
288-
decode := DecodeURIComponent
317+
func regexpToFunction(re *regexp2.Regexp, tokens []Token, o *Options) func(string) *MatchResult {
318+
decode := func(str string, token interface{}) string {
319+
return str
320+
}
321+
if o != nil && o.Decode != nil {
322+
decode = o.Decode
323+
}
289324

290325
return func(pathname string) *MatchResult {
291326
m, err := re.FindStringMatch(pathname)
@@ -513,6 +548,20 @@ func toMap(data interface{}) map[interface{}]interface{} {
513548
return m
514549
}
515550

551+
func EncodeURI(str string) string {
552+
excludes := ";/?:@&=+$,#"
553+
arr := strings.Split(str, "")
554+
result := ""
555+
for _, v := range arr {
556+
if strings.Contains(excludes, v) {
557+
result += v
558+
} else {
559+
result += EncodeURIComponent(v, nil)
560+
}
561+
}
562+
return result
563+
}
564+
516565
func EncodeURIComponent(str string, token interface{}) string {
517566
r := url.QueryEscape(str)
518567
r = strings.Replace(r, "+", "%20", -1)

path_to_regexp_test.go

+70-8
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,17 @@ var tests = []a{
202202
a{"/route", "route"},
203203
&MatchResult{Path: "/route", Index: 0, Params: m{"test": "route"}},
204204
},
205+
a{
206+
"/caf%C3%A9",
207+
a{"/caf%C3%A9", "caf%C3%A9"},
208+
&MatchResult{Path: "/caf%C3%A9", Index: 0, Params: m{"test": "caf%C3%A9"}},
209+
},
210+
a{
211+
"/caf%C3%A9",
212+
a{"/caf%C3%A9", "caf%C3%A9"},
213+
&MatchResult{Path: "/caf%C3%A9", Index: 0, Params: m{"test": "café"}},
214+
&Options{Decode: DecodeURIComponent},
215+
},
205216
},
206217
a{
207218
a{m{}, nil},
@@ -2774,14 +2785,14 @@ var tests = []a{
27742785
},
27752786

27762787
/**
2777-
* Nested parenthesis.
2788+
* Nested parentheses.
27782789
*/
27792790
{
2780-
"/:foo(\\d+(?:\\.\\d+)?)",
2781-
&Options{},
2791+
"/:test(\\d+(?:\\.\\d+)?)",
2792+
nil,
27822793
a{
27832794
Token{
2784-
Name: "foo",
2795+
Name: "test",
27852796
Prefix: "/",
27862797
Delimiter: "/",
27872798
Optional: false,
@@ -2791,12 +2802,40 @@ var tests = []a{
27912802
},
27922803
a{
27932804
a{"/123", a{"/123", "123"}},
2805+
a{"/abc", nil},
2806+
a{"/123/abc", nil},
27942807
a{"/123.123", a{"/123.123", "123.123"}},
2808+
a{"/123.abc", nil},
2809+
},
2810+
a{
2811+
a{m{"test": 123}, "/123"},
2812+
a{m{"test": 123.123}, "/123.123"},
2813+
a{m{"test": "abc"}, nil},
2814+
a{m{"test": "123"}, "/123"},
2815+
a{m{"test": "123.123"}, "/123.123"},
2816+
a{m{"test": "123.abc"}, nil},
2817+
},
2818+
},
2819+
{
2820+
"/:test((?!login)[^/]+)",
2821+
nil,
2822+
a{
2823+
Token{
2824+
Name: "test",
2825+
Prefix: "/",
2826+
Delimiter: "/",
2827+
Optional: false,
2828+
Repeat: false,
2829+
Pattern: "(?!login)[^/]+",
2830+
},
27952831
},
27962832
a{
2797-
a{m{"foo": 123}, "/123"},
2798-
a{m{"foo": 123.123}, "/123.123"},
2799-
a{m{"foo": "123"}, "/123"},
2833+
a{"/route", a{"/route", "route"}},
2834+
a{"/login", nil},
2835+
},
2836+
a{
2837+
a{m{"test": "route"}, "/route"},
2838+
a{m{"test": "login"}, nil},
28002839
},
28012840
},
28022841
}
@@ -3027,8 +3066,13 @@ func TestPathToRegexp(t *testing.T) {
30273066
params = io[2].(*MatchResult)
30283067
}
30293068

3069+
var options *Options
3070+
if len(io) >= 4 && io[3] != nil {
3071+
options = io[3].(*Options)
3072+
}
3073+
30303074
if path, ok := path.(string); ok && params != nil {
3031-
match := MustMatch(path, nil)
3075+
match := MustMatch(path, options)
30323076
t.Run(message+" params", func(t *testing.T) {
30333077
m := match(pathname.(string))
30343078
if !params.equals(m) {
@@ -3119,6 +3163,24 @@ func TestPathToRegexp(t *testing.T) {
31193163
})
31203164
})
31213165

3166+
t.Run("normalize pathname", func(t *testing.T) {
3167+
t.Run("should match normalized pathnames", func(t *testing.T) {
3168+
re := Must(PathToRegexp("/caf\u00E9", nil, nil))
3169+
input := EncodeURI("/cafe\u0301")
3170+
3171+
result := exec(re, input)
3172+
if result != nil {
3173+
t.Errorf("got %v want %v", result, nil)
3174+
}
3175+
3176+
want := []string{"/caf\u00E9"}
3177+
result = exec(re, NormalizePathname(input))
3178+
if !reflect.DeepEqual(result, want) {
3179+
t.Errorf("got %v want %v", result, want)
3180+
}
3181+
})
3182+
})
3183+
31223184
t.Run("path should be string, or strings, or a regular expression", func(t *testing.T) {
31233185
_, err := PathToRegexp(123, nil, nil)
31243186
if err == nil {

0 commit comments

Comments
 (0)