Skip to content

Commit 790ebc1

Browse files
committed
Change normalize behavior
1 parent 00aac59 commit 790ebc1

File tree

3 files changed

+108
-68
lines changed

3 files changed

+108
-68
lines changed

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,8 @@ import pathToRegexp "github.com/soongo/path-to-regexp"
3232
// pathToRegexp.Match(path, options) // options can be nil
3333
// pathToRegexp.MustMatch(path, options) // like Match but panics if the error is non-nil
3434
// pathToRegexp.Must(regexp, err) // wraps a call to a function returning (*regexp2.Regexp, error) and panics if the error is non-nil
35-
// pathToRegexp.EncodeURI(str) // encodes characters in URI, like javascript's encodeURI
36-
// pathToRegexp.EncodeURIComponent(str) // encodes characters in URI except `;/?:@&=+$,#`, like javascript's encodeURIComponent
37-
// pathToRegexp.NormalizePathname(str) // return a normalized string
35+
// pathToRegexp.EncodeURI(str) // encodes characters in URI except `;/?:@&=+$,#`, like javascript's encodeURI
36+
// pathToRegexp.EncodeURIComponent(str) // encodes characters in URI, like javascript's encodeURIComponent
3837
```
3938

4039
- **path** A string, array or slice of strings, or a regular expression with type *github.com/dlclark/regexp2.Regexp.
@@ -236,10 +235,13 @@ The `NormalizePathname` function will return a normalized string for matching wi
236235

237236
```go
238237
re := pathToRegexp.Must(pathToRegexp.PathToRegexp("/caf\u00E9", nil, nil))
239-
input := pathToRegexp.EncodeURI("/cafe\u0301");
238+
input := pathToRegexp.EncodeURI("/caf\u00E9");
239+
re.MatchString(input) //=> false, nil
240240
re.MatchString(pathToRegexp.NormalizePathname(input)); //=> true, nil
241241
```
242242

243+
**Note:** It may be preferable to implement something in your own library that normalizes the pathname for matching. E.g. [`URL`](https://developer.mozilla.org/en-US/docs/Web/API/URL) automatically URI encodes paths for you, which would result in a consistent match.
244+
243245
### Parse
244246

245247
The `Parse` function will return a list of strings and tokens from a path string:

path_to_regexp.go

Lines changed: 83 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,73 @@ const defaultDelimiter = "/"
9393
var escapeRegexp = regexp2.MustCompile("([.+*?=^!:${}()[\\]|/\\\\])", regexp2.None)
9494
var tokenRegexp = regexp2.MustCompile("\\((?!\\?)", regexp2.None)
9595

96-
func normalize(str string) string {
97-
t := transform.Chain(norm.NFC, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
96+
// EncodeURIComponent encodes a text string as a valid component of a Uniform
97+
// Resource Identifier (URI).
98+
func EncodeURIComponent(str string) string {
99+
r := url.QueryEscape(str)
100+
r = strings.Replace(r, "+", "%20", -1)
101+
return r
102+
}
103+
104+
// DecodeURIComponent gets the unencoded version of an encoded component of a Uniform Resource
105+
// Identifier (URI). It panics if str contains an invalid percent-escape sequence.
106+
func DecodeURIComponent(str string) string {
107+
r, err := url.QueryUnescape(str)
108+
if err != nil {
109+
panic(err)
110+
}
111+
return r
112+
}
113+
114+
// encodeURI encodes a text string as a valid Uniform Resource Identifier (URI), leaving the characters `;/?:@&=+$,#` unescaped.
115+
func encodeURI(str string) string {
116+
excludes := ";/?:@&=+$,#"
117+
arr := strings.Split(str, "")
118+
result := ""
119+
for _, v := range arr {
120+
if strings.Contains(excludes, v) {
121+
result += v
122+
} else {
123+
result += EncodeURIComponent(v)
124+
}
125+
}
126+
return result
127+
}
128+
129+
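// decodeURI gets the unencoded version of an encoded Uniform Resource Identifier (URI), leaving the upper-case escapes of the reserved characters `;/?:@&=+$,#` (e.g. %2F) undecoded.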
130+
func decodeURI(str string) string {
131+
magicWords := "1@X#y!Z" // not a good idea
132+
excludes := []string{"%3B", "%2F", "%3F", "%3A", "%40", "%26", "%3D", "%2B", "%24", "%2C", "%23"}
133+
r := regexp2.MustCompile(strings.Join(excludes, "|"), regexp2.None)
134+
135+
str, _ = r.ReplaceFunc(str, func(m regexp2.Match) string {
136+
return strings.Replace(m.String(), "%", magicWords, -1)
137+
}, -1, -1)
138+
139+
str = decodeURIComponent(str, nil)
140+
141+
for i, v := range excludes {
142+
excludes[i] = magicWords + strings.TrimPrefix(v, "%")
143+
}
144+
r = regexp2.MustCompile(strings.Join(excludes, "|"), regexp2.None)
145+
146+
str, _ = r.ReplaceFunc(str, func(m regexp2.Match) string {
147+
return strings.Replace(m.String(), magicWords, "%", -1)
148+
}, -1, -1)
149+
150+
return str
151+
}
152+
153+
// normalize returns str normalized into the given Unicode normalization form (Unicode
154+
// Standard Annex #15, Unicode Normalization Forms) with nonspacing marks (unicode.Mn) removed.
155+
// form may be norm.NFC, norm.NFD, norm.NFKC, or norm.NFKD; if it is not specified, the default
156+
// is norm.NFC.
157+
func normalize(str string, form ...norm.Form) string {
158+
f := norm.NFC
159+
if len(form) > 0 {
160+
f = form[0]
161+
}
162+
t := transform.Chain(f, runes.Remove(runes.In(unicode.Mn)), f)
98163
normStr, _, _ := transform.String(t, str)
99164
return normStr
100165
}
@@ -103,13 +168,13 @@ func normalize(str string) string {
103168
// with a single slash. It does not apply unicode normalization. When using this method,
104169
// `decode` should be an identity function so you don't decode strings twice.
105170
func NormalizePathname(pathname string) string {
171+
pathname = decodeURI(pathname)
106172
r := regexp2.MustCompile("\\/+", regexp2.None)
107-
str, err := r.Replace(DecodeURIComponent(pathname, nil),
108-
"/", -1, -1)
173+
pathname, err := r.Replace(pathname, "/", -1, -1)
109174
if err != nil {
110175
panic(err)
111176
}
112-
return normalize(str)
177+
return pathname
113178
}
114179

115180
// Balanced bracket helper function.
@@ -146,7 +211,7 @@ func Parse(str string, o *Options) []interface{} {
146211
if o == nil {
147212
o = &Options{}
148213
}
149-
defaultDelimiter := orString(o.Delimiter, defaultDelimiter)
214+
defaultDelimiter := anyString(o.Delimiter, defaultDelimiter)
150215
whitelist := o.Whitelist
151216

152217
// use list to deal with unicode in str
@@ -240,7 +305,7 @@ func Parse(str string, o *Options) []interface{} {
240305

241306
repeat := index < length && (arr[index] == "+" || arr[index] == "*")
242307
optional := index < length && (arr[index] == "?" || arr[index] == "*")
243-
delimiter := orString(prefix, defaultDelimiter)
308+
delimiter := anyString(prefix, defaultDelimiter)
244309

245310
// Increment `i` past modifier token.
246311
if repeat || optional {
@@ -366,7 +431,7 @@ func tokensToFunction(tokens []interface{}, o *Options) (
366431
o = &Options{}
367432
}
368433
reFlags := flags(o)
369-
encode, validate := EncodeURIComponent, true
434+
encode, validate := encodeURIComponent, true
370435
if o.Encode != nil {
371436
encode = o.Encode
372437
}
@@ -484,14 +549,8 @@ func tokensToFunction(tokens []interface{}, o *Options) (
484549
}, nil
485550
}
486551

487-
func min(x, y int) int {
488-
if x < y {
489-
return x
490-
}
491-
return y
492-
}
493-
494-
func orString(str ...string) string {
552+
// anyString returns the first non-empty string in str, or "" if every string is empty.
553+
func anyString(str ...string) string {
495554
for _, v := range str {
496555
if v != "" {
497556
return v
@@ -500,26 +559,7 @@ func orString(str ...string) string {
500559
return ""
501560
}
502561

503-
func indexOf(in interface{}, elem interface{}) int {
504-
inValue := reflect.ValueOf(in)
505-
elemValue := reflect.ValueOf(elem)
506-
inType := inValue.Type()
507-
508-
if inType.Kind() == reflect.String {
509-
return strings.Index(inValue.String(), elemValue.String())
510-
}
511-
512-
if inType.Kind() == reflect.Slice {
513-
for i := 0; i < inValue.Len(); i++ {
514-
if reflect.DeepEqual(inValue.Index(i).Interface(), elem) {
515-
return i
516-
}
517-
}
518-
}
519-
520-
return -1
521-
}
522-
562+
// stringIndexOf returns the index of str in arr, or -1 if str is not present.
523563
func stringIndexOf(arr []string, str string) int {
524564
for i, v := range arr {
525565
if v == str {
@@ -529,6 +569,7 @@ func stringIndexOf(arr []string, str string) int {
529569
return -1
530570
}
531571

572+
// toSlice converts data, which is expected to be a slice or an array, into a []interface{}.
532573
func toSlice(data interface{}) []interface{} {
533574
v := reflect.ValueOf(data)
534575
length := v.Len()
@@ -539,6 +580,7 @@ func toSlice(data interface{}) []interface{} {
539580
return arr
540581
}
541582

583+
// toMap converts data, which is expected to be a map, into a map[interface{}]interface{}.
542584
func toMap(data interface{}) map[interface{}]interface{} {
543585
v, m := reflect.ValueOf(data), make(map[interface{}]interface{})
544586
for _, k := range v.MapKeys() {
@@ -548,32 +590,12 @@ func toMap(data interface{}) map[interface{}]interface{} {
548590
return m
549591
}
550592

551-
func EncodeURI(str string) string {
552-
excludes := ";/?:@&=+$,#"
553-
arr := strings.Split(str, "")
554-
result := ""
555-
for _, v := range arr {
556-
if strings.Contains(excludes, v) {
557-
result += v
558-
} else {
559-
result += EncodeURIComponent(v, nil)
560-
}
561-
}
562-
return result
563-
}
564-
565-
func EncodeURIComponent(str string, token interface{}) string {
566-
r := url.QueryEscape(str)
567-
r = strings.Replace(r, "+", "%20", -1)
568-
return r
593+
func encodeURIComponent(str string, token interface{}) string {
594+
return EncodeURIComponent(str)
569595
}
570596

571-
func DecodeURIComponent(str string, token interface{}) string {
572-
r, err := url.QueryUnescape(str)
573-
if err != nil {
574-
panic(err)
575-
}
576-
return r
597+
func decodeURIComponent(str string, token interface{}) string {
598+
return DecodeURIComponent(str)
577599
}
578600

579601
// Escape a regular expression string.
@@ -687,7 +709,7 @@ func tokensToRegExp(rawTokens []interface{}, tokens *[]Token, o *Options) (*rege
687709
}
688710
}
689711

690-
delimiter := orString(o.Delimiter, defaultDelimiter)
712+
delimiter := anyString(o.Delimiter, defaultDelimiter)
691713
arr := make([]string, len(ends)+1)
692714
for i, v := range ends {
693715
v = escapeString(v)

path_to_regexp_test.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ var tests = []a{
211211
"/caf%C3%A9",
212212
a{"/caf%C3%A9", "caf%C3%A9"},
213213
&MatchResult{Path: "/caf%C3%A9", Index: 0, Params: m{"test": "café"}},
214-
&Options{Decode: DecodeURIComponent},
214+
&Options{Decode: decodeURIComponent},
215215
},
216216
},
217217
a{
@@ -3166,19 +3166,35 @@ func TestPathToRegexp(t *testing.T) {
31663166
t.Run("normalize pathname", func(t *testing.T) {
31673167
t.Run("should match normalized pathnames", func(t *testing.T) {
31683168
re := Must(PathToRegexp("/caf\u00E9", nil, nil))
3169-
input := EncodeURI("/cafe\u0301")
3169+
input := encodeURI("/cafe\u0301")
31703170

31713171
result := exec(re, input)
31723172
if result != nil {
31733173
t.Errorf("got %v want %v", result, nil)
31743174
}
31753175

31763176
want := []string{"/caf\u00E9"}
3177-
result = exec(re, NormalizePathname(input))
3177+
result = exec(re, normalize(NormalizePathname(input)))
31783178
if !reflect.DeepEqual(result, want) {
31793179
t.Errorf("got %v want %v", result, want)
31803180
}
31813181
})
3182+
3183+
t.Run("should not normalize encoded slash", func(t *testing.T) {
3184+
input, want := "/test/route%2F", "/test/route%2F"
3185+
result := NormalizePathname(input)
3186+
if result != want {
3187+
t.Errorf("got %s want %s", result, want)
3188+
}
3189+
})
3190+
3191+
t.Run("should fix repeated slashes", func(t *testing.T) {
3192+
input, want := encodeURI("/test///route"), "/test/route"
3193+
result := NormalizePathname(input)
3194+
if result != want {
3195+
t.Errorf("got %s want %s", result, want)
3196+
}
3197+
})
31823198
})
31833199

31843200
t.Run("path should be string, or strings, or a regular expression", func(t *testing.T) {

0 commit comments

Comments
 (0)