diff --git a/Readme.md b/Readme.md index eaa60b2..8636253 100644 --- a/Readme.md +++ b/Readme.md @@ -24,29 +24,9 @@ const { match, compile, parse } = require("path-to-regexp"); // parse(path, options?) ``` -### Match - -The `match` function returns a function for transforming paths into parameters: - -- **path** A string. -- **options** _(optional)_ (See [parse](#parse) for more options) - - **sensitive** Regexp will be case sensitive. (default: `false`) - - **end** Validate the match reaches the end of the string. (default: `true`) - - **decode** Function for decoding strings to params, or `false` to disable all processing. (default: `decodeURIComponent`) - -```js -const fn = match("/foo/:bar"); -``` - -**Please note:** `path-to-regexp` is intended for ordered data (e.g. pathnames, hostnames). It can not handle arbitrarily ordered data (e.g. query strings, URL fragments, JSON, etc). - ### Parameters -Parameters match arbitrary strings in a path by matching up to the end of the segment, or up to any proceeding tokens. - -#### Named parameters - -Named parameters are defined by prefixing a colon to the parameter name (`:foo`). Parameter names can use any valid unicode identifier characters, similar to JavaScript. +Parameters match arbitrary strings in a path by matching up to the end of the segment, or up to any proceeding tokens. They are defined by prefixing a colon to the parameter name (`:foo`). Parameter names can use any valid JavaScript identifier, or be double quoted to use other characters (`:"param-name"`). ```js const fn = match("/:foo/:bar"); @@ -55,137 +35,54 @@ fn("/test/route"); //=> { path: '/test/route', params: { foo: 'test', bar: 'route' } } ``` -##### Custom matching parameters - -Parameters can have a custom regexp, which overrides the default match (`[^/]+`). 
For example, you can match digits or names in a path: - -```js -const exampleNumbers = match("/icon-:foo(\\d+).png"); - -exampleNumbers("/icon-123.png"); -//=> { path: '/icon-123.png', params: { foo: '123' } } - -exampleNumbers("/icon-abc.png"); -//=> false - -const exampleWord = pathToRegexp("/(user|u)"); - -exampleWord("/u"); -//=> { path: '/u', params: { '0': 'u' } } - -exampleWord("/users"); -//=> false -``` - -**Tip:** Backslashes need to be escaped with another backslash in JavaScript strings. - -#### Unnamed parameters - -It is possible to define a parameter without a name. The name will be numerically indexed: - -```js -const fn = match("/:foo/(.*)"); - -fn("/test/route"); -//=> { path: '/test/route', params: { '0': 'route', foo: 'test' } } -``` - -#### Custom prefix and suffix - -Parameters can be wrapped in `{}` to create custom prefixes or suffixes for your segment: - -```js -const fn = match("{/:attr1}?{-:attr2}?{-:attr3}?"); - -fn("/test"); -//=> { path: '/test', params: { attr1: 'test' } } - -fn("/test-test"); -//=> { path: '/test-test', params: { attr1: 'test', attr2: 'test' } } -``` - -#### Modifiers - -Modifiers are used after parameters with custom prefixes and suffixes (`{}`). - -##### Optional - -Parameters can be suffixed with a question mark (`?`) to make the parameter optional. - -```js -const fn = match("/:foo{/:bar}?"); - -fn("/test"); -//=> { path: '/test', params: { foo: 'test' } } - -fn("/test/route"); -//=> { path: '/test/route', params: { foo: 'test', bar: 'route' } } -``` - -##### Zero or more +### Wildcard -Parameters can be suffixed with an asterisk (`*`) to denote a zero or more parameter matches. +Wildcard parameters match one or more characters across multiple segments. They are defined the same way as regular parameters, but are prefixed with an asterisk (`*foo`). 
```js -const fn = match("{/:foo}*"); - -fn("/foo"); -//=> { path: '/foo', params: { foo: [ 'foo' ] } } +const fn = match("/*splat"); fn("/bar/baz"); -//=> { path: '/bar/baz', params: { foo: [ 'bar', 'baz' ] } } +//=> { path: '/bar/baz', params: { splat: [ 'bar', 'baz' ] } } ``` -##### One or more +### Braces -Parameters can be suffixed with a plus sign (`+`) to denote a one or more parameter matches. +Simple brace expansion can be used to define multiple versions of a path to match. It's also an effective way to create optional things to match. ```js -const fn = match("{/:foo}+"); +const fn = match("/{a,b,:other}"); -fn("/"); -//=> false +fn("/a"); +//=> { path: '/a', params: {} } -fn("/bar/baz"); -//=> { path: '/bar/baz', params: { foo: [ 'bar', 'baz' ] } } +fn("/c"); +//=> { path: '/c', params: { other: 'c' } } ``` -##### Custom separator - -By default, parameters set the separator as the `prefix + suffix` of the token. Using `;` you can modify this: - -```js -const fn = match("/name{/:parts;-}+"); +## Match -fn("/name"); -//=> false - -fn("/bar/1-2-3"); -//=> { path: '/name/1-2-3', params: { parts: [ '1', '2', '3' ] } } -``` - -#### Wildcard +The `match` function returns a function for transforming paths into parameters: -A wildcard is also supported. It is roughly equivalent to `(.*)`. +- **path** A string. +- **options** _(optional)_ (See [parse](#parse) for more options) + - **sensitive** Regexp will be case sensitive. (default: `false`) + - **end** Validate the match reaches the end of the string. (default: `true`) + - **decode** Function for decoding strings to params, or `false` to disable all processing. (default: `decodeURIComponent`) ```js -const fn = match("/*"); - -fn("/"); -//=> { path: '/', params: {} } - -fn("/bar/baz"); -//=> { path: '/bar/baz', params: { '0': [ 'bar', 'baz' ] } } +const fn = match("/foo/:bar"); ``` -### Compile ("Reverse" Path-To-RegExp) +**Please note:** `path-to-regexp` is intended for ordered data (e.g. pathnames, hostnames). 
It can not handle arbitrarily ordered data (e.g. query strings, URL fragments, JSON, etc). + +## Compile ("Reverse" Path-To-RegExp) The `compile` function will return a function for transforming parameters into a valid path: - **path** A string. - **options** (See [parse](#parse) for more options) - **sensitive** Regexp will be case sensitive. (default: `false`) - - **validate** When `false` the function can produce an invalid (unmatched) path. (default: `true`) - **encode** Function for encoding input strings for output into the path, or `false` to disable entirely. (default: `encodeURIComponent`) ```js @@ -194,26 +91,21 @@ const toPath = compile("/user/:id"); toPath({ id: "name" }); //=> "/user/name" toPath({ id: "café" }); //=> "/user/caf%C3%A9" -// When disabling `encode`, you need to make sure inputs are encoded correctly. No arrays are accepted. -const toPathRaw = compile("/user/:id", { encode: false }); - -toPathRaw({ id: "%3A%2F" }); //=> "/user/%3A%2F" -toPathRaw({ id: ":/" }); //=> Throws, "/user/:/" when `validate` is `false`. - -const toPathRepeated = compile("{/:segment}+"); +const toPathRepeated = compile("/*segment"); toPathRepeated({ segment: ["foo"] }); //=> "/foo" toPathRepeated({ segment: ["a", "b", "c"] }); //=> "/a/b/c" -const toPathRegexp = compile("/user/:id(\\d+)"); +// When disabling `encode`, you need to make sure inputs are encoded correctly. No arrays are accepted. +const toPathRaw = compile("/user/:id", { encode: false }); -toPathRegexp({ id: "123" }); //=> "/user/123" +toPathRaw({ id: "%3A%2F" }); //=> "/user/%3A%2F" ``` ## Developers - If you are rewriting paths with match and compile, consider using `encode: false` and `decode: false` to keep raw paths passed around. -- To ensure matches work on paths containing characters usually encoded, consider using [encodeurl](https://github.com/pillarjs/encodeurl) for `encodePath`. 
+- To ensure matches work on paths containing characters usually encoded, such as emoji, consider using [encodeurl](https://github.com/pillarjs/encodeurl) for `encodePath`. ### Parse @@ -222,18 +114,11 @@ The `parse` function accepts a string and returns `TokenData`, the set of tokens - **path** A string. - **options** _(optional)_ - **delimiter** The default delimiter for segments, e.g. `[^/]` for `:named` parameters. (default: `'/'`) - - **encodePath** A function for encoding input strings. (default: `x => x`, recommended: [`encodeurl`](https://github.com/pillarjs/encodeurl) for unicode encoding) + - **encodePath** A function for encoding input strings. (default: `x => x`, recommended: [`encodeurl`](https://github.com/pillarjs/encodeurl)) ### Tokens -The `tokens` returned by `TokenData` is an array of strings or keys, represented as objects, with the following properties: - -- `name` The name of the token -- `prefix` _(optional)_ The prefix string for the segment (e.g. `"/"`) -- `suffix` _(optional)_ The suffix string for the segment (e.g. `""`) -- `pattern` _(optional)_ The pattern defined to match this token -- `modifier` _(optional)_ The modifier character used for the segment (e.g. `?`) -- `separator` _(optional)_ The string used to separate repeated parameters +`TokenData` is a sequence of tokens, currently of types `text`, `param`, `wildcard`, or `group`. ### Custom path @@ -242,7 +127,10 @@ In some applications, you may not be able to use the `path-to-regexp` syntax, bu ```js import { TokenData, match } from "path-to-regexp"; -const tokens = ["/", { name: "foo" }]; +const tokens = [ + { type: "text", value: "/" }, + { type: "param", name: "foo" }, +]; const path = new TokenData(tokens, "/"); const fn = $match(path); @@ -253,55 +141,34 @@ fn("/test"); //=> { path: '/test', index: 0, params: { foo: 'test' } } An effort has been made to ensure ambiguous paths from previous releases throw an error. 
This means you might be seeing an error when things worked before. -### Unexpected `?`, `*`, or `+` - -In previous major versions `/` and `.` were used as implicit prefixes of parameters. So `/:key?` was implicitly `{/:key}?`. For example: - -- `/:key?` → `{/:key}?` or `/:key*` → `{/:key}*` or `/:key+` → `{/:key}+` -- `.:key?` → `{.:key}?` or `.:key*` → `{.:key}*` or `.:key+` → `{.:key}+` -- `:key?` → `{:key}?` or `:key*` → `{:key}*` or `:key+` → `{:key}+` +### Unexpected `?` or `+` -### Unexpected `;` +In past releases, `?`, `*`, and `+` were used to denote optional or repeating parameters. As an alternative, try these: -Used as a [custom separator](#custom-separator) for repeated parameters. +- For optional (`?`), use an empty segment in a group such as `/:file{.:ext,}`. +- For repeating (`+`), only wildcard matching is supported, such as `/*glob`. +- For optional repeating (`*`), use a group and a wildcard parameter such as `/{*glob,}`. -### Unexpected `!`, `@`, or `,` +### Unexpected `(`, `)`, `[`, `]`, etc. -These characters have been reserved for future use. - -### Missing separator - -Repeated parameters must have a separator to be valid. For example, `{:foo}*` can't be used. Separators can be defined manually, such as `{:foo;/}*`, or they default to the suffix and prefix with the parameter, such as `{/:foo}*`. +Previous major versions contained features that aren't currently supported, such as custom prefixes and suffixes for parameters, and the ability to set a parameter regexp. To avoid ambiguity any character used to alter the regexp of a previous release has been reserved in this release. ### Missing parameter name -Parameter names, the part after `:`, must be a valid JavaScript identifier. For example, it cannot start with a number or dash. If you want a parameter name that uses these characters you can wrap the name in quotes, e.g. `:"my-name"`. +Parameter names, the part after `:` or `*`, must be a valid JavaScript identifier. 
For example, it cannot start with a number or contain a dash. If you want a parameter name that uses these characters you can wrap the name in quotes, e.g. `:"my-name"`. ### Unterminated quote Parameter names can be wrapped in double quote characters, and this error means you forgot to close the quote character. -### Pattern cannot start with "?" - -Parameters in `path-to-regexp` must be basic groups. However, you can use features that require the `?` nested within the pattern. For example, `:foo((?!login)[^/]+)` is valid, but `:foo(?!login)` is not. - -### Capturing groups are not allowed - -A parameter pattern can not contain nested capturing groups. - -### Unbalanced or missing pattern - -A parameter pattern must have the expected number of parentheses. An unbalanced amount, such as `((?!login)` implies something has been written that is invalid. Check you didn't forget any parentheses. - ### Express <= 4.x Path-To-RegExp breaks compatibility with Express <= `4.x` in the following ways: -- The only part of the string that is a regex is within `()`. - - In Express.js 4.x, everything was passed as-is after a simple replacement, so you could write `/[a-z]+` to match `/test`. -- The `?` optional character must be used after `{}`. +- Regexp characters can no longer be provided. +- The optional character `?` is no longer supported, use brace expansion instead: `/:file{.:ext,}`. - Some characters have new meaning or have been reserved (`{}?*+@!;`). -- The parameter name now supports all unicode identifier characters, previously it was only `[a-z0-9]`. +- The parameter name now supports all JavaScript identifier characters, previously it was only `[a-z0-9]`. 
## License diff --git a/package.json b/package.json index e1c220a..a57212a 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "dist/" ], "scripts": { + "bench": "vitest bench", "build": "ts-scripts build", "format": "ts-scripts format", "lint": "ts-scripts lint", diff --git a/scripts/redos.ts b/scripts/redos.ts index f83e52d..946e2a5 100644 --- a/scripts/redos.ts +++ b/scripts/redos.ts @@ -1,17 +1,32 @@ import { checkSync } from "recheck"; import { match } from "../src/index.js"; -import { MATCH_TESTS } from "../src/cases.spec.js"; let safe = 0; let fail = 0; -const TESTS = new Set(MATCH_TESTS.map((test) => test.path)); -// const TESTS = [ -// ":path([^\\.]+).:ext", -// ":path.:ext(\\w+)", -// ":path{.:ext([^\\.]+)}", -// "/:path.:ext(\\\\w+)", -// ]; +const TESTS = [ + "/:path.:ext", + "/:path-:ext", + "/:path\\(:ext\\)", + "/:path|:ext|", + "/:foo/:bar-:baz", + "/:foo/:bar-:baz/:qux", + "/:foo/:bar.json.:ext", + "/*foo/:bar/*baz", + "/@:foo-:baz@", + "/:foo{.:ext}", + "/:foo{.:ext,}", + "/:foo{|:ext|,}", + "/:foo{/:bar,}/:baz", + "/user{,s}/:id", + "/user/{en,de,cn}/:id", + "/user/{en,de,cn}/{1,2,3}", + "/user/{en,de,cn/{1,2,3}}", + "/user/{en,de,cn/{}}", + "/user/{en,de,cn/{}/test}", + "/user/{en,de,cn}/{x}", + "/books/*section/:title", +]; for (const path of TESTS) { const { re } = match(path) as any; diff --git a/src/cases.spec.ts b/src/cases.spec.ts index 508b946..cc82740 100644 --- a/src/cases.spec.ts +++ b/src/cases.spec.ts @@ -34,31 +34,56 @@ export interface MatchTestSet { export const PARSER_TESTS: ParserTestSet[] = [ { path: "/", - expected: ["/"], + expected: [{ type: "text", value: "/" }], }, { path: "/:test", - expected: ["/", { name: "test" }], + expected: [ + { type: "text", value: "/" }, + { type: "param", name: "test" }, + ], }, { path: '/:"0"', - expected: ["/", { name: "0" }], + expected: [ + { type: "text", value: "/" }, + { type: "param", name: "0" }, + ], }, { path: "/:_", - expected: ["/", { name: "_" }], + expected: [ + { type: 
"text", value: "/" }, + { type: "param", name: "_" }, + ], }, { path: "/:café", - expected: ["/", { name: "café" }], + expected: [ + { type: "text", value: "/" }, + { type: "param", name: "café" }, + ], }, { path: '/:"123"', - expected: ["/", { name: "123" }], + expected: [ + { type: "text", value: "/" }, + { type: "param", name: "123" }, + ], }, { path: '/:"1\\"\\2\\"3"', - expected: ["/", { name: '1"2"3' }], + expected: [ + { type: "text", value: "/" }, + { type: "param", name: '1"2"3' }, + ], + }, + { + path: "/*star", + expected: [ + { type: "text", value: "/" }, + { type: "wildcard", name: "star" }, + ], }, ]; @@ -2204,7 +2229,7 @@ export const MATCH_TESTS: MatchTestSet[] = [ * Multi character delimiters. */ { - path: "%25:foo{%25:bar}?", + path: "%25:foo{%25:bar,}", options: { delimiter: "%25", }, diff --git a/src/index.bench.ts b/src/index.bench.ts new file mode 100644 index 0000000..9d39219 --- /dev/null +++ b/src/index.bench.ts @@ -0,0 +1,42 @@ +import { bench } from "vitest"; +import { match } from "./index.js"; + +const PATHS: string[] = [ + "/xyz", + "/user", + "/user/123", + "/" + "a".repeat(32_000), + "/-" + "-a".repeat(8_000) + "/-", + "/||||\x00|" + "||".repeat(27387) + "|\x00".repeat(27387) + "/||/", +]; + +const STATIC_PATH_MATCH = match("/user"); +const SIMPLE_PATH_MATCH = match("/user/:id"); +const MULTI_SEGMENT_MATCH = match("/:x/:y"); +const MULTI_PATTERN_MATCH = match("/:x-:y"); +const TRICKY_PATTERN_MATCH = match("/:foo|:bar|"); +const ASTERISK_MATCH = match("/*foo"); + +bench("static path", () => { + for (const path of PATHS) STATIC_PATH_MATCH(path); +}); + +bench("simple path", () => { + for (const path of PATHS) SIMPLE_PATH_MATCH(path); +}); + +bench("multi segment", () => { + for (const path of PATHS) MULTI_SEGMENT_MATCH(path); +}); + +bench("multi pattern", () => { + for (const path of PATHS) MULTI_PATTERN_MATCH(path); +}); + +bench("tricky pattern", () => { + for (const path of PATHS) TRICKY_PATTERN_MATCH(path); +}); + 
+bench("asterisk", () => { + for (const path of PATHS) ASTERISK_MATCH(path); +}); diff --git a/src/index.spec.ts b/src/index.spec.ts index ef019c9..014c0f5 100644 --- a/src/index.spec.ts +++ b/src/index.spec.ts @@ -6,89 +6,94 @@ import { PARSER_TESTS, COMPILE_TESTS, MATCH_TESTS } from "./cases.spec.js"; * Dynamically generate the entire test suite. */ describe("path-to-regexp", () => { - describe("arguments", () => { - it("should throw on non-capturing pattern", () => { - expect(() => match("/:foo(?:\\d+(\\.\\d+)?)")).toThrow( + describe("parse errors", () => { + it("should throw on unbalanced group", () => { + expect(() => parse("/{:foo,")).toThrow( new TypeError( - 'Pattern cannot start with "?" at 6: https://git.new/pathToRegexpError', + "Unexpected END at 7, expected }: https://git.new/pathToRegexpError", ), ); }); - - it("should throw on nested capturing group", () => { - expect(() => match("/:foo(\\d+(\\.\\d+)?)")).toThrow( + it("should throw on nested unbalanced group", () => { + expect(() => parse("/{:foo/{x,y}")).toThrow( new TypeError( - "Capturing groups are not allowed at 9: https://git.new/pathToRegexpError", + "Unexpected END at 12, expected }: https://git.new/pathToRegexpError", ), ); }); - it("should throw on unbalanced pattern", () => { - expect(() => match("/:foo(abc")).toThrow( + it("should throw on missing param name", () => { + expect(() => parse("/:/")).toThrow( new TypeError( - "Unbalanced pattern at 5: https://git.new/pathToRegexpError", + "Missing parameter name at 2: https://git.new/pathToRegexpError", ), ); }); - it("should throw on unmatched )", function () { - expect(() => match("/:fooab)c")).toThrow( - new TypeError("Unmatched ) at 7: https://git.new/pathToRegexpError"), - ); - }); - - it("should throw on unmatched ) after other patterns", function () { - expect(() => match("/:test(\\w+)/:foo(\\d+))")).toThrow( - new TypeError("Unmatched ) at 21: https://git.new/pathToRegexpError"), - ); - }); - - it("should throw on missing pattern", 
() => { - expect(() => match("/:foo()")).toThrow( + it("should throw on missing wildcard name", () => { + expect(() => parse("/*/")).toThrow( new TypeError( - "Missing pattern at 5: https://git.new/pathToRegexpError", + "Missing parameter name at 2: https://git.new/pathToRegexpError", ), ); }); - it("should throw on missing name", () => { - expect(() => match("/:(test)")).toThrow( + it("should throw on unterminated quote", () => { + expect(() => parse('/:"foo')).toThrow( new TypeError( - "Missing parameter name at 2: https://git.new/pathToRegexpError", + "Unterminated quote at 2: https://git.new/pathToRegexpError", ), ); }); + }); - it("should throw on nested groups", () => { - expect(() => match("/{a{b:foo}}")).toThrow( - new TypeError( - "Unexpected { at 3, expected }: https://git.new/pathToRegexpError", - ), - ); + describe("compile errors", () => { + it("should throw when a param is missing", () => { + const toPath = compile("/a/:b/c"); + + expect(() => { + toPath(); + }).toThrow(new TypeError("Missing parameters: b")); }); - it("should throw on repeat parameters without a separator", () => { - expect(() => match("{:x}*")).toThrow( - new TypeError( - `Missing separator for "x": https://git.new/pathToRegexpError`, - ), - ); + it("should throw when all group children are invalid", () => { + const toPath = compile("/a/{:a,:b,:c}"); + + expect(() => { + toPath(); + }).toThrow(new TypeError("Missing parameters: a, b, c")); }); - it("should throw on unterminated quote", () => { - expect(() => match('/:"foo')).toThrow( - new TypeError( - "Unterminated quote at 2: https://git.new/pathToRegexpError", - ), - ); + it("should throw when expecting a repeated value", () => { + const toPath = compile("/*foo"); + + expect(() => { + toPath({ foo: [] }); + }).toThrow(new TypeError('Expected "foo" to be a non-empty array')); }); - it("should throw on invalid *", () => { - expect(() => match("/:foo*")).toThrow( - new TypeError( - "Unexpected * at 5, you probably want `/*` or 
`{/:foo}*`: https://git.new/pathToRegexpError", - ), - ); + it("should throw when param gets an array", () => { + const toPath = compile("/:foo"); + + expect(() => { + toPath({ foo: [] }); + }).toThrow(new TypeError('Expected "foo" to be a string')); + }); + + it("should throw when a wildcard is not an array", () => { + const toPath = compile("/*foo"); + + expect(() => { + toPath({ foo: "a" }); + }).toThrow(new TypeError('Expected "foo" to be a non-empty array')); + }); + + it("should throw when a wildcard array value is not a string", () => { + const toPath = compile("/*foo"); + + expect(() => { + toPath({ foo: [1, "a"] as any }); + }).toThrow(new TypeError('Expected "foo/0" to be a string')); }); }); @@ -126,64 +131,6 @@ describe("path-to-regexp", () => { }); }, ); - - describe("compile errors", () => { - it("should throw when a required param is undefined", () => { - const toPath = compile("/a/:b/c"); - - expect(() => { - toPath(); - }).toThrow(new TypeError('Expected "b" to be a string')); - }); - - it("should throw when it does not match the pattern", () => { - const toPath = compile("/:foo(\\d+)"); - - expect(() => { - toPath({ foo: "abc" }); - }).toThrow(new TypeError('Invalid value for "foo": "abc"')); - }); - - it("should throw when expecting a repeated value", () => { - const toPath = compile("{/:foo}+"); - - expect(() => { - toPath({ foo: [] }); - }).toThrow(new TypeError('Invalid value for "foo": ""')); - }); - - it("should throw when not expecting a repeated value", () => { - const toPath = compile("/:foo"); - - expect(() => { - toPath({ foo: [] }); - }).toThrow(new TypeError('Expected "foo" to be a string')); - }); - - it("should throw when a repeated param is not an array", () => { - const toPath = compile("{/:foo}+"); - - expect(() => { - toPath({ foo: "a" }); - }).toThrow(new TypeError('Expected "foo" to be an array')); - }); - - it("should throw when an array value is not a string", () => { - const toPath = compile("{/:foo}+"); - - expect(() => { 
- toPath({ foo: [1, "a"] as any }); - }).toThrow(new TypeError('Expected "foo/0" to be a string')); - }); - - it("should throw when repeated value does not match", () => { - const toPath = compile("{/:foo(\\d+)}+"); - - expect(() => { - toPath({ foo: ["1", "2", "3", "a"] }); - }).toThrow(new TypeError('Invalid value for "foo": "/1/2/3/a"')); - }); - }); }); /** diff --git a/src/index.ts b/src/index.ts index 2692df0..c96064e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -57,19 +57,20 @@ export interface CompileOptions extends PathOptions { type TokenType = | "{" | "}" - | ";" - | "*" - | "+" - | "?" - | "NAME" - | "PATTERN" + | "," + | "WILDCARD" + | "PARAM" | "CHAR" | "ESCAPED" | "END" - // Reserved for use. - | "!" - | "@" - | ","; + // Reserved for use or ambiguous due to past use. + | "(" + | ")" + | "[" + | "]" + | "+" + | "?" + | "!"; /** * Tokenizer results. @@ -81,153 +82,121 @@ interface LexToken { } const SIMPLE_TOKENS: Record = { - "!": "!", - "@": "@", - ";": ";", + // Groups. + "{": "{", + "}": "}", ",": ",", - "*": "*", + // Reserved. + "(": "(", + ")": ")", + "[": "[", + "]": "]", "+": "+", "?": "?", - "{": "{", - "}": "}", + "!": "!", }; +/** + * Escape a regular expression string. + */ +function escape(str: string) { + return str.replace(/[.+*?^${}()[\]|/\\]/g, "\\$&"); +} + +/** + * Get the flags for a regexp from the options. + */ +function toFlags(options: { sensitive?: boolean }) { + return options.sensitive ? "s" : "is"; +} + /** * Tokenize input string. 
*/ -function lexer(str: string) { +function* lexer(str: string): Generator { const chars = [...str]; - const tokens: LexToken[] = []; let i = 0; - while (i < chars.length) { - const value = chars[i]; - const type = SIMPLE_TOKENS[value]; - - if (type) { - tokens.push({ type, index: i++, value }); - continue; - } - - if (value === "\\") { - tokens.push({ type: "ESCAPED", index: i++, value: chars[i++] }); - continue; - } - - if (value === ":") { - let name = ""; - - if (ID_START.test(chars[++i])) { - name += chars[i]; - while (ID_CONTINUE.test(chars[++i])) { - name += chars[i]; - } - } else if (chars[i] === '"') { - let pos = i; - - while (i < chars.length) { - if (chars[++i] === '"') { - i++; - pos = 0; - break; - } - - if (chars[i] === "\\") { - name += chars[++i]; - } else { - name += chars[i]; - } - } - - if (pos) { - throw new TypeError(`Unterminated quote at ${pos}: ${DEBUG_URL}`); - } - } + function name() { + let value = ""; - if (!name) { - throw new TypeError(`Missing parameter name at ${i}: ${DEBUG_URL}`); - } - - tokens.push({ type: "NAME", index: i, value: name }); - continue; - } - - if (value === "(") { - const pos = i++; - let count = 1; - let pattern = ""; - - if (chars[i] === "?") { - throw new TypeError( - `Pattern cannot start with "?" 
at ${i}: ${DEBUG_URL}`, - ); + if (ID_START.test(chars[++i])) { + value += chars[i]; + while (ID_CONTINUE.test(chars[++i])) { + value += chars[i]; } + } else if (chars[i] === '"') { + let pos = i; while (i < chars.length) { - if (chars[i] === "\\") { - pattern += chars[i++] + chars[i++]; - continue; + if (chars[++i] === '"') { + i++; + pos = 0; + break; } - if (chars[i] === ")") { - count--; - if (count === 0) { - i++; - break; - } - } else if (chars[i] === "(") { - count++; - if (chars[i + 1] !== "?") { - throw new TypeError( - `Capturing groups are not allowed at ${i}: ${DEBUG_URL}`, - ); - } + if (chars[i] === "\\") { + value += chars[++i]; + } else { + value += chars[i]; } - - pattern += chars[i++]; - } - - if (count) { - throw new TypeError(`Unbalanced pattern at ${pos}: ${DEBUG_URL}`); } - if (!pattern) { - throw new TypeError(`Missing pattern at ${pos}: ${DEBUG_URL}`); + if (pos) { + throw new TypeError(`Unterminated quote at ${pos}: ${DEBUG_URL}`); } - - tokens.push({ type: "PATTERN", index: i, value: pattern }); - continue; } - if (value === ")") { - throw new TypeError(`Unmatched ) at ${i}: ${DEBUG_URL}`); + if (!value) { + throw new TypeError(`Missing parameter name at ${i}: ${DEBUG_URL}`); } - tokens.push({ type: "CHAR", index: i, value: chars[i++] }); + return value; } - tokens.push({ type: "END", index: i, value: "" }); + while (i < chars.length) { + const value = chars[i]; + const type = SIMPLE_TOKENS[value]; + + if (type) { + yield { type, index: i++, value }; + } else if (value === "\\") { + yield { type: "ESCAPED", index: i++, value: chars[i++] }; + } else if (value === ":") { + const value = name(); + yield { type: "PARAM", index: i, value }; + } else if (value === "*") { + const value = name(); + yield { type: "WILDCARD", index: i, value }; + } else { + yield { type: "CHAR", index: i, value: chars[i++] }; + } + } - return new Iter(tokens); + return { type: "END", index: i, value: "" }; } class Iter { - index = 0; + #peek?: LexToken; - 
constructor(private tokens: LexToken[]) {} + constructor(private tokens: Generator) {} peek(): LexToken { - return this.tokens[this.index]; + if (!this.#peek) { + const next = this.tokens.next(); + this.#peek = next.value; + } + return this.#peek; } - tryConsume(type: LexToken["type"]): string | undefined { + tryConsume(type: TokenType): string | undefined { const token = this.peek(); if (token.type !== type) return; - this.index++; + this.#peek = undefined; // Reset after consumed. return token.value; } - consume(type: LexToken["type"]): string { + consume(type: TokenType): string { const value = this.tryConsume(type); if (value !== undefined) return value; const { type: nextType, index } = this.peek(); @@ -244,12 +213,55 @@ class Iter { } return result; } +} - modifier(): string | undefined { - return this.tryConsume("?") || this.tryConsume("*") || this.tryConsume("+"); - } +/** + * Plain text. + */ +export interface Text { + type: "text"; + value: string; } +/** + * A parameter designed to match arbitrary text within a segment. + */ +export interface Parameter { + type: "param"; + name: string; +} + +/** + * A wildcard parameter designed to match multiple segments. + */ +export interface Wildcard { + type: "wildcard"; + name: string; +} + +/** + * A set of possible tokens to expand when matching. + */ +export interface Group { + type: "group"; + children: Array; +} + +/** + * Types of variable parameters. + */ +export type Key = Parameter | Wildcard; + +/** + * Simple tokens. + */ +export type Sequence = Text | Key; + +/** + * A sequence of path match characters. + */ +export type Token = Sequence | Group; + /** * Tokenized path instance. Can we passed around instead of string. 
*/ @@ -266,74 +278,85 @@ export class TokenData { export function parse(str: string, options: ParseOptions = {}): TokenData { const { encodePath = NOOP_VALUE, delimiter = encodePath(DEFAULT_DELIMITER) } = options; - const tokens: Token[] = []; - const it = lexer(str); - let key = 0; - - do { - const path = it.text(); - if (path) tokens.push(encodePath(path)); - - const name = it.tryConsume("NAME"); - const pattern = it.tryConsume("PATTERN"); - - if (name || pattern) { - tokens.push({ - name: name || String(key++), - pattern, - }); - - const next = it.peek(); - if (next.type === "*") { - throw new TypeError( - `Unexpected * at ${next.index}, you probably want \`/*\` or \`{/:foo}*\`: ${DEBUG_URL}`, - ); + const it = new Iter(lexer(str)); + + function consume(brace: boolean): [end: boolean, tokens: Token[]] { + const tokens: Token[] = []; + + while (true) { + const path = it.text(); + if (path) tokens.push({ type: "text", value: encodePath(path) }); + + const param = it.tryConsume("PARAM"); + if (param) { + tokens.push({ + type: "param", + name: param, + }); + continue; } - continue; - } + const wildcard = it.tryConsume("WILDCARD"); + if (wildcard) { + tokens.push({ + type: "wildcard", + name: wildcard, + }); + continue; + } - const asterisk = it.tryConsume("*"); - if (asterisk) { - tokens.push({ - name: String(key++), - pattern: `${negate(delimiter)}*`, - modifier: "*", - separator: delimiter, - }); - continue; - } + const open = it.tryConsume("{"); + if (open) { + const children: Array = []; - const open = it.tryConsume("{"); - if (open) { - const prefix = it.text(); - const name = it.tryConsume("NAME"); - const pattern = it.tryConsume("PATTERN"); - const suffix = it.text(); - const separator = it.tryConsume(";") && it.text(); - - it.consume("}"); - - const modifier = it.modifier(); - - tokens.push({ - name: name || (pattern ? 
String(key++) : ""), - prefix: encodePath(prefix), - suffix: encodePath(suffix), - pattern, - modifier, - separator, - }); - continue; - } + while (true) { + const [end, tokens] = consume(true); + children.push(tokens); + if (end) break; + } + + tokens.push({ + type: "group", + children: children, + }); + continue; + } + + if (brace) { + const comma = it.tryConsume(","); + if (comma) return [false, tokens]; + it.consume("}"); + } else { + it.consume("END"); + } - it.consume("END"); - break; - } while (true); + return [true, tokens]; + } + } + const [, tokens] = consume(false); return new TokenData(tokens, delimiter); } +/** + * Transform tokens into a path building function. + */ +export function $compile

( + data: TokenData, + options: CompileOptions, +): PathFunction

{ + const { encode = encodeURIComponent } = options; + const fn = tokensToFunction(data.tokens, data.delimiter, encode); + + return function path(data: P = {} as P) { + const [path, ...missing] = fn(data); + if (missing.length) { + throw new TypeError(`Missing parameters: ${missing.join(", ")}`); + } + return path; + }; +} + /** * Compile a string to a template function for the path. */ @@ -347,120 +370,91 @@ export function compile

( export type ParamData = Partial>; export type PathFunction

= (data?: P) => string; -/** - * Check if a key repeats. - */ -export function isRepeat(key: Key) { - return key.modifier === "+" || key.modifier === "*"; -} +function tokensToFunction( + tokens: Token[], + delimiter: string, + encode: Encode | false, +) { + const encoders = tokens.map((token) => + tokenToFunction(token, delimiter, encode), + ); -/** - * Check if a key is optional. - */ -export function isOptional(key: Key) { - return key.modifier === "?" || key.modifier === "*"; + return (data: ParamData) => { + let path = ""; + const missing: string[] = []; + + for (const encoder of encoders) { + const [value, ...extras] = encoder(data); + path += value; + missing.push(...extras); + } + + return [path, ...missing]; + }; } /** * Convert a single token into a path building function. */ -function keyToFunction( - key: Key, +function tokenToFunction( + token: Token, + delimiter: string, encode: Encode | false, -): (data: ParamData) => string { - const encodeValue = encode || NOOP_VALUE; - const { prefix = "", suffix = "", separator = suffix + prefix } = key; +): (data: ParamData) => string[] { + if (token.type === "text") return () => [token.value]; - if (encode && isRepeat(key)) { - const stringify = (value: string, index: number) => { - if (typeof value !== "string") { - throw new TypeError(`Expected "${key.name}/${index}" to be a string`); - } - return encodeValue(value); - }; + if (token.type === "group") { + const fns = token.children.map((child) => + tokensToFunction(child, delimiter, encode), + ); - const compile = (value: unknown) => { - if (!Array.isArray(value)) { - throw new TypeError(`Expected "${key.name}" to be an array`); + return (data) => { + const allMissing: string[] = []; + for (const fn of fns) { + const [value, ...missing] = fn(data); + if (!missing.length) return [value]; + allMissing.push(...missing); } - - if (value.length === 0) return ""; - - return prefix + value.map(stringify).join(separator) + suffix; + return ["", ...allMissing]; }; + } 
- if (isOptional(key)) { - return (data): string => { - const value = data[key.name]; - if (value == null) return ""; - return value.length ? compile(value) : ""; - }; - } + const encodeValue = encode || NOOP_VALUE; - return (data): string => { - const value = data[key.name]; - return compile(value); - }; - } + if (token.type === "wildcard") { + return (data) => { + const value = data[token.name]; + if (value == null) return ["", token.name]; - const stringify = (value: unknown) => { - if (typeof value !== "string") { - throw new TypeError(`Expected "${key.name}" to be a string`); - } - return prefix + encodeValue(value) + suffix; - }; + if (!Array.isArray(value) || value.length === 0) { + throw new TypeError(`Expected "${token.name}" to be a non-empty array`); + } - if (isOptional(key)) { - return (data): string => { - const value = data[key.name]; - if (value == null) return ""; - return stringify(value); + return [ + value + .map((value, index) => { + if (typeof value !== "string") { + throw new TypeError( + `Expected "${token.name}/${index}" to be a string`, + ); + } + + return encodeValue(value); + }) + .join(delimiter), + ]; }; } - return (data): string => { - const value = data[key.name]; - return stringify(value); - }; -} - -/** - * Transform tokens into a path building function. - */ -export function $compile

( - data: TokenData, - options: CompileOptions, -): PathFunction

{ - const { encode = encodeURIComponent, validate = true } = options; - const flags = toFlags(options); - const sources = toRegExpSource(data, []); - - // Compile all the tokens into regexps. - const encoders: Array<(data: ParamData) => string> = data.tokens.map( - (token, index) => { - if (typeof token === "string") return () => token; - - const fn = keyToFunction(token, encode); - if (!validate) return fn; + return (data) => { + const value = data[token.name]; + if (value == null) return ["", token.name]; - const validRe = new RegExp(`^${sources[index]}$`, flags); - - return (data) => { - const value = fn(data); - if (!validRe.test(value)) { - throw new TypeError( - `Invalid value for "${token.name}": ${JSON.stringify(value)}`, - ); - } - return value; - }; - }, - ); + if (typeof value !== "string") { + throw new TypeError(`Expected "${token.name}" to be a string`); + } - return function path(data: Record = {}) { - let path = ""; - for (const encoder of encoders) path += encoder(data); - return path; + return [encodeValue(value)]; }; } @@ -490,37 +484,30 @@ export function $match

( options: MatchOptions = {}, ): MatchFunction

{ const { decode = decodeURIComponent, end = true } = options; - const { delimiter } = data; - const keys: Key[] = []; + const { tokens, delimiter } = data; const flags = toFlags(options); - const sources = toRegExpSource(data, keys); - const re = new RegExp( - `^${sources.join("")}(?=${escape(delimiter)}|$)`, - flags, - ); + const [source, keys] = toRegExp(tokens, delimiter); + + let pattern = `^${source}`; + pattern += end ? "$" : `(?=${escape(delimiter)}|$)`; + const re = new RegExp(pattern, flags); const decoders = keys.map((key) => { if (!decode) return NOOP_VALUE; - if (isRepeat(key)) { - const { prefix = "", suffix = "", separator = suffix + prefix } = key; - return (value: string) => value.split(separator).map(decode); + if (key.type === "wildcard") { + return (value: string) => value.split(delimiter).map(decode); } return decode; }); - const isValid = end - ? (a: string, b: string) => a.length === b.length - : () => true; - return Object.assign( function match(input: string) { const m = re.exec(input); if (!m) return false; const { 0: path } = m; - if (!isValid(input, path)) return false; const params = Object.create(null); for (let i = 1; i < m.length; i++) { @@ -544,104 +531,96 @@ export function match

( return $match(parse(path, options), options); } -/** - * Escape a regular expression string. - */ -function escape(str: string) { - return str.replace(/[.+*?^${}()[\]|/\\]/g, "\\$&"); -} +function toRegExp(tokens: Token[], delimiter: string): [string, Key[]] { + const regexps: string[] = []; + const keySets: Key[][] = []; -/** - * Get the flags for a regexp from the options. - */ -function toFlags(options: { sensitive?: boolean }) { - return options.sensitive ? "s" : "is"; + for (const seq of flatten(tokens, 0, [])) { + const [regexp, keys] = sequenceToRegExp(seq, delimiter); + regexps.push(regexp); + keySets.push(keys); + } + + return [union(regexps), keySets.flat()]; } -/** - * A key is a capture group in the regex. - */ -export interface Key { - name: string; - prefix?: string; - suffix?: string; - pattern?: string; - modifier?: string; - separator?: string; +function union(values: string[]) { + if (values.length === 1) return values[0]; + return `(?:${values.join("|")})`; } /** - * A token is a string (nothing special) or key metadata (capture group). + * Generate a flat list of sequence tokens from the given tokens. */ -export type Token = string | Key; +function* flatten( + tokens: Token[], + index: number, + init: Sequence[], +): Generator { + if (index === tokens.length) { + return yield init; + } + + const token = tokens[index]; + + if (token.type === "group") { + for (const child of token.children) { + const fork = init.slice(); + for (const seq of flatten(child, 0, fork)) { + yield* flatten(tokens, index + 1, seq); + } + } + + if (token.children.length) return; + } else { + init.push(token); + } + + yield* flatten(tokens, index + 1, init); +} /** - * Convert a token into a regexp string (re-used for path validation). + * Transform a flat sequence of tokens into a regular expression. 
*/ -function toRegExpSource(data: TokenData, keys: Key[]): string[] { - const sources = Array(data.tokens.length); +function sequenceToRegExp( + tokens: Sequence[], + delimiter: string, +): [string, Key[]] { + const patterns = Array(tokens.length); + let i = tokens.length; let backtrack = ""; - - let i = data.tokens.length; + let isLastSegmentParam = true; + const keys: Key[] = []; while (i--) { - const token = data.tokens[i]; - - if (typeof token === "string") { - backtrack = token; - sources[i] = escape(token); - continue; - } - - const { - prefix = "", - suffix = "", - separator = suffix + prefix, - modifier = "", - } = token; - - const pre = escape(prefix); - const post = escape(suffix); - - if (token.name) { - backtrack = suffix || backtrack; - keys.unshift(token); - - if (isRepeat(token)) { - if (!separator) { - throw new TypeError( - `Missing separator for "${token.name}": ${DEBUG_URL}`, - ); + const token = tokens[i]; + + switch (token.type) { + case "text": + patterns[i] = escape(token.value); + backtrack = token.value; + isLastSegmentParam ||= token.value.includes(delimiter); + continue; + case "param": + case "wildcard": + if (!isLastSegmentParam && !backtrack) { + throw new TypeError(`Missing text after parameter "${token.name}"`); } - const mod = modifier === "*" ? "?" : ""; - const sep = escape(separator); - const pattern = - token.pattern || `${negate(data.delimiter, separator, backtrack)}+`; - - sources[i] = wrap( - pre, - `(?:${pattern})(?:${sep}(?:${pattern}))*`, - post, - mod, - ); - } else { - sources[i] = wrap( - pre, - token.pattern || `${negate(data.delimiter, backtrack)}+`, - post, - modifier, - ); - } - - backtrack = prefix; - } else { - sources[i] = `(?:${pre}${post})${modifier}`; - backtrack = `${prefix}${suffix}`; + patterns[i] = + token.type === "wildcard" + ? `(.+)` + : `(${negate(delimiter, isLastSegmentParam ? 
"" : backtrack)}+)`; + keys.unshift(token); + backtrack = ""; + isLastSegmentParam = false; + continue; + default: + throw new TypeError(`Unexpected token type at index ${i}`); } } - return sources; + return [patterns.join(""), keys]; } function negate(...args: string[]) { @@ -658,11 +637,3 @@ function negate(...args: string[]) { return `(?:(?!${values.map(escape).join("|")}).)`; } - -function wrap(pre: string, pattern: string, post: string, modifier: string) { - if (pre || post) { - return `(?:${pre}(${pattern})${post})${modifier}`; - } - - return `(${pattern})${modifier}`; -} diff --git a/tsconfig.build.json b/tsconfig.build.json index d783ab3..3db8e88 100644 --- a/tsconfig.build.json +++ b/tsconfig.build.json @@ -3,5 +3,5 @@ "compilerOptions": { "types": [] }, - "exclude": ["src/**/*.spec.ts"] + "exclude": ["src/**/*.spec.ts", "src/**/*.bench.ts"] }