From 29daeb281ddf60e3e65900f3c062cf58cd8986ca Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Thu, 19 Dec 2024 12:26:59 -0800 Subject: [PATCH] make comma-list parsing really robust --- experimental/ast/options.go | 5 + experimental/parser/diagnostics_internal.go | 3 + experimental/parser/parse_decl.go | 7 + experimental/parser/parse_delimited.go | 76 +++++- experimental/parser/parse_expr.go | 63 +++-- experimental/parser/parse_type.go | 1 + .../parser/testdata/parser/lists.proto | 65 +++++ .../testdata/parser/lists.proto.stderr.txt | 227 ++++++++++++++++++ .../parser/testdata/parser/lists.proto.yaml | 220 +++++++++++++++++ .../parser/type/generic.proto.stderr.txt | 6 +- .../testdata/parser/type/generic.proto.yaml | 4 +- experimental/report/span.go | 54 +++++ 12 files changed, 701 insertions(+), 30 deletions(-) create mode 100644 experimental/parser/testdata/parser/lists.proto create mode 100644 experimental/parser/testdata/parser/lists.proto.stderr.txt create mode 100644 experimental/parser/testdata/parser/lists.proto.yaml diff --git a/experimental/ast/options.go b/experimental/ast/options.go index e1ff72d8..d781b2f4 100644 --- a/experimental/ast/options.go +++ b/experimental/ast/options.go @@ -46,6 +46,11 @@ type Option struct { Value ExprAny } +// Span implements [report.Spanner]. 
+func (o Option) Span() report.Span { + return report.Join(o.Path, o.Equals, o.Value) +} + type rawOption struct { path rawPath equals token.ID diff --git a/experimental/parser/diagnostics_internal.go b/experimental/parser/diagnostics_internal.go index 796ea0d2..aff0d2a7 100644 --- a/experimental/parser/diagnostics_internal.go +++ b/experimental/parser/diagnostics_internal.go @@ -42,6 +42,9 @@ func (e errUnexpected) Diagnose(d *report.Diagnostic) { got := e.got if got == nil { got = taxa.Classify(e.what) + if got == taxa.Unknown { + got = "tokens" + } } var message report.DiagnosticOption diff --git a/experimental/parser/parse_decl.go b/experimental/parser/parse_decl.go index 223e29ac..88f9d56e 100644 --- a/experimental/parser/parse_decl.go +++ b/experimental/parser/parse_decl.go @@ -27,6 +27,10 @@ type exprComma struct { comma token.Token } +func (e exprComma) Span() report.Span { + return e.expr.Span() +} + // parseDecl parses any Protobuf declaration. // // This function will always advance cursor if it is not empty. 
@@ -303,6 +307,7 @@ func parseRange(p *parser, c *token.Cursor) ast.DeclRange { return expr, !expr.Nil() }, + canStart: canStartExpr, }.iter(func(expr ast.ExprAny, comma token.Token) bool { exprs = append(exprs, exprComma{expr, comma}) return true @@ -343,6 +348,7 @@ func parseTypeList(p *parser, parens token.Token, types ast.TypeList, in taxa.No ty := parseType(p, c, in.In()) return ty, !ty.Nil() }, + canStart: canStartPath, }.appendTo(types) } @@ -397,6 +403,7 @@ func parseOptions(p *parser, brackets token.Token, _ taxa.Noun) ast.CompactOptio } return option, !option.Value.Nil() }, + canStart: canStartPath, }.appendTo(options) return options diff --git a/experimental/parser/parse_delimited.go b/experimental/parser/parse_delimited.go index 8586751e..1ff93eca 100644 --- a/experimental/parser/parse_delimited.go +++ b/experimental/parser/parse_delimited.go @@ -25,7 +25,7 @@ import ( ) // delimited is a mechanism for parsing a punctuation-delimited list. -type delimited[T any] struct { +type delimited[T report.Spanner] struct { p *parser c *token.Cursor @@ -46,6 +46,9 @@ type delimited[T any] struct { // // This function is expected to exhaust parse func(*token.Cursor) (T, bool) + + // Used for skipping tokens until we can begin parsing. + canStart func(token.Token) bool } func (d delimited[T]) appendTo(commas ast.Commas[T]) { @@ -65,9 +68,11 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) { } var delim token.Token + var latest int // The index of the most recently seen delimiter. 
if next := d.c.Peek(); slices.Contains(d.delims, next.Text()) { _ = d.c.Pop() + latest = slices.Index(d.delims, next.Text()) d.p.Error(errUnexpected{ what: next, @@ -77,21 +82,72 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) { }) } + var needDelim bool + var mark token.CursorMark for !d.c.Done() { + ensureProgress(d.c, &mark) + + // Set if we should not diagnose a missing comma, because there was + // garbage in front of the call to parse(). + var badPrefix bool + if !d.canStart(d.c.Peek()) { + first := d.c.Pop() + var last token.Token + for !d.c.Done() && !d.canStart(d.c.Peek()) { + last = d.c.Pop() + } + + want := d.what.AsSet() + if needDelim && delim.Nil() { + want = d.delimNouns() + } + + what := report.Spanner(first) + if !last.Nil() { + what = report.Join(first, last) + } + + badPrefix = true + d.p.Error(errUnexpected{ + what: what, + where: d.in.In(), + want: want, + }) + } + v, ok := d.parse(d.c) if !ok { break } + if !badPrefix && needDelim && delim.Nil() { + d.p.Error(errUnexpected{ + what: v, + where: d.in.In(), + want: d.delimNouns(), + }).Apply( + // TODO: this should be a suggestion. + report.Snippetf(v.Span().Rune(0), "note: assuming a missing `%s` here", d.delims[latest]), + ) + } + needDelim = d.required + // Pop as many delimiters as we can. delim = token.Nil - for slices.Contains(d.delims, d.c.Peek().Text()) { + for { + which := slices.Index(d.delims, d.c.Peek().Text()) + if which < 0 { + break + } + latest = which + next := d.c.Pop() if delim.Nil() { delim = next continue } + // Diagnose all extra delimiters after the first. 
d.p.Error(errUnexpected{ what: next, where: d.in.In(), @@ -100,7 +156,13 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) { }).Apply(report.Snippetf(delim, "first delimiter is here")) } - if !yield(v, delim) || (d.required && delim.Nil()) { + if !yield(v, delim) { + break + } + + if delim.Nil() && d.required && !d.exhaust { + // In non-exhaust mode, if we miss a required comma just bail. + // Otherwise, go again to parse another thing. break } } @@ -121,3 +183,11 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) { }) } } + +func (d delimited[T]) delimNouns() taxa.Set { + var set taxa.Set + for _, delim := range d.delims { + set = set.With(taxa.Punct(delim, false)) + } + return set +} diff --git a/experimental/parser/parse_expr.go b/experimental/parser/parse_expr.go index 7743f4b8..0abe16eb 100644 --- a/experimental/parser/parse_expr.go +++ b/experimental/parser/parse_expr.go @@ -60,31 +60,40 @@ func parseExprInfix(p *parser, c *token.Cursor, where taxa.Place, lhs ast.ExprAn }).AsAny() case "{", "<", "[": // This is for colon-less, array or dict-valued fields. - if !next.IsLeaf() && lhs.Kind() != ast.ExprKindField { - // The previous expression cannot also be a key-value pair, since - // this messes with parsing of dicts, which are not comma-separated. - // - // In other words, consider the following, inside of an expression - // context: + if next.IsLeaf() { + break + } + + // The previous expression cannot also be a key-value pair, since + // this messes with parsing of dicts, which are not comma-separated. + // + // In other words, consider the following, inside of an expression + // context: + // + // foo: bar { ... } + // + // We want to diagnose the { as unexpected here, and it is better + // for that to be done by whatever is calling parseExpr since it + // will have more context. + // + // We also do not allow this inside of arrays, because we want + // [a {}] to parse as [a, {}] not [a: {}]. 
+ if lhs.Kind() == ast.ExprKindField || where.Subject() == taxa.Array { + break + } + + return p.NewExprField(ast.ExprFieldArgs{ + Key: lhs, + // Why not call parseExprSolo? Suppose the following + // (invalid) production: // - // foo: bar { ... } + // foo { ... } to { ... } // - // We want to diagnose the { as unexpected here, and it is better - // for that to be done by whatever is calling parseExpr since it - // will have more context. - return p.NewExprField(ast.ExprFieldArgs{ - Key: lhs, - // Why not call parseExprSolo? Suppose the following - // (invalid) production: - // - // foo { ... } to { ... } - // - // Calling parseExprInfix will cause this to be parsed - // as a range expression, which will be diagnosed when - // we legalize. - Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1), - }).AsAny() - } + // Calling parseExprInfix will cause this to be parsed + // as a range expression, which will be diagnosed when + // we legalize. + Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1), + }).AsAny() } } @@ -157,7 +166,7 @@ func parseExprSolo(p *parser, c *token.Cursor, where taxa.Place) ast.ExprAny { elems := delimited[ast.ExprAny]{ p: p, c: body.Children(), - what: taxa.Expr, + what: taxa.DictField, in: in, delims: []string{",", ";"}, @@ -168,9 +177,15 @@ func parseExprSolo(p *parser, c *token.Cursor, where taxa.Place) ast.ExprAny { expr := parseExpr(p, c, in.In()) return expr, !expr.Nil() }, + canStart: canStartExpr, } if next.Text() == "[" { + elems.what = taxa.Expr + elems.delims = []string{","} + elems.required = true + elems.trailing = false + array := p.NewExprArray(body) elems.appendTo(array) return array.AsAny() diff --git a/experimental/parser/parse_type.go b/experimental/parser/parse_type.go index f0bbf1da..a06cd559 100644 --- a/experimental/parser/parse_type.go +++ b/experimental/parser/parse_type.go @@ -185,6 +185,7 @@ func parseTypeImpl(p *parser, c *token.Cursor, where taxa.Place, pathAfter bool) ty := parseType(p, c, 
taxa.TypeParams.In()) return ty, !ty.Nil() }, + canStart: canStartPath, }.appendTo(generic.Args()) ty = generic.AsAny() diff --git a/experimental/parser/testdata/parser/lists.proto b/experimental/parser/testdata/parser/lists.proto new file mode 100644 index 00000000..53d00eca --- /dev/null +++ b/experimental/parser/testdata/parser/lists.proto @@ -0,0 +1,65 @@ +// Copyright 2020-2024 Buf Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This test exercises every delimited list production in the language. 
+ +option foo = []; +option foo = [1]; +option foo = [1, 2]; +option foo = [1, 2 3]; +option foo = [1, 2,, 3]; +option foo = [1, 2,, 3,]; +option foo = [,1 2,, 3,]; +option foo = [1; 2; 3]; +option foo = [a {}]; +option foo = [,]; + +option foo = { + bar: 1 + bar { + bar: 2;; + } +}; +option foo = {;bar: 1}; +option foo = {baz: 1;; baz: 1}; +option foo = {baz: 1,; baz: 1;}; +option foo = { + bar {;} + bar {,} +}; + +service S { + rpc Foo(int) returns (int); + rpc Foo(int, int) returns (int, int); + rpc Foo(int int) returns (int int); + rpc Foo(int; int) returns (int, int,); + rpc Foo(, int, int) returns (int,, int,); + rpc Foo(;) returns (,); + rpc Foo() returns (); +} + +message M { + map x; + map x; + map x; + map x; + map<,> x; + map<> x; + map<,int, int> x; + map x; + map< + int, + int, + > x; +} \ No newline at end of file diff --git a/experimental/parser/testdata/parser/lists.proto.stderr.txt b/experimental/parser/testdata/parser/lists.proto.stderr.txt new file mode 100644 index 00000000..5fb68a64 --- /dev/null +++ b/experimental/parser/testdata/parser/lists.proto.stderr.txt @@ -0,0 +1,227 @@ +error: unexpected integer literal in array expression + --> testdata/parser/lists.proto:20:20 + | +20 | option foo = [1, 2 3]; + | ^ expected `,` + | | + | note: assuming a missing `,` here + +error: unexpected extra `,` in array expression + --> testdata/parser/lists.proto:21:20 + | +21 | option foo = [1, 2,, 3]; + | -^ expected expression + | | + | first delimiter is here + +error: unexpected extra `,` in array expression + --> testdata/parser/lists.proto:22:20 + | +22 | option foo = [1, 2,, 3,]; + | -^ expected expression + | | + | first delimiter is here + +error: unexpected trailing `,` in array expression + --> testdata/parser/lists.proto:22:23 + | +22 | option foo = [1, 2,, 3,]; + | ^ + +error: unexpected leading `,` in array expression + --> testdata/parser/lists.proto:23:15 + | +23 | option foo = [,1 2,, 3,]; + | ^ expected expression + +error: unexpected 
integer literal in array expression + --> testdata/parser/lists.proto:23:18 + | +23 | option foo = [,1 2,, 3,]; + | ^ expected `,` + | | + | note: assuming a missing `,` here + +error: unexpected extra `,` in array expression + --> testdata/parser/lists.proto:23:20 + | +23 | option foo = [,1 2,, 3,]; + | -^ expected expression + | | + | first delimiter is here + +error: unexpected trailing `,` in array expression + --> testdata/parser/lists.proto:23:23 + | +23 | option foo = [,1 2,, 3,]; + | ^ + +error: unexpected `;` in array expression + --> testdata/parser/lists.proto:24:16 + | +24 | option foo = [1; 2; 3]; + | ^ expected `,` + +error: unexpected `;` in array expression + --> testdata/parser/lists.proto:24:19 + | +24 | option foo = [1; 2; 3]; + | ^ expected `,` + +error: unexpected message expression in array expression + --> testdata/parser/lists.proto:25:17 + | +25 | option foo = [a {}]; + | ^^ expected `,` + | | + | note: assuming a missing `,` here + +error: unexpected leading `,` in array expression + --> testdata/parser/lists.proto:26:15 + | +26 | option foo = [,]; + | ^ expected expression + +error: unexpected extra `;` in message expression + --> testdata/parser/lists.proto:31:16 + | +31 | bar: 2;; + | -^ expected message field value + | | + | first delimiter is here + +error: unexpected leading `;` in message expression + --> testdata/parser/lists.proto:34:15 + | +34 | option foo = {;bar: 1}; + | ^ expected message field value + +error: unexpected extra `;` in message expression + --> testdata/parser/lists.proto:35:22 + | +35 | option foo = {baz: 1;; baz: 1}; + | -^ expected message field value + | | + | first delimiter is here + +error: unexpected extra `;` in message expression + --> testdata/parser/lists.proto:36:22 + | +36 | option foo = {baz: 1,; baz: 1;}; + | -^ expected message field value + | | + | first delimiter is here + +error: unexpected leading `;` in message expression + --> testdata/parser/lists.proto:38:10 + | +38 | bar {;} + | ^ 
expected message field value + +error: unexpected leading `,` in message expression + --> testdata/parser/lists.proto:39:10 + | +39 | bar {,} + | ^ expected message field value + +error: unexpected type name in method parameter list + --> testdata/parser/lists.proto:45:17 + | +45 | rpc Foo(int int) returns (int int); + | ^^^ expected `,` + | | + | note: assuming a missing `,` here + +error: unexpected type name in method return type + --> testdata/parser/lists.proto:45:35 + | +45 | rpc Foo(int int) returns (int int); + | ^^^ expected `,` + | | + | note: assuming a missing `,` here + +error: unexpected `;` in method parameter list + --> testdata/parser/lists.proto:46:16 + | +46 | rpc Foo(int; int) returns (int, int,); + | ^ expected `,` + +error: unexpected trailing `,` in method return type + --> testdata/parser/lists.proto:46:40 + | +46 | rpc Foo(int; int) returns (int, int,); + | ^ + +error: unexpected leading `,` in method parameter list + --> testdata/parser/lists.proto:47:13 + | +47 | rpc Foo(, int, int) returns (int,, int,); + | ^ expected type + +error: unexpected extra `,` in method return type + --> testdata/parser/lists.proto:47:38 + | +47 | rpc Foo(, int, int) returns (int,, int,); + | -^ expected type + | | + | first delimiter is here + +error: unexpected trailing `,` in method return type + --> testdata/parser/lists.proto:47:43 + | +47 | rpc Foo(, int, int) returns (int,, int,); + | ^ + +error: unexpected `;` in method parameter list + --> testdata/parser/lists.proto:48:13 + | +48 | rpc Foo(;) returns (,); + | ^ expected type + +error: unexpected leading `,` in method return type + --> testdata/parser/lists.proto:48:25 + | +48 | rpc Foo(;) returns (,); + | ^ expected type + +error: unexpected type name in type parameters + --> testdata/parser/lists.proto:55:13 + | +55 | map x; + | ^^^ expected `,` + | | + | note: assuming a missing `,` here + +error: unexpected extra `,` in type parameters + --> testdata/parser/lists.proto:56:13 + | +56 | map x; + | -^ 
expected type + | | + | first delimiter is here + +error: unexpected leading `,` in type parameters + --> testdata/parser/lists.proto:57:9 + | +57 | map<,> x; + | ^ expected type + +error: unexpected leading `,` in type parameters + --> testdata/parser/lists.proto:59:9 + | +59 | map<,int, int> x; + | ^ expected type + +error: unexpected `;` in type parameters + --> testdata/parser/lists.proto:60:12 + | +60 | map x; + | ^ expected `,` + +error: unexpected trailing `,` in type parameters + --> testdata/parser/lists.proto:63:12 + | +63 | int, + | ^ + +encountered 33 errors diff --git a/experimental/parser/testdata/parser/lists.proto.yaml b/experimental/parser/testdata/parser/lists.proto.yaml new file mode 100644 index 00000000..c9e5e638 --- /dev/null +++ b/experimental/parser/testdata/parser/lists.proto.yaml @@ -0,0 +1,220 @@ +decls: + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.array: {} + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.array.elements: [{ literal.int_value: 1 }] + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.array.elements: [{ literal.int_value: 1 }, { literal.int_value: 2 }] + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.array.elements: + - literal.int_value: 1 + - literal.int_value: 2 + - literal.int_value: 3 + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.array.elements: + - literal.int_value: 1 + - literal.int_value: 2 + - literal.int_value: 3 + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.array.elements: + - literal.int_value: 1 + - literal.int_value: 2 + - literal.int_value: 3 + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.array.elements: + - literal.int_value: 1 + - literal.int_value: 2 + - literal.int_value: 3 + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.array.elements: + - literal.int_value: 1 + - literal.int_value: 2 + - 
literal.int_value: 3 + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.array.elements: [{ path.components: [{ ident: "a" }] }, { dict: {} }] + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.array: {} + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.dict.entries: + - key.path.components: [{ ident: "bar" }] + value.literal.int_value: 1 + - key.path.components: [{ ident: "bar" }] + value.dict.entries: + - key.path.components: [{ ident: "bar" }] + value.literal.int_value: 2 + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.dict.entries: + - key.path.components: [{ ident: "bar" }] + value.literal.int_value: 1 + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.dict.entries: + - key.path.components: [{ ident: "baz" }] + value.literal.int_value: 1 + - key.path.components: [{ ident: "baz" }] + value.literal.int_value: 1 + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.dict.entries: + - key.path.components: [{ ident: "baz" }] + value.literal.int_value: 1 + - key.path.components: [{ ident: "baz" }] + value.literal.int_value: 1 + - def: + kind: KIND_OPTION + name.components: [{ ident: "foo" }] + value.dict.entries: + - key.path.components: [{ ident: "bar" }] + value.dict: {} + - key.path.components: [{ ident: "bar" }] + value.dict: {} + - def: + kind: KIND_SERVICE + name.components: [{ ident: "S" }] + body.decls: + - def: + kind: KIND_METHOD + name.components: [{ ident: "Foo" }] + signature: + inputs: [{ path.components: [{ ident: "int" }] }] + outputs: [{ path.components: [{ ident: "int" }] }] + - def: + kind: KIND_METHOD + name.components: [{ ident: "Foo" }] + signature: + inputs: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + outputs: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + - def: + kind: KIND_METHOD + name.components: [{ ident: "Foo" }] + 
signature: + inputs: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + outputs: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + - def: + kind: KIND_METHOD + name.components: [{ ident: "Foo" }] + signature: + inputs: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + outputs: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + - def: + kind: KIND_METHOD + name.components: [{ ident: "Foo" }] + signature: + inputs: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + outputs: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + - def: + kind: KIND_METHOD + name.components: [{ ident: "Foo" }] + signature: {} + - def: + kind: KIND_METHOD + name.components: [{ ident: "Foo" }] + signature: {} + - def: + kind: KIND_MESSAGE + name.components: [{ ident: "M" }] + body.decls: + - def: + kind: KIND_FIELD + name.components: [{ ident: "x" }] + type.generic: + path.components: [{ ident: "map" }] + args: [{ path.components: [{ ident: "int" }] }] + - def: + kind: KIND_FIELD + name.components: [{ ident: "x" }] + type.generic: + path.components: [{ ident: "map" }] + args: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + - def: + kind: KIND_FIELD + name.components: [{ ident: "x" }] + type.generic: + path.components: [{ ident: "map" }] + args: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + - def: + kind: KIND_FIELD + name.components: [{ ident: "x" }] + type.generic: + path.components: [{ ident: "map" }] + args: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + - def: + kind: KIND_FIELD + name.components: [{ ident: "x" }] + type.generic.path.components: [{ ident: "map" }] + - def: + kind: KIND_FIELD + name.components: [{ ident: "x" }] + type.generic.path.components: [{ ident: "map" }] + - def: + kind: 
KIND_FIELD + name.components: [{ ident: "x" }] + type.generic: + path.components: [{ ident: "map" }] + args: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + - def: + kind: KIND_FIELD + name.components: [{ ident: "x" }] + type.generic: + path.components: [{ ident: "map" }] + args: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] + - def: + kind: KIND_FIELD + name.components: [{ ident: "x" }] + type.generic: + path.components: [{ ident: "map" }] + args: + - path.components: [{ ident: "int" }] + - path.components: [{ ident: "int" }] diff --git a/experimental/parser/testdata/parser/type/generic.proto.stderr.txt b/experimental/parser/testdata/parser/type/generic.proto.stderr.txt index 36ebc8b3..5a2c5f1a 100644 --- a/experimental/parser/testdata/parser/type/generic.proto.stderr.txt +++ b/experimental/parser/testdata/parser/type/generic.proto.stderr.txt @@ -1,7 +1,9 @@ -error: unexpected tokens in type parameters +error: unexpected type name in type parameters --> testdata/parser/type/generic.proto:37:13 | 37 | set x13 = 13; - | ^^^ expected type + | ^^^ expected `,` + | | + | note: assuming a missing `,` here encountered 1 error diff --git a/experimental/parser/testdata/parser/type/generic.proto.yaml b/experimental/parser/testdata/parser/type/generic.proto.yaml index cda9fd95..cc7d941d 100644 --- a/experimental/parser/testdata/parser/type/generic.proto.yaml +++ b/experimental/parser/testdata/parser/type/generic.proto.yaml @@ -134,7 +134,9 @@ decls: name.components: [{ ident: "x13" }] type.generic: path.components: [{ ident: "set" }] - args: [{ path.components: [{ ident: "Foo" }] }] + args: + - path.components: [{ ident: "Foo" }] + - path.components: [{ ident: "bar" }] value.literal.int_value: 13 - def: kind: KIND_FIELD diff --git a/experimental/report/span.go b/experimental/report/span.go index bb73a922..3afa71ee 100644 --- a/experimental/report/span.go +++ b/experimental/report/span.go @@ -21,6 +21,7 @@ 
import ( "strings" "sync" "unicode" + "unicode/utf8" "github.com/bufbuild/protocompile/internal/ext/slicesx" "github.com/bufbuild/protocompile/internal/iter" @@ -80,6 +81,59 @@ func (s Span) Span() Span { return s } +// RuneRange slices this span along the given rune indices. +// +// For example, s.RuneRange(0, 2) returns at most the first two runes of the +// span. +// +// Unlike slicing into a string, out-of-bounds indices are snapped to the +// boundaries of the string, and negative indices are taken from the back of +// the span. For example, s.RuneRange(-2, -1) is the final rune of the span +// (or an empty span, if s is empty). +func (s Span) RuneRange(i, j int) Span { + i = runeIdxToByteOffset(s.Text(), i) + j = runeIdxToByteOffset(s.Text(), j) + if i > j { + i, j = j, i + } + return s.File.Span(i+s.Start, j+s.Start) +} + +// Rune is a shorthand for RuneRange(i, i+1) or RuneRange(i-1, i), depending +// on the sign of i. +func (s Span) Rune(i int) Span { + if i < 0 { + return s.RuneRange(i-1, i) + } + return s.RuneRange(i, i+1) +} + +// runeIdxToByteOffset converts a rune index into s into a byte offset. +// +// If i is negative, this produces the index of the -ith rune from the end of +// the string. +// +// If i points past the end or before the start of s, this returns len(s) or 0, +// respectively; the result is always a valid byte offset into s. +func runeIdxToByteOffset(s string, i int) int { + for i < 0 { + i++ + if i == 0 || s == "" { + return len(s) + } + _, j := utf8.DecodeLastRuneInString(s) + s = s[:len(s)-j] + } + + for j := range s { + if i == 0 { + return j + } + i-- + } + return len(s) +} + // String implements [string.Stringer]. func (s Span) String() string { start := s.StartLoc()