Skip to content

Commit

Permalink
make comma-list parsing really robust
Browse files Browse the repository at this point in the history
  • Loading branch information
mcy committed Dec 19, 2024
1 parent 0c5adcf commit 29daeb2
Show file tree
Hide file tree
Showing 12 changed files with 701 additions and 30 deletions.
5 changes: 5 additions & 0 deletions experimental/ast/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ type Option struct {
Value ExprAny
}

// Span implements [report.Spanner].
func (o Option) Span() report.Span {
return report.Join(o.Path, o.Equals, o.Value)
}

type rawOption struct {
path rawPath
equals token.ID
Expand Down
3 changes: 3 additions & 0 deletions experimental/parser/diagnostics_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ func (e errUnexpected) Diagnose(d *report.Diagnostic) {
got := e.got
if got == nil {
got = taxa.Classify(e.what)
if got == taxa.Unknown {
got = "tokens"
}
}

var message report.DiagnosticOption
Expand Down
7 changes: 7 additions & 0 deletions experimental/parser/parse_decl.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ type exprComma struct {
comma token.Token
}

func (e exprComma) Span() report.Span {
return e.expr.Span()
}

// parseDecl parses any Protobuf declaration.
//
// This function will always advance cursor if it is not empty.
Expand Down Expand Up @@ -303,6 +307,7 @@ func parseRange(p *parser, c *token.Cursor) ast.DeclRange {

return expr, !expr.Nil()
},
canStart: canStartExpr,
}.iter(func(expr ast.ExprAny, comma token.Token) bool {
exprs = append(exprs, exprComma{expr, comma})
return true
Expand Down Expand Up @@ -343,6 +348,7 @@ func parseTypeList(p *parser, parens token.Token, types ast.TypeList, in taxa.No
ty := parseType(p, c, in.In())
return ty, !ty.Nil()
},
canStart: canStartPath,
}.appendTo(types)
}

Expand Down Expand Up @@ -397,6 +403,7 @@ func parseOptions(p *parser, brackets token.Token, _ taxa.Noun) ast.CompactOptio
}
return option, !option.Value.Nil()
},
canStart: canStartPath,
}.appendTo(options)

return options
Expand Down
76 changes: 73 additions & 3 deletions experimental/parser/parse_delimited.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
)

// delimited is a mechanism for parsing a punctuation-delimited list.
type delimited[T any] struct {
type delimited[T report.Spanner] struct {
p *parser
c *token.Cursor

Expand All @@ -46,6 +46,9 @@ type delimited[T any] struct {
//
// This function is expected to exhaust
parse func(*token.Cursor) (T, bool)

// Used for skipping tokens until we can begin parsing.
canStart func(token.Token) bool
}

func (d delimited[T]) appendTo(commas ast.Commas[T]) {
Expand All @@ -65,9 +68,11 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
}

var delim token.Token
var latest int // The index of the most recently seen delimiter.

if next := d.c.Peek(); slices.Contains(d.delims, next.Text()) {
_ = d.c.Pop()
latest = slices.Index(d.delims, next.Text())

d.p.Error(errUnexpected{
what: next,
Expand All @@ -77,21 +82,72 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
})
}

var needDelim bool
var mark token.CursorMark
for !d.c.Done() {
ensureProgress(d.c, &mark)

// Set if we should not diagnose a missing comma, because there was
// garbage in front of the call to parse().
var badPrefix bool
if !d.canStart(d.c.Peek()) {
first := d.c.Pop()
var last token.Token
for !d.c.Done() && !d.canStart(d.c.Peek()) {
last = d.c.Pop()
}

want := d.what.AsSet()
if needDelim && delim.Nil() {
want = d.delimNouns()
}

what := report.Spanner(first)
if !last.Nil() {
what = report.Join(first, last)
}

badPrefix = true
d.p.Error(errUnexpected{
what: what,
where: d.in.In(),
want: want,
})
}

v, ok := d.parse(d.c)
if !ok {
break
}

if !badPrefix && needDelim && delim.Nil() {
d.p.Error(errUnexpected{
what: v,
where: d.in.In(),
want: d.delimNouns(),
}).Apply(
// TODO: this should be a suggestion.
report.Snippetf(v.Span().Rune(0), "note: assuming a missing `%s` here", d.delims[latest]),
)
}
needDelim = d.required

// Pop as many delimiters as we can.
delim = token.Nil
for slices.Contains(d.delims, d.c.Peek().Text()) {
for {
which := slices.Index(d.delims, d.c.Peek().Text())
if which < 0 {
break
}
latest = which

next := d.c.Pop()
if delim.Nil() {
delim = next
continue
}

// Diagnose all extra delimiters after the first.
d.p.Error(errUnexpected{
what: next,
where: d.in.In(),
Expand All @@ -100,7 +156,13 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
}).Apply(report.Snippetf(delim, "first delimiter is here"))
}

if !yield(v, delim) || (d.required && delim.Nil()) {
if !yield(v, delim) {
break
}

if delim.Nil() && d.required && !d.exhaust {
// In non-exhaust mode, if we miss a required comma just bail.
// Otherwise, go again to parse another thing.
break
}
}
Expand All @@ -121,3 +183,11 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
})
}
}

func (d delimited[T]) delimNouns() taxa.Set {
var set taxa.Set
for _, delim := range d.delims {
set = set.With(taxa.Punct(delim, false))
}
return set
}
63 changes: 39 additions & 24 deletions experimental/parser/parse_expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,31 +60,40 @@ func parseExprInfix(p *parser, c *token.Cursor, where taxa.Place, lhs ast.ExprAn
}).AsAny()

case "{", "<", "[": // This is for colon-less, array or dict-valued fields.
if !next.IsLeaf() && lhs.Kind() != ast.ExprKindField {
// The previous expression cannot also be a key-value pair, since
// this messes with parsing of dicts, which are not comma-separated.
//
// In other words, consider the following, inside of an expression
// context:
if next.IsLeaf() {
break
}

// The previous expression cannot also be a key-value pair, since
// this messes with parsing of dicts, which are not comma-separated.
//
// In other words, consider the following, inside of an expression
// context:
//
// foo: bar { ... }
//
// We want to diagnose the { as unexpected here, and it is better
// for that to be done by whatever is calling parseExpr since it
// will have more context.
//
// We also do not allow this inside of arrays, because we want
// [a {}] to parse as [a, {}] not [a: {}].
if lhs.Kind() == ast.ExprKindField || where.Subject() == taxa.Array {
break
}

return p.NewExprField(ast.ExprFieldArgs{
Key: lhs,
// Why not call parseExprSolo? Suppose the following
// (invalid) production:
//
// foo: bar { ... }
// foo { ... } to { ... }
//
// We want to diagnose the { as unexpected here, and it is better
// for that to be done by whatever is calling parseExpr since it
// will have more context.
return p.NewExprField(ast.ExprFieldArgs{
Key: lhs,
// Why not call parseExprSolo? Suppose the following
// (invalid) production:
//
// foo { ... } to { ... }
//
// Calling parseExprInfix will cause this to be parsed
// as a range expression, which will be diagnosed when
// we legalize.
Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1),
}).AsAny()
}
// Calling parseExprInfix will cause this to be parsed
// as a range expression, which will be diagnosed when
// we legalize.
Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1),
}).AsAny()
}
}

Expand Down Expand Up @@ -157,7 +166,7 @@ func parseExprSolo(p *parser, c *token.Cursor, where taxa.Place) ast.ExprAny {
elems := delimited[ast.ExprAny]{
p: p,
c: body.Children(),
what: taxa.Expr,
what: taxa.DictField,
in: in,

delims: []string{",", ";"},
Expand All @@ -168,9 +177,15 @@ func parseExprSolo(p *parser, c *token.Cursor, where taxa.Place) ast.ExprAny {
expr := parseExpr(p, c, in.In())
return expr, !expr.Nil()
},
canStart: canStartExpr,
}

if next.Text() == "[" {
elems.what = taxa.Expr
elems.delims = []string{","}
elems.required = true
elems.trailing = false

array := p.NewExprArray(body)
elems.appendTo(array)
return array.AsAny()
Expand Down
1 change: 1 addition & 0 deletions experimental/parser/parse_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ func parseTypeImpl(p *parser, c *token.Cursor, where taxa.Place, pathAfter bool)
ty := parseType(p, c, taxa.TypeParams.In())
return ty, !ty.Nil()
},
canStart: canStartPath,
}.appendTo(generic.Args())

ty = generic.AsAny()
Expand Down
65 changes: 65 additions & 0 deletions experimental/parser/testdata/parser/lists.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright 2020-2024 Buf Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This test exercises every delimited list production in the language.

option foo = [];
option foo = [1];
option foo = [1, 2];
option foo = [1, 2 3];
option foo = [1, 2,, 3];
option foo = [1, 2,, 3,];
option foo = [,1 2,, 3,];
option foo = [1; 2; 3];
option foo = [a {}];
option foo = [,];

option foo = {
bar: 1
bar {
bar: 2;;
}
};
option foo = {;bar: 1};
option foo = {baz: 1;; baz: 1};
option foo = {baz: 1,; baz: 1;};
option foo = {
bar {;}
bar {,}
};

service S {
rpc Foo(int) returns (int);
rpc Foo(int, int) returns (int, int);
rpc Foo(int int) returns (int int);
rpc Foo(int; int) returns (int, int,);
rpc Foo(, int, int) returns (int,, int,);
rpc Foo(;) returns (,);
rpc Foo() returns ();
}

message M {
map<int> x;
map<int, int> x;
map<int int> x;
map<int,, int> x;
map<,> x;
map<> x;
map<,int, int> x;
map<int; int> x;
map<
int,
int,
> x;
}
Loading

0 comments on commit 29daeb2

Please sign in to comment.