bufbuild · mcy · Jan 9, 2025 · Dec 19, 2024 · Jan 7, 2025 · Jan 7, 2025
diff --git a/experimental/ast/options.go b/experimental/ast/options.go
@@ -51,6 +51,11 @@ type Option struct {
 	Value  ExprAny
 }
 
+// Span implements [report.Spanner].
+//
+// The result is report.Join applied to the Path, Equals, and Value fields.
+func (o Option) Span() report.Span { return report.Join(o.Path, o.Equals, o.Value) }
+
 type rawOption struct {
 	path   rawPath
 	equals token.ID

diff --git a/experimental/parser/diagnostics_internal.go b/experimental/parser/diagnostics_internal.go
@@ -42,6 +42,9 @@ func (e errUnexpected) Diagnose(d *report.Diagnostic) {
 	got := e.got
 	if got == nil {
 		got = taxa.Classify(e.what)
+		if got == taxa.Unknown {
+			got = "tokens"
+		}
 	}
 
 	var message report.DiagnosticOption

diff --git a/experimental/parser/parse_decl.go b/experimental/parser/parse_decl.go
@@ -27,6 +27,10 @@ type exprComma struct {
 	comma token.Token
 }
 
+// Span implements [report.Spanner]. Only the wrapped expression's span is
+// returned; the comma token is not included.
+func (e exprComma) Span() report.Span { return e.expr.Span() }
+
 // parseDecl parses any Protobuf declaration.
 //
 // This function will always advance cursor if it is not empty.
@@ -290,6 +294,7 @@ func parseRange(p *parser, c *token.Cursor) ast.DeclRange {
 	// is empty, or if the first comma occurs without seeing an =, we can choose
 	// to parse this as an array, instead.
 	if !canStartOptions(c.Peek()) {
+		first := true
 		delimited[ast.ExprAny]{
 			p: p, c: c,
 			what: taxa.Expr,
@@ -298,11 +303,28 @@ func parseRange(p *parser, c *token.Cursor) ast.DeclRange {
 			required: true,
 			exhaust:  false,
 			parse: func(c *token.Cursor) (ast.ExprAny, bool) {
+				first = false
 				expr := parseExpr(p, c, in.In())
 				badExpr = expr.IsZero()
 
 				return expr, !expr.IsZero()
 			},
+			start: canStartExpr,
+			stop: func(t token.Token) bool {
+				if t.Text() == ";" || t.Text() == "[" {
+					return true
+				}
+
+				// After the first element, stop if we see an identifier
+				// coming up. This is for a case like this:
+				//
+				// reserved 1, 2
+				// message Foo {}
+				//
+				// If we don't do this, message will be interpreted as an
+				// expression.
+				return !first && t.Kind() == token.Ident
+			},
 		}.iter(func(expr ast.ExprAny, comma token.Token) bool {
 			exprs = append(exprs, exprComma{expr, comma})
 			return true
@@ -343,6 +365,7 @@ func parseTypeList(p *parser, parens token.Token, types ast.TypeList, in taxa.No
 			ty := parseType(p, c, in.In())
 			return ty, !ty.IsZero()
 		},
+		start: canStartPath,
 	}.appendTo(types)
 }
 
@@ -397,6 +420,7 @@ func parseOptions(p *parser, brackets token.Token, _ taxa.Noun) ast.CompactOptio
 			}
 			return option, !option.Value.IsZero()
 		},
+		start: canStartPath,
 	}.appendTo(options)
 
 	return options

diff --git a/experimental/parser/parse_delimited.go b/experimental/parser/parse_delimited.go
@@ -25,7 +25,7 @@ import (
 )
 
 // delimited is a mechanism for parsing a punctuation-delimited list.
-type delimited[T any] struct {
+type delimited[T report.Spanner] struct {
 	p *parser
 	c *token.Cursor
 
@@ -46,6 +46,12 @@ type delimited[T any] struct {
 	//
 	// This function is expected to exhaust
 	parse func(*token.Cursor) (T, bool)
+
+	// Used for skipping tokens until we can begin parsing.
+	//
+	// start is called until we see a token that returns true for it. However,
+	// if stop is not nil and it returns true for that token, parsing stops.
+	start, stop func(token.Token) bool
 }
 
 func (d delimited[T]) appendTo(commas ast.Commas[T]) {
@@ -65,9 +71,11 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
 	}
 
 	var delim token.Token
+	var latest int // The index of the most recently seen delimiter.
 
 	if next := d.c.Peek(); slices.Contains(d.delims, next.Text()) {
 		_ = d.c.Pop()
+		latest = slices.Index(d.delims, next.Text())
 
 		d.p.Error(errUnexpected{
 			what:  next,
@@ -77,21 +85,79 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
 		})
 	}
 
+	var needDelim bool
+	var mark token.CursorMark
 	for !d.c.Done() {
+		ensureProgress(d.c, &mark)
+
+		// Set if we should not diagnose a missing comma, because there was
+		// garbage in front of the call to parse().
+		var badPrefix bool
+		if !d.start(d.c.Peek()) {
+			if d.stop != nil && d.stop(d.c.Peek()) {
+				break
+			}
+
+			first := d.c.Pop()
+			var last token.Token
+			for !d.c.Done() && !d.start(d.c.Peek()) {
+				if d.stop != nil && d.stop(d.c.Peek()) {
+					break
+				}
+				last = d.c.Pop()
+			}
+
+			want := d.what.AsSet()
+			if needDelim && delim.IsZero() {
+				want = d.delimNouns()
+			}
+
+			what := report.Spanner(first)
+			if !last.IsZero() {
+				what = report.Join(first, last)
+			}
+
+			badPrefix = true
+			d.p.Error(errUnexpected{
+				what:  what,
+				where: d.in.In(),
+				want:  want,
+			})
+		}
+
 		v, ok := d.parse(d.c)
 		if !ok {
 			break
 		}
 
+		if !badPrefix && needDelim && delim.IsZero() {
+			d.p.Error(errUnexpected{
+				what:  v,
+				where: d.in.In(),
+				want:  d.delimNouns(),
+			}).Apply(
+				// TODO: this should be a suggestion.
+				report.Snippetf(v.Span().Rune(0), "note: assuming a missing `%s` here", d.delims[latest]),
+			)
+		}
+		needDelim = d.required
+
 		// Pop as many delimiters as we can.
 		delim = token.Zero
-		for slices.Contains(d.delims, d.c.Peek().Text()) {
+		for {
+			which := slices.Index(d.delims, d.c.Peek().Text())
+			if which < 0 {
+				break
+			}
+			latest = which
+
 			next := d.c.Pop()
 			if delim.IsZero() {
 				delim = next
 				continue
 			}
 
+			// Diagnose all extra delimiters after the first.
 			d.p.Error(errUnexpected{
 				what:  next,
 				where: d.in.In(),
@@ -100,9 +166,18 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
 			}).Apply(report.Snippetf(delim, "first delimiter is here"))
 		}
 
-		if !yield(v, delim) || (d.required && delim.IsZero()) {
+		if !yield(v, delim) {
 			break
 		}
+
+		// In non-exhaust mode, if we miss a required comma, bail if we have
+		// reached a stop token, or if we don't have a stop predicate.
+		// Otherwise, go again to parse another thing.
+		if delim.IsZero() && d.required && !d.exhaust {
+			if d.stop == nil || d.stop(d.c.Peek()) {
+				break
+			}
+		}
 	}
 
 	switch {
@@ -121,3 +196,11 @@ func (d delimited[T]) iter(yield func(value T, delim token.Token) bool) {
 		})
 	}
 }
+
+// delimNouns collects taxa.Punct(s, false) for every string s in d.delims.
+func (d delimited[T]) delimNouns() (nouns taxa.Set) {
+	for _, text := range d.delims {
+		nouns = nouns.With(taxa.Punct(text, false))
+	}
+	return nouns
+}
diff --git a/experimental/parser/parse_expr.go b/experimental/parser/parse_expr.go
@@ -60,31 +60,40 @@ func parseExprInfix(p *parser, c *token.Cursor, where taxa.Place, lhs ast.ExprAn
 				}).AsAny()
 
 			case "{", "<", "[": // This is for colon-less, array or dict-valued fields.
-				if !next.IsLeaf() && lhs.Kind() != ast.ExprKindField {
-					// The previous expression cannot also be a key-value pair, since
-					// this messes with parsing of dicts, which are not comma-separated.
-					//
-					// In other words, consider the following, inside of an expression
-					// context:
+				if next.IsLeaf() {
+					break
+				}
+
+				// The previous expression cannot also be a key-value pair, since
+				// this messes with parsing of dicts, which are not comma-separated.
+				//
+				// In other words, consider the following, inside of an expression
+				// context:
+				//
+				// foo: bar { ... }
+				//
+				// We want to diagnose the { as unexpected here, and it is better
+				// for that to be done by whatever is calling parseExpr since it
+				// will have more context.
+				//
+				// We also do not allow this inside of arrays, because we want
+				// [a {}] to parse as [a, {}] not [a: {}].
+				if lhs.Kind() == ast.ExprKindField || where.Subject() == taxa.Array {
+					break
+				}
+
+				return p.NewExprField(ast.ExprFieldArgs{
+					Key: lhs,
+					// Why not call parseExprSolo? Suppose the following
+					// (invalid) production:
 					//
-					// foo: bar { ... }
+					// foo { ... } to { ... }
 					//
-					// We want to diagnose the { as unexpected here, and it is better
-					// for that to be done by whatever is calling parseExpr since it
-					// will have more context.
-					return p.NewExprField(ast.ExprFieldArgs{
-						Key: lhs,
-						// Why not call parseExprSolo? Suppose the following
-						// (invalid) production:
-						//
-						// foo { ... } to { ... }
-						//
-						// Calling parseExprInfix will cause this to be parsed
-						// as a range expression, which will be diagnosed when
-						// we legalize.
-						Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1),
-					}).AsAny()
-				}
+					// Calling parseExprInfix will cause this to be parsed
+					// as a range expression, which will be diagnosed when
+					// we legalize.
+					Value: parseExprInfix(p, c, where, ast.ExprAny{}, prec+1),
+				}).AsAny()
 			}
 		}
 
@@ -157,7 +166,7 @@ func parseExprSolo(p *parser, c *token.Cursor, where taxa.Place) ast.ExprAny {
 		elems := delimited[ast.ExprAny]{
 			p:    p,
 			c:    body.Children(),
-			what: taxa.Expr,
+			what: taxa.DictField,
 			in:   in,
 
 			delims:   []string{",", ";"},
@@ -168,9 +177,15 @@ func parseExprSolo(p *parser, c *token.Cursor, where taxa.Place) ast.ExprAny {
 				expr := parseExpr(p, c, in.In())
 				return expr, !expr.IsZero()
 			},
+			start: canStartExpr,
 		}
 
 		if next.Text() == "[" {
+			elems.what = taxa.Expr
+			elems.delims = []string{","}
+			elems.required = true
+			elems.trailing = false
+
 			array := p.NewExprArray(body)
 			elems.appendTo(array)
 			return array.AsAny()

diff --git a/experimental/parser/parse_type.go b/experimental/parser/parse_type.go
@@ -185,6 +185,7 @@ func parseTypeImpl(p *parser, c *token.Cursor, where taxa.Place, pathAfter bool)
 				ty := parseType(p, c, taxa.TypeParams.In())
 				return ty, !ty.IsZero()
 			},
+			start: canStartPath,
 		}.appendTo(generic.Args())
 
 		ty = generic.AsAny()