Auto merge of #75779 - scileo:fix-issue-75492, r=petrochenkov

bors · bors · commit 36b0d7e25769 · 2020-08-30T21:03:06.000Z
Improve error message when typo is made in format! The expansion of the format! built-in macro is roughly done in two steps: - the format expression is parsed, the arguments are parsed, - the format expression is checked to be a string literal, code is expanded. The problem is that the expression parser can eat too much tokens, which invalidates the parsing of the next format arguments. As the format expression check happens next, the error emitted concerns the format arguments, whereas the problem is about the format expression. This PR contains two commits. The first one actually checks that the formatting expression is a string literal before raising any error about the formatting arguments, and the second one contains some simple heuristics which allow to suggest, when the format expression is followed by a dot instead of a comma, to suggest to replace the dot with a comma. This pull request should fix #75492. Note: this is my first non-doc contribution to the rust ecosystem. Feel free to make any comment about my code, or whatever. I'll be very happy to fix it :)
diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs
@@ -135,7 +135,26 @@ fn parse_args<'a>(
         return Err(ecx.struct_span_err(sp, "requires at least a format string argument"));
     }
 
-    let fmtstr = p.parse_expr()?;
+    let first_token = &p.token;
+    let fmtstr = match first_token.kind {
+        token::TokenKind::Literal(token::Lit {
+            kind: token::LitKind::Str | token::LitKind::StrRaw(_),
+            ..
+        }) => {
+            // If the first token is a string literal, then a format expression
+            // is constructed from it.
+            //
+            // This allows us to properly handle cases when the first comma
+            // after the format string is mistakenly replaced with any operator,
+            // which cause the expression parser to eat too much tokens.
+            p.parse_literal_maybe_minus()?
+        }
+        _ => {
+            // Otherwise, we fall back to the expression parser.
+            p.parse_expr()?
+        }
+    };
+
     let mut first = true;
     let mut named = false;
 
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
@@ -1480,7 +1480,7 @@ impl<'a> Parser<'a> {
 
     /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`).
     /// Keep this in sync with `Token::can_begin_literal_maybe_minus`.
-    pub(super) fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P<Expr>> {
+    pub fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P<Expr>> {
         maybe_whole_expr!(self);
 
         let lo = self.token.span;
diff --git a/src/test/ui/fmt/incorrect-first-separator.rs b/src/test/ui/fmt/incorrect-first-separator.rs
@@ -0,0 +1,22 @@
+// Allows to track issue #75492:
+// https://github.com/rust-lang/rust/issues/75492
+
+use std::iter;
+
+fn main() {
+    format!("A number: {}". iter::once(42).next().unwrap());
+    //~^ ERROR expected token: `,`
+
+    // Other kind of types are also checked:
+
+    format!("A number: {}" / iter::once(42).next().unwrap());
+    //~^ ERROR expected token: `,`
+
+    format!("A number: {}"; iter::once(42).next().unwrap());
+    //~^ ERROR expected token: `,`
+
+    // Note: this character is an COMBINING COMMA BELOW unicode char
+    format!("A number: {}" ̦ iter::once(42).next().unwrap());
+    //~^ ERROR expected token: `,`
+    //~^^ ERROR unknown start of token: \u{326}
+}
diff --git a/src/test/ui/fmt/incorrect-first-separator.stderr b/src/test/ui/fmt/incorrect-first-separator.stderr
@@ -0,0 +1,32 @@
+error: unknown start of token: \u{326}
+  --> $DIR/incorrect-first-separator.rs:19:28
+   |
+LL |     format!("A number: {}" ̦ iter::once(42).next().unwrap());
+   |                            ^
+
+error: expected token: `,`
+  --> $DIR/incorrect-first-separator.rs:7:27
+   |
+LL |     format!("A number: {}". iter::once(42).next().unwrap());
+   |                           ^ expected `,`
+
+error: expected token: `,`
+  --> $DIR/incorrect-first-separator.rs:12:28
+   |
+LL |     format!("A number: {}" / iter::once(42).next().unwrap());
+   |                            ^ expected `,`
+
+error: expected token: `,`
+  --> $DIR/incorrect-first-separator.rs:15:27
+   |
+LL |     format!("A number: {}"; iter::once(42).next().unwrap());
+   |                           ^ expected `,`
+
+error: expected token: `,`
+  --> $DIR/incorrect-first-separator.rs:19:30
+   |
+LL |     format!("A number: {}" ̦ iter::once(42).next().unwrap());
+   |                             ^^^^ expected `,`
+
+error: aborting due to 5 previous errors
+