Skip to content

Commit d6a4298

Browse files
Rollup merge of #129899 - veera-sivarajan:fix-97793-pr-final, r=chenyukang
Add Suggestions for Misspelled Keywords. Fixes #97793. This PR detects misspelled keywords using two heuristics: (1) lowercasing the unexpected identifier, and (2) using edit distance to find a keyword similar to the unexpected identifier. However, to minimize false positives and ambiguities, it does not detect every misspelled keyword. More details about the implementation can be found in the comments.
2 parents 6dd07e4 + 14e86eb commit d6a4298

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+702
-12
lines changed

compiler/rustc_parse/messages.ftl

+6
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,7 @@ parse_invalid_char_in_escape_msg = invalid character in {$is_hex ->
381381
*[false] unicode
382382
} escape
383383
384+
384385
parse_invalid_comparison_operator = invalid comparison operator `{$invalid}`
385386
.use_instead = `{$invalid}` is not a valid comparison operator, use `{$correct}`
386387
.spaceship_operator_invalid = `<=>` is not a valid comparison operator, use `std::cmp::Ordering`
@@ -581,6 +582,11 @@ parse_missing_trait_in_trait_impl = missing trait in a trait impl
581582
.suggestion_add_trait = add a trait here
582583
.suggestion_remove_for = for an inherent impl, drop this `for`
583584
585+
parse_misspelled_kw = {$is_incorrect_case ->
586+
[true] write keyword `{$similar_kw}` in lowercase
587+
*[false] there is a keyword `{$similar_kw}` with a similar name
588+
}
589+
584590
parse_modifier_lifetime = `{$modifier}` may only modify trait bounds, not lifetime bounds
585591
.suggestion = remove the `{$modifier}`
586592

compiler/rustc_parse/src/parser/diagnostics.rs

+82-3
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@ use rustc_errors::{
1919
Subdiagnostic,
2020
};
2121
use rustc_session::errors::ExprParenthesesNeeded;
22+
use rustc_span::edit_distance::find_best_match_for_name;
2223
use rustc_span::source_map::Spanned;
23-
use rustc_span::symbol::{kw, sym, Ident};
24+
use rustc_span::symbol::{kw, sym, AllKeywords, Ident};
2425
use rustc_span::{BytePos, Span, SpanSnippetError, Symbol, DUMMY_SP};
2526
use thin_vec::{thin_vec, ThinVec};
2627
use tracing::{debug, trace};
@@ -203,6 +204,37 @@ impl std::fmt::Display for UnaryFixity {
203204
}
204205
}
205206

207+
#[derive(Debug, rustc_macros::Subdiagnostic)]
208+
#[suggestion(
209+
parse_misspelled_kw,
210+
applicability = "machine-applicable",
211+
code = "{similar_kw}",
212+
style = "verbose"
213+
)]
214+
struct MisspelledKw {
215+
similar_kw: String,
216+
#[primary_span]
217+
span: Span,
218+
is_incorrect_case: bool,
219+
}
220+
221+
/// Checks if the given `lookup` identifier is similar to any keyword symbol in `candidates`.
222+
fn find_similar_kw(lookup: Ident, candidates: &[Symbol]) -> Option<MisspelledKw> {
223+
let lowercase = lookup.name.as_str().to_lowercase();
224+
let lowercase_sym = Symbol::intern(&lowercase);
225+
if candidates.contains(&lowercase_sym) {
226+
Some(MisspelledKw { similar_kw: lowercase, span: lookup.span, is_incorrect_case: true })
227+
} else if let Some(similar_sym) = find_best_match_for_name(candidates, lookup.name, None) {
228+
Some(MisspelledKw {
229+
similar_kw: similar_sym.to_string(),
230+
span: lookup.span,
231+
is_incorrect_case: false,
232+
})
233+
} else {
234+
None
235+
}
236+
}
237+
206238
struct MultiSugg {
207239
msg: String,
208240
patches: Vec<(Span, String)>,
@@ -638,9 +670,9 @@ impl<'a> Parser<'a> {
638670
let concat = Symbol::intern(&format!("{prev}{cur}"));
639671
let ident = Ident::new(concat, DUMMY_SP);
640672
if ident.is_used_keyword() || ident.is_reserved() || ident.is_raw_guess() {
641-
let span = self.prev_token.span.to(self.token.span);
673+
let concat_span = self.prev_token.span.to(self.token.span);
642674
err.span_suggestion_verbose(
643-
span,
675+
concat_span,
644676
format!("consider removing the space to spell keyword `{concat}`"),
645677
concat,
646678
Applicability::MachineApplicable,
@@ -741,9 +773,55 @@ impl<'a> Parser<'a> {
741773
err.span_label(sp, label_exp);
742774
err.span_label(self.token.span, "unexpected token");
743775
}
776+
777+
// Check for misspelled keywords if there are no suggestions added to the diagnostic.
778+
if err.suggestions.as_ref().is_ok_and(|code_suggestions| code_suggestions.is_empty()) {
779+
self.check_for_misspelled_kw(&mut err, &expected);
780+
}
744781
Err(err)
745782
}
746783

784+
/// Checks if the current token or the previous token is a misspelled keyword
785+
/// and adds a helpful suggestion.
786+
fn check_for_misspelled_kw(&self, err: &mut Diag<'_>, expected: &[TokenType]) {
787+
let Some((curr_ident, _)) = self.token.ident() else {
788+
return;
789+
};
790+
let expected_tokens: &[TokenType] =
791+
expected.len().checked_sub(10).map_or(&expected, |index| &expected[index..]);
792+
let expected_keywords: Vec<Symbol> = expected_tokens
793+
.iter()
794+
.filter_map(|token| if let TokenType::Keyword(kw) = token { Some(*kw) } else { None })
795+
.collect();
796+
797+
// When there are a few keywords in the last ten elements of `self.expected_tokens` and the current
798+
// token is an identifier, it's probably a misspelled keyword.
799+
// This handles code like `async Move {}`, misspelled `if` in match guard, misspelled `else` in `if`-`else`
800+
// and misspelled `where` in a where clause.
801+
if !expected_keywords.is_empty()
802+
&& !curr_ident.is_used_keyword()
803+
&& let Some(misspelled_kw) = find_similar_kw(curr_ident, &expected_keywords)
804+
{
805+
err.subdiagnostic(misspelled_kw);
806+
} else if let Some((prev_ident, _)) = self.prev_token.ident()
807+
&& !prev_ident.is_used_keyword()
808+
{
809+
// We generate a list of all keywords at runtime rather than at compile time
810+
// so that it gets generated only when the diagnostic needs it.
811+
// Also, it is unlikely that this list is generated multiple times because the
812+
// parser halts after execution hits this path.
813+
let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition());
814+
815+
// Otherwise, check the previous token with all the keywords as possible candidates.
816+
// This handles code like `Struct Human;` and `While a < b {}`.
817+
// We check the previous token only when the current token is an identifier to avoid false
818+
// positives like suggesting keyword `for` for `extern crate foo {}`.
819+
if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) {
820+
err.subdiagnostic(misspelled_kw);
821+
}
822+
}
823+
}
824+
747825
/// The user has written `#[attr] expr` which is unsupported. (#106020)
748826
pub(super) fn attr_on_non_tail_expr(&self, expr: &Expr) -> ErrorGuaranteed {
749827
// Missing semicolon typo error.
@@ -846,6 +924,7 @@ impl<'a> Parser<'a> {
846924
);
847925
}
848926
}
927+
849928
err.emit()
850929
}
851930

compiler/rustc_span/src/symbol.rs

+41-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ mod tests;
2020

2121
// The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`.
2222
symbols! {
23-
// If you modify this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`.
23+
// If you modify this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword`
24+
// and `AllKeywords`.
2425
// But this should rarely be necessary if the keywords are kept in alphabetic order.
2526
Keywords {
2627
// Special reserved identifiers used internally for elided lifetimes,
@@ -2579,3 +2580,42 @@ impl Ident {
25792580
self.name.can_be_raw() && self.is_reserved()
25802581
}
25812582
}
2583+
2584+
/// An iterator over all the keywords in Rust.
2585+
#[derive(Copy, Clone)]
2586+
pub struct AllKeywords {
2587+
curr_idx: u32,
2588+
end_idx: u32,
2589+
}
2590+
2591+
impl AllKeywords {
2592+
/// Initialize a new iterator over all the keywords.
2593+
///
2594+
/// *Note:* Please update this if a new keyword is added beyond the current
2595+
/// range.
2596+
pub fn new() -> Self {
2597+
AllKeywords { curr_idx: kw::Empty.as_u32(), end_idx: kw::Yeet.as_u32() }
2598+
}
2599+
2600+
/// Collect all the keywords in a given edition into a vector.
2601+
pub fn collect_used(&self, edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
2602+
self.filter(|&keyword| {
2603+
keyword.is_used_keyword_always() || keyword.is_used_keyword_conditional(edition)
2604+
})
2605+
.collect()
2606+
}
2607+
}
2608+
2609+
impl Iterator for AllKeywords {
2610+
type Item = Symbol;
2611+
2612+
fn next(&mut self) -> Option<Self::Item> {
2613+
if self.curr_idx <= self.end_idx {
2614+
let keyword = Symbol::new(self.curr_idx);
2615+
self.curr_idx += 1;
2616+
Some(keyword)
2617+
} else {
2618+
None
2619+
}
2620+
}
2621+
}

tests/ui/parser/extern-crate-unexpected-token.stderr

+5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@ error: expected one of `crate` or `{`, found `crte`
33
|
44
LL | extern crte foo;
55
| ^^^^ expected one of `crate` or `{`
6+
|
7+
help: there is a keyword `crate` with a similar name
8+
|
9+
LL | extern crate foo;
10+
| ~~~~~
611

712
error: aborting due to 1 previous error
813

tests/ui/parser/issues/issue-70549-resolve-after-recovered-self-ctor.stderr

+25-8
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,16 @@ error: expected one of `:`, `@`, or `|`, found keyword `Self`
88
--> $DIR/issue-70549-resolve-after-recovered-self-ctor.rs:4:17
99
|
1010
LL | fn foo(&mur Self) {}
11-
| -----^^^^
12-
| | |
13-
| | expected one of `:`, `@`, or `|`
14-
| help: declare the type after the parameter binding: `<identifier>: <type>`
11+
| ^^^^ expected one of `:`, `@`, or `|`
12+
|
13+
help: there is a keyword `mut` with a similar name
14+
|
15+
LL | fn foo(&mut Self) {}
16+
| ~~~
17+
help: declare the type after the parameter binding
18+
|
19+
LL | fn foo(<identifier>: <type>) {}
20+
| ~~~~~~~~~~~~~~~~~~~~
1521

1622
error: unexpected lifetime `'static` in pattern
1723
--> $DIR/issue-70549-resolve-after-recovered-self-ctor.rs:8:13
@@ -35,16 +41,27 @@ error: expected one of `:`, `@`, or `|`, found keyword `Self`
3541
--> $DIR/issue-70549-resolve-after-recovered-self-ctor.rs:8:25
3642
|
3743
LL | fn bar(&'static mur Self) {}
38-
| -------------^^^^
39-
| | |
40-
| | expected one of `:`, `@`, or `|`
41-
| help: declare the type after the parameter binding: `<identifier>: <type>`
44+
| ^^^^ expected one of `:`, `@`, or `|`
45+
|
46+
help: there is a keyword `mut` with a similar name
47+
|
48+
LL | fn bar(&'static mut Self) {}
49+
| ~~~
50+
help: declare the type after the parameter binding
51+
|
52+
LL | fn bar(<identifier>: <type>) {}
53+
| ~~~~~~~~~~~~~~~~~~~~
4254

4355
error: expected one of `:`, `@`, or `|`, found keyword `Self`
4456
--> $DIR/issue-70549-resolve-after-recovered-self-ctor.rs:14:17
4557
|
4658
LL | fn baz(&mur Self @ _) {}
4759
| ^^^^ expected one of `:`, `@`, or `|`
60+
|
61+
help: there is a keyword `mut` with a similar name
62+
|
63+
LL | fn baz(&mut Self @ _) {}
64+
| ~~~
4865

4966
error[E0533]: expected unit struct, found self constructor `Self`
5067
--> $DIR/issue-70549-resolve-after-recovered-self-ctor.rs:4:17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
trait Animal {
2+
Type Result = u8;
3+
//~^ ERROR expected one of
4+
}
5+
6+
fn main() {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
error: expected one of `!` or `::`, found `Result`
2+
--> $DIR/assoc-type.rs:2:10
3+
|
4+
LL | trait Animal {
5+
| - while parsing this item list starting here
6+
LL | Type Result = u8;
7+
| ^^^^^^ expected one of `!` or `::`
8+
LL |
9+
LL | }
10+
| - the item list ends here
11+
|
12+
help: write keyword `type` in lowercase
13+
|
14+
LL | type Result = u8;
15+
| ~~~~
16+
17+
error: aborting due to 1 previous error
18+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
//@ edition: 2018
2+
3+
fn main() {
4+
async Move {}
5+
//~^ ERROR expected one of
6+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
error: expected one of `move`, `|`, or `||`, found `Move`
2+
--> $DIR/async-move.rs:4:11
3+
|
4+
LL | async Move {}
5+
| ^^^^ expected one of `move`, `|`, or `||`
6+
|
7+
help: write keyword `move` in lowercase
8+
|
9+
LL | async move {}
10+
| ~~~~
11+
12+
error: aborting due to 1 previous error
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
cnst fn code() {}
2+
//~^ ERROR expected one of
3+
4+
fn main() {
5+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
error: expected one of `!` or `::`, found keyword `fn`
2+
--> $DIR/const-fn.rs:1:6
3+
|
4+
LL | cnst fn code() {}
5+
| ^^ expected one of `!` or `::`
6+
|
7+
help: there is a keyword `const` with a similar name
8+
|
9+
LL | const fn code() {}
10+
| ~~~~~
11+
12+
error: aborting due to 1 previous error
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
fn foo<consta N: usize>(_arr: [i32; N]) {}
2+
//~^ ERROR expected one of
3+
4+
fn main() {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
error: expected one of `,`, `:`, `=`, or `>`, found `N`
2+
--> $DIR/const-generics.rs:1:15
3+
|
4+
LL | fn foo<consta N: usize>(_arr: [i32; N]) {}
5+
| ^ expected one of `,`, `:`, `=`, or `>`
6+
|
7+
help: there is a keyword `const` with a similar name
8+
|
9+
LL | fn foo<const N: usize>(_arr: [i32; N]) {}
10+
| ~~~~~
11+
12+
error: aborting due to 1 previous error
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
cons A: u8 = 10;
2+
//~^ ERROR expected one of
3+
4+
fn main() {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
error: expected one of `!` or `::`, found `A`
2+
--> $DIR/const.rs:1:6
3+
|
4+
LL | cons A: u8 = 10;
5+
| ^ expected one of `!` or `::`
6+
|
7+
help: there is a keyword `const` with a similar name
8+
|
9+
LL | const A: u8 = 10;
10+
| ~~~~~
11+
12+
error: aborting due to 1 previous error
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
fn main() {
2+
form i in 1..10 {}
3+
//~^ ERROR expected one of
4+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
error: expected one of `!`, `.`, `::`, `;`, `?`, `{`, `}`, or an operator, found `i`
2+
--> $DIR/for-loop.rs:2:10
3+
|
4+
LL | form i in 1..10 {}
5+
| ^ expected one of 8 possible tokens
6+
|
7+
help: there is a keyword `for` with a similar name
8+
|
9+
LL | for i in 1..10 {}
10+
| ~~~
11+
12+
error: aborting due to 1 previous error
13+

0 commit comments

Comments
 (0)