Skip to content

Commit dab06cc

Browse files
committed
Emit only one nbsp error per file
1 parent 39edcfa commit dab06cc

File tree

3 files changed

+27
-45
lines changed

3 files changed

+27
-45
lines changed

compiler/rustc_parse/src/lexer/mod.rs

+26-5
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,15 @@ pub(crate) fn parse_token_trees<'a>(
5252
}
5353

5454
let cursor = Cursor::new(src);
55-
let string_reader =
56-
StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span };
55+
let string_reader = StringReader {
56+
sess,
57+
start_pos,
58+
pos: start_pos,
59+
src,
60+
cursor,
61+
override_span,
62+
nbsp_is_whitespace: false,
63+
};
5764
tokentrees::TokenTreesReader::parse_all_token_trees(string_reader)
5865
}
5966

@@ -68,6 +75,10 @@ struct StringReader<'a> {
6875
/// Cursor for getting lexer tokens.
6976
cursor: Cursor<'a>,
7077
override_span: Option<Span>,
78+
/// When an "unknown start of token: \u{a0}" error has already been emitted earlier
79+
/// in this file, it's safe to treat further occurrences of the non-breaking
80+
/// space character as whitespace.
81+
nbsp_is_whitespace: bool,
7182
}
7283

7384
impl<'a> StringReader<'a> {
@@ -239,6 +250,16 @@ impl<'a> StringReader<'a> {
239250
}
240251
let mut it = self.str_from_to_end(start).chars();
241252
let c = it.next().unwrap();
253+
if c == '\u{00a0}' {
254+
// If an error has already been reported on non-breaking
255+
// space characters earlier in the file, treat all
256+
// subsequent occurrences as whitespace.
257+
if self.nbsp_is_whitespace {
258+
preceded_by_whitespace = true;
259+
continue;
260+
}
261+
self.nbsp_is_whitespace = true;
262+
}
242263
let repeats = it.take_while(|c1| *c1 == c).count();
243264
let mut err =
244265
self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c);
@@ -486,7 +507,7 @@ impl<'a> StringReader<'a> {
486507

487508
/// Slice of the source text from `start` up to but excluding `self.pos`,
488509
/// meaning the slice does not include the character `self.ch`.
489-
fn str_from(&self, start: BytePos) -> &str {
510+
fn str_from(&self, start: BytePos) -> &'a str {
490511
self.str_from_to(start, self.pos)
491512
}
492513

@@ -497,12 +518,12 @@ impl<'a> StringReader<'a> {
497518
}
498519

499520
/// Slice of the source text spanning from `start` up to but excluding `end`.
500-
fn str_from_to(&self, start: BytePos, end: BytePos) -> &str {
521+
fn str_from_to(&self, start: BytePos, end: BytePos) -> &'a str {
501522
&self.src[self.src_index(start)..self.src_index(end)]
502523
}
503524

504525
/// Slice of the source text spanning from `start` until the end
505-
fn str_from_to_end(&self, start: BytePos) -> &str {
526+
fn str_from_to_end(&self, start: BytePos) -> &'a str {
506527
&self.src[self.src_index(start)..]
507528
}
508529

tests/ui/parser/unicode-chars.rs

-6
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,4 @@ fn main() {
66
//~^ ERROR unknown start of token: \u{a0}
77
//~^^ NOTE character appears 3 more times
88
//~^^^ HELP Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
9-
//~^^^^ ERROR unknown start of token: \u{a0}
10-
//~^^^^^ HELP Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
11-
//~^^^^^^ ERROR unknown start of token: \u{a0}
12-
//~^^^^^^^ HELP Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
13-
//~^^^^^^^^ ERROR unknown start of token: \u{a0}
14-
//~^^^^^^^^^ HELP Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
159
}

tests/ui/parser/unicode-chars.stderr

+1-34
Original file line numberDiff line numberDiff line change
@@ -21,38 +21,5 @@ help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is
2121
LL | let x = 0;
2222
| ++++
2323

24-
error: unknown start of token: \u{a0}
25-
--> $DIR/unicode-chars.rs:5:12
26-
|
27-
LL |     let x = 0;
28-
| ^
29-
|
30-
help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
31-
|
32-
LL |     let x = 0;
33-
| +
34-
35-
error: unknown start of token: \u{a0}
36-
--> $DIR/unicode-chars.rs:5:14
37-
|
38-
LL |     let x = 0;
39-
| ^
40-
|
41-
help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
42-
|
43-
LL |     let x = 0;
44-
| +
45-
46-
error: unknown start of token: \u{a0}
47-
--> $DIR/unicode-chars.rs:5:16
48-
|
49-
LL |     let x = 0;
50-
| ^
51-
|
52-
help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
53-
|
54-
LL |     let x = 0;
55-
| +
56-
57-
error: aborting due to 5 previous errors
24+
error: aborting due to 2 previous errors
5825

0 commit comments

Comments
 (0)