
Commit e6fa633
Properly capture trailing 'unglued' token
If we try to capture the `Vec<u8>` in `Option<Vec<u8>>`, we need to capture a `>` token that was 'unglued' from a `>>` token. Ungluing a token for parsing purposes bypasses the usual capturing infrastructure, so we currently lose the trailing `>` and fall back to the reparsed `TokenStream`, losing spans in the process.

This commit makes token capturing keep track of a trailing 'unglued' token. Note that ungluing only matters at the end of the captured tokens: if we capture both the first and the second unglued token, we end up capturing the full 'glued' token, which already works correctly.
Parent: 388eb24

3 files changed: +106 −9
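Before the diff, a quick illustration of what 'glued' means here. This is a minimal sketch that assumes the external `proc-macro2` crate (not the compiler-internal token types this commit modifies): the trailing `>>` in `Option<Vec<u8>>` surfaces as two separate `>` punctuation tokens, the first marked `Joint`.

```rust
// Sketch only: uses the external `proc-macro2` crate, not the
// compiler-internal `TokenStream` this commit changes.
use proc_macro2::{TokenStream, TokenTree};

fn main() {
    let ts: TokenStream = "Option<Vec<u8>>".parse().unwrap();
    for tt in ts {
        if let TokenTree::Punct(p) = tt {
            // `Joint` spacing means the punct was glued to the next one.
            println!("{:?} -> {:?}", p.as_char(), p.spacing());
        }
    }
    // Expected:
    //   '<' -> Alone
    //   '<' -> Alone
    //   '>' -> Joint   (first half of the glued `>>`)
    //   '>' -> Alone
}
```

Capturing just `Vec<u8>` means the captured stream must end with that first, 'unglued' `>`, which is exactly the token the capturing machinery was dropping.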

compiler/rustc_parse/src/parser/mod.rs (+58 −9)

```diff
@@ -17,7 +17,7 @@ pub use path::PathStyle;
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, DelimToken, Token, TokenKind};
 use rustc_ast::tokenstream::{self, DelimSpan, LazyTokenStream, Spacing};
-use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree};
+use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree, TreeAndSpacing};
 use rustc_ast::DUMMY_NODE_ID;
 use rustc_ast::{self as ast, AnonConst, AttrStyle, AttrVec, Const, CrateSugar, Extern, Unsafe};
 use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit};
@@ -132,6 +132,28 @@ struct TokenCursor {
     // Counts the number of calls to `next` or `next_desugared`,
     // depending on whether `desugar_doc_comments` is set.
     num_next_calls: usize,
+    // During parsing, we may sometimes need to 'unglue' a
+    // glued token into two component tokens
+    // (e.g. '>>' into '>' and '>'), so that the parser
+    // can consume them one at a time. This process
+    // bypasses the normal capturing mechanism
+    // (e.g. `num_next_calls` will not be incremented),
+    // since the 'unglued' tokens do not exist in
+    // the original `TokenStream`.
+    //
+    // If we end up consuming both unglued tokens,
+    // then this is not an issue - we'll end up
+    // capturing the single 'glued' token.
+    //
+    // However, in certain circumstances, we may
+    // want to capture just the first 'unglued' token.
+    // For example, capturing the `Vec<u8>`
+    // in `Option<Vec<u8>>` requires us to unglue
+    // the trailing `>>` token. The `append_unglued_token`
+    // field is used to track this token - it gets
+    // appended to the captured stream when
+    // we evaluate a `LazyTokenStream`.
+    append_unglued_token: Option<TreeAndSpacing>,
 }

 #[derive(Clone)]
@@ -336,6 +358,7 @@ impl<'a> Parser<'a> {
                 stack: Vec::new(),
                 num_next_calls: 0,
                 desugar_doc_comments,
+                append_unglued_token: None,
             },
             desugar_doc_comments,
             unmatched_angle_bracket_count: 0,
@@ -359,6 +382,10 @@ impl<'a> Parser<'a> {
             self.token_cursor.next()
         };
         self.token_cursor.num_next_calls += 1;
+        // We've retrieved a token from the underlying
+        // cursor, so we no longer need to worry about
+        // an unglued token. See `break_and_eat` for more details.
+        self.token_cursor.append_unglued_token = None;
         if next.span.is_dummy() {
             // Tweak the location for better diagnostics, but keep syntactic context intact.
             next.span = fallback_span.with_ctxt(next.span.ctxt());
@@ -555,6 +582,14 @@ impl<'a> Parser<'a> {
         let first_span = self.sess.source_map().start_point(self.token.span);
         let second_span = self.token.span.with_lo(first_span.hi());
         self.token = Token::new(first, first_span);
+        // Keep track of this token - if we end token capturing now,
+        // we'll want to append this token to the captured stream.
+        //
+        // If we consume any additional tokens, then this token
+        // is not needed (we'll capture the entire 'glued' token),
+        // and `next_tok` will set this field to `None`.
+        self.token_cursor.append_unglued_token =
+            Some((TokenTree::Token(self.token.clone()), Spacing::Alone));
         // Use the spacing of the glued token as the spacing
         // of the unglued second token.
         self.bump_with((Token::new(second, second_span), self.token_spacing));
@@ -1230,6 +1265,7 @@ impl<'a> Parser<'a> {
             num_calls: usize,
             desugar_doc_comments: bool,
             trailing_semi: bool,
+            append_unglued_token: Option<TreeAndSpacing>,
         }
         impl CreateTokenStream for LazyTokenStreamImpl {
             fn create_token_stream(&self) -> TokenStream {
@@ -1253,12 +1289,18 @@ impl<'a> Parser<'a> {
                 }))
                 .take(num_calls);

-                make_token_stream(tokens)
+                make_token_stream(tokens, self.append_unglued_token.clone())
             }
             fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
                 if self.trailing_semi {
                     panic!("Called `add_trailing_semi` twice!");
                 }
+                if self.append_unglued_token.is_some() {
+                    panic!(
+                        "Cannot call `add_trailing_semi` when we have an unglued token {:?}",
+                        self.append_unglued_token
+                    );
+                }
                 let mut new = self.clone();
                 new.trailing_semi = true;
                 Box::new(new)
@@ -1271,6 +1313,7 @@ impl<'a> Parser<'a> {
             cursor_snapshot,
             desugar_doc_comments: self.desugar_doc_comments,
             trailing_semi: false,
+            append_unglued_token: self.token_cursor.append_unglued_token.clone(),
         };
         Ok((ret, Some(LazyTokenStream::new(lazy_impl))))
     }
@@ -1325,7 +1368,10 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa
 /// Converts a flattened iterator of tokens (including open and close delimiter tokens)
 /// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
 /// of open and close delims.
-fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStream {
+fn make_token_stream(
+    tokens: impl Iterator<Item = (Token, Spacing)>,
+    append_unglued_token: Option<TreeAndSpacing>,
+) -> TokenStream {
     #[derive(Debug)]
     struct FrameData {
         open: Span,
@@ -1348,14 +1394,17 @@ fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStr
                     .inner
                     .push((delimited, Spacing::Alone));
             }
-            token => stack
-                .last_mut()
-                .expect("Bottom token frame is missing!")
-                .inner
-                .push((TokenTree::Token(token), spacing)),
+            token => {
+                stack
+                    .last_mut()
+                    .expect("Bottom token frame is missing!")
+                    .inner
+                    .push((TokenTree::Token(token), spacing));
+            }
         }
     }
-    let final_buf = stack.pop().expect("Missing final buf!");
+    let mut final_buf = stack.pop().expect("Missing final buf!");
+    final_buf.inner.extend(append_unglued_token);
     assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
     TokenStream::new(final_buf.inner)
 }
```
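Two details of the new `make_token_stream` are worth spelling out. First, `Option<T>` implements `IntoIterator` (yielding zero or one items), which is why `final_buf.inner.extend(append_unglued_token)` appends the trailing token only when one was recorded. Second, nesting is built with a stack of frames: open delimiters push a frame, close delimiters pop it into the parent as a delimited tree. The toy model below sketches both; the `Tree` type and `build` function are hypothetical stand-ins for the compiler's own types, not its API.

```rust
// Toy model of `make_token_stream`: stack-based grouping plus the
// trailing-unglued-token append. All types here are illustrative.
#[derive(Debug)]
enum Tree {
    Token(char),
    Delimited(Vec<Tree>),
}

fn build(tokens: &[char], append_unglued_token: Option<char>) -> Vec<Tree> {
    let mut stack: Vec<Vec<Tree>> = vec![Vec::new()];
    for &tok in tokens {
        match tok {
            // Open delimiter: start a new frame.
            '(' | '[' | '{' => stack.push(Vec::new()),
            // Close delimiter: pop the frame into its parent.
            ')' | ']' | '}' => {
                let frame = stack.pop().expect("unbalanced close delimiter");
                stack
                    .last_mut()
                    .expect("Bottom token frame is missing!")
                    .push(Tree::Delimited(frame));
            }
            other => stack
                .last_mut()
                .expect("Bottom token frame is missing!")
                .push(Tree::Token(other)),
        }
    }
    let mut final_buf = stack.pop().expect("Missing final buf!");
    assert!(stack.is_empty(), "unclosed delimiter");
    // `Option<T>: IntoIterator` yields zero or one items, so this
    // appends the trailing `>` only when one was recorded.
    final_buf.extend(append_unglued_token.map(Tree::Token));
    final_buf
}

fn main() {
    // Tokens for `Vec<u8` with the unglued `>` recorded separately.
    println!("{:?}", build(&['V', '<', 'u', '8'], Some('>')));
}
```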
src/test/ui/proc-macro/capture-unglued-token.rs (new file, +20)

```diff
@@ -0,0 +1,20 @@
+// aux-build:test-macros.rs
+// compile-flags: -Z span-debug
+// check-pass
+
+// Tests that we properly handle parsing a nonterminal
+// where we have two consecutive angle brackets (one inside
+// the nonterminal, and one outside)
+
+#![no_std] // Don't load unnecessary hygiene information from std
+extern crate std;
+extern crate test_macros;
+
+macro_rules! trailing_angle {
+    (Option<$field:ty>) => {
+        test_macros::print_bang_consume!($field);
+    }
+}
+
+trailing_angle!(Option<Vec<u8>>);
+fn main() {}
```
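For readers without the `test-macros.rs` aux crate, here is a rough standalone approximation of the test: `stringify!` stands in for `print_bang_consume!`, so it demonstrates only the matching behavior, not the span capturing this commit fixes.

```rust
// Standalone approximation: `stringify!` replaces the aux-crate
// proc macro, so no aux-build is needed to run this.
macro_rules! trailing_angle {
    (Option<$field:ty>) => {
        // To match the matcher's closing `>`, the parser must unglue
        // the source's `>>`; `$field` captures everything before it.
        println!("captured: {}", stringify!($field));
    };
}

fn main() {
    trailing_angle!(Option<Vec<u8>>); // should print: captured: Vec<u8>
}
```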
src/test/ui/proc-macro/capture-unglued-token.stdout (new file, +28)

```diff
@@ -0,0 +1,28 @@
+PRINT-BANG INPUT (DISPLAY): Vec<u8>
+PRINT-BANG RE-COLLECTED (DISPLAY): Vec < u8 >
+PRINT-BANG INPUT (DEBUG): TokenStream [
+    Group {
+        delimiter: None,
+        stream: TokenStream [
+            Ident {
+                ident: "Vec",
+                span: $DIR/capture-unglued-token.rs:19:24: 19:27 (#0),
+            },
+            Punct {
+                ch: '<',
+                spacing: Alone,
+                span: $DIR/capture-unglued-token.rs:19:27: 19:28 (#0),
+            },
+            Ident {
+                ident: "u8",
+                span: $DIR/capture-unglued-token.rs:19:28: 19:30 (#0),
+            },
+            Punct {
+                ch: '>',
+                spacing: Alone,
+                span: $DIR/capture-unglued-token.rs:19:30: 19:31 (#0),
+            },
+        ],
+        span: $DIR/capture-unglued-token.rs:15:42: 15:48 (#4),
+    },
+]
```
