Skip to content

Commit be0ea0c

Browse files
committed
Auto merge of rust-lang#128725 - nnethercote:fix-assoc-expr-collect-problems, r=petrochenkov
Fix problems with assoc expr token collection There are several cases involving assoc exprs and attributes where the current code does the wrong thing. This PR adds some tests that demonstrate the problems and then fixes them. r? `@petrochenkov`
2 parents 4b7d074 + 9d31f86 commit be0ea0c

File tree

12 files changed

+483
-298
lines changed

12 files changed

+483
-298
lines changed

compiler/rustc_parse/src/parser/attr.rs

+14-8
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ use rustc_span::{sym, BytePos, Span};
88
use thin_vec::ThinVec;
99
use tracing::debug;
1010

11-
use super::{AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle};
11+
use super::{
12+
AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle, Trailing,
13+
UsePreAttrPos,
14+
};
1215
use crate::{errors, fluent_generated as fluent, maybe_whole};
1316

1417
// Public for rustfmt usage
@@ -257,7 +260,8 @@ impl<'a> Parser<'a> {
257260
pub fn parse_attr_item(&mut self, force_collect: ForceCollect) -> PResult<'a, ast::AttrItem> {
258261
maybe_whole!(self, NtMeta, |attr| attr.into_inner());
259262

260-
let do_parse = |this: &mut Self, _empty_attrs| {
263+
// Attr items don't have attributes.
264+
self.collect_tokens(None, AttrWrapper::empty(), force_collect, |this, _empty_attrs| {
261265
let is_unsafe = this.eat_keyword(kw::Unsafe);
262266
let unsafety = if is_unsafe {
263267
let unsafe_span = this.prev_token.span;
@@ -273,10 +277,12 @@ impl<'a> Parser<'a> {
273277
if is_unsafe {
274278
this.expect(&token::CloseDelim(Delimiter::Parenthesis))?;
275279
}
276-
Ok((ast::AttrItem { unsafety, path, args, tokens: None }, false))
277-
};
278-
// Attr items don't have attributes.
279-
self.collect_tokens_trailing_token(AttrWrapper::empty(), force_collect, do_parse)
280+
Ok((
281+
ast::AttrItem { unsafety, path, args, tokens: None },
282+
Trailing::No,
283+
UsePreAttrPos::No,
284+
))
285+
})
280286
}
281287

282288
/// Parses attributes that appear after the opening of an item. These should
@@ -309,8 +315,8 @@ impl<'a> Parser<'a> {
309315
};
310316
if let Some(attr) = attr {
311317
// If we are currently capturing tokens (i.e. we are within a call to
312-
// `Parser::collect_tokens_trailing_tokens`) record the token positions of this
313-
// inner attribute, for possible later processing in a `LazyAttrTokenStream`.
318+
// `Parser::collect_tokens`) record the token positions of this inner attribute,
319+
// for possible later processing in a `LazyAttrTokenStream`.
314320
if let Capturing::Yes = self.capture_state.capturing {
315321
let end_pos = self.num_bump_calls;
316322
let parser_range = ParserRange(start_pos..end_pos);

compiler/rustc_parse/src/parser/attr_wrapper.rs

+75-37
Original file line numberDiff line numberDiff line change
@@ -12,40 +12,56 @@ use rustc_span::{sym, Span, DUMMY_SP};
1212

1313
use super::{
1414
Capturing, FlatToken, ForceCollect, NodeRange, NodeReplacement, Parser, ParserRange,
15-
TokenCursor,
15+
TokenCursor, Trailing,
1616
};
1717

18+
// When collecting tokens, this fully captures the start point. Usually its
19+
// just after outer attributes, but occasionally it's before.
20+
#[derive(Clone, Debug)]
21+
pub(super) struct CollectPos {
22+
start_token: (Token, Spacing),
23+
cursor_snapshot: TokenCursor,
24+
start_pos: u32,
25+
}
26+
27+
pub(super) enum UsePreAttrPos {
28+
No,
29+
Yes,
30+
}
31+
1832
/// A wrapper type to ensure that the parser handles outer attributes correctly.
1933
/// When we parse outer attributes, we need to ensure that we capture tokens
2034
/// for the attribute target. This allows us to perform cfg-expansion on
2135
/// a token stream before we invoke a derive proc-macro.
2236
///
2337
/// This wrapper prevents direct access to the underlying `ast::AttrVec`.
2438
/// Parsing code can only get access to the underlying attributes
25-
/// by passing an `AttrWrapper` to `collect_tokens_trailing_token`.
39+
/// by passing an `AttrWrapper` to `collect_tokens`.
2640
/// This makes it difficult to accidentally construct an AST node
2741
/// (which stores an `ast::AttrVec`) without first collecting tokens.
2842
///
2943
/// This struct has its own module, to ensure that the parser code
3044
/// cannot directly access the `attrs` field.
3145
#[derive(Debug, Clone)]
32-
pub struct AttrWrapper {
46+
pub(super) struct AttrWrapper {
3347
attrs: AttrVec,
3448
// The start of the outer attributes in the parser's token stream.
3549
// This lets us create a `NodeReplacement` for the entire attribute
36-
// target, including outer attributes.
37-
start_pos: u32,
50+
// target, including outer attributes. `None` if there are no outer
51+
// attributes.
52+
start_pos: Option<u32>,
3853
}
3954

4055
impl AttrWrapper {
4156
pub(super) fn new(attrs: AttrVec, start_pos: u32) -> AttrWrapper {
42-
AttrWrapper { attrs, start_pos }
57+
AttrWrapper { attrs, start_pos: Some(start_pos) }
4358
}
44-
pub fn empty() -> AttrWrapper {
45-
AttrWrapper { attrs: AttrVec::new(), start_pos: u32::MAX }
59+
60+
pub(super) fn empty() -> AttrWrapper {
61+
AttrWrapper { attrs: AttrVec::new(), start_pos: None }
4662
}
4763

48-
pub(crate) fn take_for_recovery(self, psess: &ParseSess) -> AttrVec {
64+
pub(super) fn take_for_recovery(self, psess: &ParseSess) -> AttrVec {
4965
psess.dcx().span_delayed_bug(
5066
self.attrs.get(0).map(|attr| attr.span).unwrap_or(DUMMY_SP),
5167
"AttrVec is taken for recovery but no error is produced",
@@ -56,12 +72,12 @@ impl AttrWrapper {
5672

5773
/// Prepend `self.attrs` to `attrs`.
5874
// FIXME: require passing an NT to prevent misuse of this method
59-
pub(crate) fn prepend_to_nt_inner(mut self, attrs: &mut AttrVec) {
75+
pub(super) fn prepend_to_nt_inner(mut self, attrs: &mut AttrVec) {
6076
mem::swap(attrs, &mut self.attrs);
6177
attrs.extend(self.attrs);
6278
}
6379

64-
pub fn is_empty(&self) -> bool {
80+
pub(super) fn is_empty(&self) -> bool {
6581
self.attrs.is_empty()
6682
}
6783
}
@@ -77,7 +93,7 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
7793
}
7894

7995
// From a value of this type we can reconstruct the `TokenStream` seen by the
80-
// `f` callback passed to a call to `Parser::collect_tokens_trailing_token`, by
96+
// `f` callback passed to a call to `Parser::collect_tokens`, by
8197
// replaying the getting of the tokens. This saves us producing a `TokenStream`
8298
// if it is never needed, e.g. a captured `macro_rules!` argument that is never
8399
// passed to a proc macro. In practice, token stream creation happens rarely
@@ -166,16 +182,30 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
166182
}
167183

168184
impl<'a> Parser<'a> {
185+
pub(super) fn collect_pos(&self) -> CollectPos {
186+
CollectPos {
187+
start_token: (self.token.clone(), self.token_spacing),
188+
cursor_snapshot: self.token_cursor.clone(),
189+
start_pos: self.num_bump_calls,
190+
}
191+
}
192+
169193
/// Parses code with `f`. If appropriate, it records the tokens (in
170194
/// `LazyAttrTokenStream` form) that were parsed in the result, accessible
171-
/// via the `HasTokens` trait. The second (bool) part of the callback's
195+
/// via the `HasTokens` trait. The `Trailing` part of the callback's
172196
/// result indicates if an extra token should be captured, e.g. a comma or
173-
/// semicolon.
197+
/// semicolon. The `UsePreAttrPos` part of the callback's result indicates
198+
/// if we should use `pre_attr_pos` as the collection start position (only
199+
/// required in a few cases).
174200
///
175201
/// The `attrs` passed in are in `AttrWrapper` form, which is opaque. The
176202
/// `AttrVec` within is passed to `f`. See the comment on `AttrWrapper` for
177203
/// details.
178204
///
205+
/// `pre_attr_pos` is the position before the outer attributes (or the node
206+
/// itself, if no outer attributes are present). It is only needed if `f`
207+
/// can return `UsePreAttrPos::Yes`.
208+
///
179209
/// Note: If your callback consumes an opening delimiter (including the
180210
/// case where `self.token` is an opening delimiter on entry to this
181211
/// function), you must also consume the corresponding closing delimiter.
@@ -197,11 +227,12 @@ impl<'a> Parser<'a> {
197227
/// } // 32..33
198228
/// } // 33..34
199229
/// ```
200-
pub fn collect_tokens_trailing_token<R: HasAttrs + HasTokens>(
230+
pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
201231
&mut self,
232+
pre_attr_pos: Option<CollectPos>,
202233
attrs: AttrWrapper,
203234
force_collect: ForceCollect,
204-
f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, bool)>,
235+
f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
205236
) -> PResult<'a, R> {
206237
// We must collect if anything could observe the collected tokens, i.e.
207238
// if any of the following conditions hold.
@@ -220,23 +251,20 @@ impl<'a> Parser<'a> {
220251
return Ok(f(self, attrs.attrs)?.0);
221252
}
222253

223-
let start_token = (self.token.clone(), self.token_spacing);
224-
let cursor_snapshot = self.token_cursor.clone();
225-
let start_pos = self.num_bump_calls;
254+
let mut collect_pos = self.collect_pos();
226255
let has_outer_attrs = !attrs.attrs.is_empty();
227256
let parser_replacements_start = self.capture_state.parser_replacements.len();
228257

229258
// We set and restore `Capturing::Yes` on either side of the call to
230-
// `f`, so we can distinguish the outermost call to
231-
// `collect_tokens_trailing_token` (e.g. parsing `m` in the example
232-
// above) from any inner (indirectly recursive) calls (e.g. parsing `g`
233-
// in the example above). This distinction is used below and in
234-
// `Parser::parse_inner_attributes`.
235-
let (mut ret, capture_trailing) = {
259+
// `f`, so we can distinguish the outermost call to `collect_tokens`
260+
// (e.g. parsing `m` in the example above) from any inner (indirectly
261+
// recursive) calls (e.g. parsing `g` in the example above). This
262+
// distinction is used below and in `Parser::parse_inner_attributes`.
263+
let (mut ret, capture_trailing, use_pre_attr_pos) = {
236264
let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
237-
let ret_and_trailing = f(self, attrs.attrs);
265+
let res = f(self, attrs.attrs);
238266
self.capture_state.capturing = prev_capturing;
239-
ret_and_trailing?
267+
res?
240268
};
241269

242270
// When we're not in `capture_cfg` mode, then skip collecting and
@@ -279,10 +307,18 @@ impl<'a> Parser<'a> {
279307
return Ok(ret);
280308
}
281309

310+
// Replace the post-attribute collection start position with the
311+
// pre-attribute position supplied, if `f` indicated it is necessary.
312+
// (The caller is responsible for providing a non-`None` `pre_attr_pos`
313+
// if this is a possibility.)
314+
if matches!(use_pre_attr_pos, UsePreAttrPos::Yes) {
315+
collect_pos = pre_attr_pos.unwrap();
316+
}
317+
282318
let parser_replacements_end = self.capture_state.parser_replacements.len();
283319

284320
assert!(
285-
!(self.break_last_token && capture_trailing),
321+
!(self.break_last_token && matches!(capture_trailing, Trailing::Yes)),
286322
"Cannot set break_last_token and have trailing token"
287323
);
288324

@@ -294,7 +330,7 @@ impl<'a> Parser<'a> {
294330
// `AttrTokenStream`, we will create the proper token.
295331
+ self.break_last_token as u32;
296332

297-
let num_calls = end_pos - start_pos;
333+
let num_calls = end_pos - collect_pos.start_pos;
298334

299335
// Take the captured `ParserRange`s for any inner attributes that we parsed in
300336
// `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
@@ -328,7 +364,9 @@ impl<'a> Parser<'a> {
328364
.iter()
329365
.cloned()
330366
.chain(inner_attr_parser_replacements.iter().cloned())
331-
.map(|(parser_range, data)| (NodeRange::new(parser_range, start_pos), data))
367+
.map(|(parser_range, data)| {
368+
(NodeRange::new(parser_range, collect_pos.start_pos), data)
369+
})
332370
.collect()
333371
};
334372

@@ -355,9 +393,9 @@ impl<'a> Parser<'a> {
355393
// - `tokens`: lazy tokens for `g` (with its inner attr deleted).
356394

357395
let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
358-
start_token,
396+
start_token: collect_pos.start_token,
397+
cursor_snapshot: collect_pos.cursor_snapshot,
359398
num_calls,
360-
cursor_snapshot,
361399
break_last_token: self.break_last_token,
362400
node_replacements,
363401
});
@@ -368,9 +406,9 @@ impl<'a> Parser<'a> {
368406
}
369407

370408
// If `capture_cfg` is set and we're inside a recursive call to
371-
// `collect_tokens_trailing_token`, then we need to register a replace range
372-
// if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
373-
// on the captured token stream.
409+
// `collect_tokens`, then we need to register a replace range if we
410+
// have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
411+
// cfg-expansion on the captured token stream.
374412
if self.capture_cfg
375413
&& matches!(self.capture_state.capturing, Capturing::Yes)
376414
&& has_cfg_or_cfg_attr(ret.attrs())
@@ -389,7 +427,8 @@ impl<'a> Parser<'a> {
389427
// Set things up so that the entire AST node that we just parsed, including attributes,
390428
// will be replaced with `target` in the lazy token stream. This will allow us to
391429
// cfg-expand this AST node.
392-
let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };
430+
let start_pos =
431+
if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
393432
let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
394433
self.capture_state
395434
.parser_replacements
@@ -490,7 +529,6 @@ mod size_asserts {
490529

491530
use super::*;
492531
// tidy-alphabetical-start
493-
static_assert_size!(AttrWrapper, 16);
494532
static_assert_size!(LazyAttrTokenStreamImpl, 96);
495533
// tidy-alphabetical-end
496534
}

compiler/rustc_parse/src/parser/diagnostics.rs

+10-9
Original file line numberDiff line numberDiff line change
@@ -2487,13 +2487,14 @@ impl<'a> Parser<'a> {
24872487
pub(super) fn handle_unambiguous_unbraced_const_arg(&mut self) -> PResult<'a, P<Expr>> {
24882488
let start = self.token.span;
24892489
let attrs = self.parse_outer_attributes()?;
2490-
let expr = self.parse_expr_res(Restrictions::CONST_EXPR, attrs).map_err(|mut err| {
2491-
err.span_label(
2492-
start.shrink_to_lo(),
2493-
"while parsing a const generic argument starting here",
2494-
);
2495-
err
2496-
})?;
2490+
let (expr, _) =
2491+
self.parse_expr_res(Restrictions::CONST_EXPR, attrs).map_err(|mut err| {
2492+
err.span_label(
2493+
start.shrink_to_lo(),
2494+
"while parsing a const generic argument starting here",
2495+
);
2496+
err
2497+
})?;
24972498
if !self.expr_is_valid_const_arg(&expr) {
24982499
self.dcx().emit_err(ConstGenericWithoutBraces {
24992500
span: expr.span,
@@ -2613,7 +2614,7 @@ impl<'a> Parser<'a> {
26132614
let attrs = self.parse_outer_attributes()?;
26142615
self.parse_expr_res(Restrictions::CONST_EXPR, attrs)
26152616
})() {
2616-
Ok(expr) => {
2617+
Ok((expr, _)) => {
26172618
// Find a mistake like `MyTrait<Assoc == S::Assoc>`.
26182619
if snapshot.token == token::EqEq {
26192620
err.span_suggestion(
@@ -2671,7 +2672,7 @@ impl<'a> Parser<'a> {
26712672
})() {
26722673
// Since we don't know the exact reason why we failed to parse the type or the
26732674
// expression, employ a simple heuristic to weed out some pathological cases.
2674-
Ok(expr) if let token::Comma | token::Gt = snapshot.token.kind => {
2675+
Ok((expr, _)) if let token::Comma | token::Gt = snapshot.token.kind => {
26752676
self.restore_snapshot(snapshot);
26762677
Some(expr)
26772678
}

0 commit comments

Comments
 (0)