Skip to content

Commit 1fdabfb

Browse files
committed
Avoid double-handling of attributes in collect_tokens.
By keeping track of attributes that have been previously processed. This fixes the `macro-rules-derive-cfg.stdout` test, and is necessary for #124141 which removes nonterminals. Also shrink the `SmallVec` inline size used in `IntervalSet`. 2 gives slightly better perf than 4 now that there's an `IntervalSet` in `Parser`, which is cloned reasonably often.
1 parent b7e7f6e commit 1fdabfb

File tree

6 files changed

+30
-35
lines changed

6 files changed

+30
-35
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -4172,6 +4172,7 @@ dependencies = [
41724172
"rustc_errors",
41734173
"rustc_feature",
41744174
"rustc_fluent_macro",
4175+
"rustc_index",
41754176
"rustc_lexer",
41764177
"rustc_macros",
41774178
"rustc_session",

compiler/rustc_index/src/interval.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ mod tests;
1818
#[derive(Debug, Clone)]
1919
pub struct IntervalSet<I> {
2020
// Start, end
21-
map: SmallVec<[(u32, u32); 4]>,
21+
map: SmallVec<[(u32, u32); 2]>,
2222
domain: usize,
2323
_data: PhantomData<I>,
2424
}

compiler/rustc_parse/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ rustc_data_structures = { path = "../rustc_data_structures" }
1212
rustc_errors = { path = "../rustc_errors" }
1313
rustc_feature = { path = "../rustc_feature" }
1414
rustc_fluent_macro = { path = "../rustc_fluent_macro" }
15+
rustc_index = { path = "../rustc_index" }
1516
rustc_lexer = { path = "../rustc_lexer" }
1617
rustc_macros = { path = "../rustc_macros" }
1718
rustc_session = { path = "../rustc_session" }

compiler/rustc_parse/src/parser/attr_wrapper.rs

+19-4
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,20 @@ impl<'a> Parser<'a> {
256256
res?
257257
};
258258

259+
// Ignore any attributes we've previously processed. This happens when
260+
// an inner call to `collect_tokens` returns an AST node and then an
261+
// outer call ends up with the same AST node without any additional
262+
// wrapping layer.
263+
let ret_attrs: AttrVec = ret
264+
.attrs()
265+
.iter()
266+
.cloned()
267+
.filter(|attr| {
268+
let is_unseen = self.capture_state.seen_attrs.insert(attr.id);
269+
is_unseen
270+
})
271+
.collect();
272+
259273
// When we're not in "definite capture mode", then skip collecting and
260274
// return early if either of the following conditions hold.
261275
// - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
@@ -269,7 +283,7 @@ impl<'a> Parser<'a> {
269283
// tokens.
270284
let definite_capture_mode = self.capture_cfg
271285
&& matches!(self.capture_state.capturing, Capturing::Yes)
272-
&& has_cfg_or_cfg_attr(ret.attrs());
286+
&& has_cfg_or_cfg_attr(&ret_attrs);
273287
if !definite_capture_mode && matches!(ret.tokens_mut(), None | Some(Some(_))) {
274288
return Ok(ret);
275289
}
@@ -289,7 +303,7 @@ impl<'a> Parser<'a> {
289303
// outer and inner attributes. So this check is more precise than
290304
// the earlier `needs_tokens` check, and we don't need to
291305
// check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.)
292-
|| needs_tokens(ret.attrs())
306+
|| needs_tokens(&ret_attrs)
293307
// - We are in "definite capture mode", which requires that there
294308
// are `#[cfg]` or `#[cfg_attr]` attributes. (During normal
295309
// non-`capture_cfg` parsing, we don't need any special capturing
@@ -328,7 +342,7 @@ impl<'a> Parser<'a> {
328342
// `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
329343
// which means the relevant tokens will be removed. (More details below.)
330344
let mut inner_attr_parser_replacements = Vec::new();
331-
for attr in ret.attrs() {
345+
for attr in ret_attrs.iter() {
332346
if attr.style == ast::AttrStyle::Inner {
333347
if let Some(inner_attr_parser_range) =
334348
self.capture_state.inner_attr_parser_ranges.remove(&attr.id)
@@ -418,7 +432,7 @@ impl<'a> Parser<'a> {
418432
// cfg-expand this AST node.
419433
let start_pos =
420434
if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
421-
let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
435+
let target = AttrsTarget { attrs: ret_attrs, tokens };
422436
tokens_used = true;
423437
self.capture_state
424438
.parser_replacements
@@ -428,6 +442,7 @@ impl<'a> Parser<'a> {
428442
// the outermost call to this method.
429443
self.capture_state.parser_replacements.clear();
430444
self.capture_state.inner_attr_parser_ranges.clear();
445+
self.capture_state.seen_attrs.clear();
431446
}
432447
assert!(tokens_used); // check we didn't create `tokens` unnecessarily
433448
Ok(ret)

compiler/rustc_parse/src/parser/mod.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ use rustc_ast_pretty::pprust;
3535
use rustc_data_structures::fx::FxHashMap;
3636
use rustc_data_structures::sync::Lrc;
3737
use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult};
38+
use rustc_index::interval::IntervalSet;
3839
use rustc_session::parse::ParseSess;
3940
use rustc_span::symbol::{kw, sym, Ident, Symbol};
4041
use rustc_span::{Span, DUMMY_SP};
@@ -183,7 +184,7 @@ pub struct Parser<'a> {
183184
// This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
184185
// it doesn't unintentionally get bigger.
185186
#[cfg(target_pointer_width = "64")]
186-
rustc_data_structures::static_assert_size!(Parser<'_>, 256);
187+
rustc_data_structures::static_assert_size!(Parser<'_>, 288);
187188

188189
/// Stores span information about a closure.
189190
#[derive(Clone, Debug)]
@@ -261,6 +262,9 @@ struct CaptureState {
261262
capturing: Capturing,
262263
parser_replacements: Vec<ParserReplacement>,
263264
inner_attr_parser_ranges: FxHashMap<AttrId, ParserRange>,
265+
// `IntervalSet` is good for perf because attrs are mostly added to this
266+
// set in contiguous ranges.
267+
seen_attrs: IntervalSet<AttrId>,
264268
}
265269

266270
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
@@ -458,6 +462,7 @@ impl<'a> Parser<'a> {
458462
capturing: Capturing::No,
459463
parser_replacements: Vec::new(),
460464
inner_attr_parser_ranges: Default::default(),
465+
seen_attrs: IntervalSet::new(u32::MAX as usize),
461466
},
462467
current_closure: None,
463468
recovery: Recovery::Allowed,

tests/ui/proc-macro/macro-rules-derive-cfg.stdout

+2-29
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
PRINT-DERIVE INPUT (DISPLAY): struct
22
Foo([bool; #[rustc_dummy(first)] #[rustc_dummy(second)]
3-
{ #![rustc_dummy(third)] #[rustc_dummy(fourth)] #[rustc_dummy(fourth)] 30 }]);
3+
{ #![rustc_dummy(third)] #[rustc_dummy(fourth)] 30 }]);
44
PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): struct
55
Foo([bool; #[rustc_dummy(first)] #[rustc_dummy(second)]
6-
{
7-
#! [rustc_dummy(third)] #[rustc_dummy(fourth)] #[rustc_dummy(fourth)] 30
8-
}]);
6+
{ #! [rustc_dummy(third)] #[rustc_dummy(fourth)] 30 }]);
97
PRINT-DERIVE INPUT (DEBUG): TokenStream [
108
Ident {
119
ident: "struct",
@@ -138,31 +136,6 @@ PRINT-DERIVE INPUT (DEBUG): TokenStream [
138136
],
139137
span: $DIR/macro-rules-derive-cfg.rs:25:6: 25:49 (#0),
140138
},
141-
Punct {
142-
ch: '#',
143-
spacing: Alone,
144-
span: $DIR/macro-rules-derive-cfg.rs:25:5: 25:6 (#0),
145-
},
146-
Group {
147-
delimiter: Bracket,
148-
stream: TokenStream [
149-
Ident {
150-
ident: "rustc_dummy",
151-
span: $DIR/macro-rules-derive-cfg.rs:25:28: 25:39 (#0),
152-
},
153-
Group {
154-
delimiter: Parenthesis,
155-
stream: TokenStream [
156-
Ident {
157-
ident: "fourth",
158-
span: $DIR/macro-rules-derive-cfg.rs:25:40: 25:46 (#0),
159-
},
160-
],
161-
span: $DIR/macro-rules-derive-cfg.rs:25:39: 25:47 (#0),
162-
},
163-
],
164-
span: $DIR/macro-rules-derive-cfg.rs:25:6: 25:49 (#0),
165-
},
166139
Literal {
167140
kind: Integer,
168141
symbol: "30",

0 commit comments

Comments
 (0)