Skip to content

Commit d1f05fd

Browse files
committed
Distinguish the two kinds of token range.
When collecting tokens there are two kinds of range:

- a range relative to the parser's full token stream (which we get when we are parsing);
- a range relative to a single AST node's token stream (which we use within `LazyAttrTokenStreamImpl` when replacing tokens).

These are currently both represented with `Range<u32>` and it's easy to mix them up -- until now I hadn't properly understood the difference.

This commit introduces `ParserRange` and `NodeRange` to distinguish them. This also requires splitting `ReplaceRange` in two, giving the new types `ParserReplacement` and `NodeReplacement`. (These latter two names reduce the overloading of the word "range".)

The commit also rewrites some comments to be clearer.

The end result is a little more verbose, but much clearer.
1 parent 9d77d17 commit d1f05fd

File tree

4 files changed

+124
-78
lines changed

4 files changed

+124
-78
lines changed

compiler/rustc_builtin_macros/src/cfg_eval.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ impl CfgEval<'_> {
202202
}
203203

204204
// Now that we have our re-parsed `AttrTokenStream`, recursively configuring
205-
// our attribute target will correctly the tokens as well.
205+
// our attribute target will correctly configure the tokens as well.
206206
flat_map_annotatable(self, annotatable)
207207
}
208208
}

compiler/rustc_parse/src/parser/attr.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use rustc_span::{sym, BytePos, Span};
88
use thin_vec::ThinVec;
99
use tracing::debug;
1010

11-
use super::{AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, PathStyle};
11+
use super::{AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle};
1212
use crate::{errors, fluent_generated as fluent, maybe_whole};
1313

1414
// Public for rustfmt usage
@@ -307,8 +307,8 @@ impl<'a> Parser<'a> {
307307
// inner attribute, for possible later processing in a `LazyAttrTokenStream`.
308308
if let Capturing::Yes = self.capture_state.capturing {
309309
let end_pos = self.num_bump_calls;
310-
let range = start_pos..end_pos;
311-
self.capture_state.inner_attr_ranges.insert(attr.id, range);
310+
let parser_range = ParserRange(start_pos..end_pos);
311+
self.capture_state.inner_attr_parser_ranges.insert(attr.id, parser_range);
312312
}
313313
attrs.push(attr);
314314
} else {

compiler/rustc_parse/src/parser/attr_wrapper.rs

+70-54
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ use rustc_errors::PResult;
1010
use rustc_session::parse::ParseSess;
1111
use rustc_span::{sym, Span, DUMMY_SP};
1212

13-
use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor};
13+
use super::{
14+
Capturing, FlatToken, ForceCollect, NodeRange, NodeReplacement, Parser, ParserRange,
15+
TokenCursor,
16+
};
1417

1518
/// A wrapper type to ensure that the parser handles outer attributes correctly.
1619
/// When we parse outer attributes, we need to ensure that we capture tokens
@@ -28,8 +31,8 @@ use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCurso
2831
#[derive(Debug, Clone)]
2932
pub struct AttrWrapper {
3033
attrs: AttrVec,
31-
// The start of the outer attributes in the token cursor.
32-
// This allows us to create a `ReplaceRange` for the entire attribute
34+
// The start of the outer attributes in the parser's token stream.
35+
// This lets us create a `ParserReplacement` (later converted to a `NodeReplacement`) for the entire attribute
3336
// target, including outer attributes.
3437
start_pos: u32,
3538
}
@@ -88,7 +91,7 @@ struct LazyAttrTokenStreamImpl {
8891
cursor_snapshot: TokenCursor,
8992
num_calls: u32,
9093
break_last_token: bool,
91-
replace_ranges: Box<[ReplaceRange]>,
94+
node_replacements: Box<[NodeReplacement]>,
9295
}
9396

9497
impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
@@ -103,21 +106,24 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
103106
.chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
104107
.take(self.num_calls as usize);
105108

106-
if self.replace_ranges.is_empty() {
109+
if self.node_replacements.is_empty() {
107110
make_attr_token_stream(tokens, self.break_last_token)
108111
} else {
109112
let mut tokens: Vec<_> = tokens.collect();
110-
let mut replace_ranges = self.replace_ranges.to_vec();
111-
replace_ranges.sort_by_key(|(range, _)| range.start);
113+
let mut node_replacements = self.node_replacements.to_vec();
114+
node_replacements.sort_by_key(|(range, _)| range.0.start);
112115

113116
#[cfg(debug_assertions)]
114-
for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() {
117+
for [(node_range, tokens), (next_node_range, next_tokens)] in
118+
node_replacements.array_windows()
119+
{
115120
assert!(
116-
range.end <= next_range.start || range.end >= next_range.end,
117-
"Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
118-
range,
121+
node_range.0.end <= next_node_range.0.start
122+
|| node_range.0.end >= next_node_range.0.end,
123+
"Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
124+
node_range,
119125
tokens,
120-
next_range,
126+
next_node_range,
121127
next_tokens,
122128
);
123129
}
@@ -135,20 +141,23 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
135141
// start position, we ensure that any (outer) replace range which
136142
// encloses another (inner) replace range will fully overwrite the
137143
// inner range's replacement.
138-
for (range, target) in replace_ranges.into_iter().rev() {
139-
assert!(!range.is_empty(), "Cannot replace an empty range: {range:?}");
144+
for (node_range, target) in node_replacements.into_iter().rev() {
145+
assert!(
146+
!node_range.0.is_empty(),
147+
"Cannot replace an empty node range: {:?}",
148+
node_range.0
149+
);
140150

141151
// Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus
142152
// enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the
143153
// total length of `tokens` constant throughout the replacement process, allowing
144-
// us to use all of the `ReplaceRanges` entries without adjusting indices.
154+
// us to do all replacements without adjusting indices.
145155
let target_len = target.is_some() as usize;
146156
tokens.splice(
147-
(range.start as usize)..(range.end as usize),
148-
target
149-
.into_iter()
150-
.map(|target| FlatToken::AttrsTarget(target))
151-
.chain(iter::repeat(FlatToken::Empty).take(range.len() - target_len)),
157+
(node_range.0.start as usize)..(node_range.0.end as usize),
158+
target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
159+
iter::repeat(FlatToken::Empty).take(node_range.0.len() - target_len),
160+
),
152161
);
153162
}
154163
make_attr_token_stream(tokens.into_iter(), self.break_last_token)
@@ -215,7 +224,7 @@ impl<'a> Parser<'a> {
215224
let cursor_snapshot = self.token_cursor.clone();
216225
let start_pos = self.num_bump_calls;
217226
let has_outer_attrs = !attrs.attrs.is_empty();
218-
let replace_ranges_start = self.capture_state.replace_ranges.len();
227+
let parser_replacements_start = self.capture_state.parser_replacements.len();
219228

220229
// We set and restore `Capturing::Yes` on either side of the call to
221230
// `f`, so we can distinguish the outermost call to
@@ -270,7 +279,7 @@ impl<'a> Parser<'a> {
270279
return Ok(ret);
271280
}
272281

273-
let replace_ranges_end = self.capture_state.replace_ranges.len();
282+
let parser_replacements_end = self.capture_state.parser_replacements.len();
274283

275284
assert!(
276285
!(self.break_last_token && capture_trailing),
@@ -287,15 +296,16 @@ impl<'a> Parser<'a> {
287296

288297
let num_calls = end_pos - start_pos;
289298

290-
// Take the captured ranges for any inner attributes that we parsed in
291-
// `Parser::parse_inner_attributes`, and pair them in a `ReplaceRange`
292-
// with `None`, which means the relevant tokens will be removed. (More
293-
// details below.)
294-
let mut inner_attr_replace_ranges = Vec::new();
299+
// Take the captured `ParserRange`s for any inner attributes that we parsed in
300+
// `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
301+
// which means the relevant tokens will be removed. (More details below.)
302+
let mut inner_attr_parser_replacements = Vec::new();
295303
for attr in ret.attrs() {
296304
if attr.style == ast::AttrStyle::Inner {
297-
if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&attr.id) {
298-
inner_attr_replace_ranges.push((attr_range, None));
305+
if let Some(inner_attr_parser_range) =
306+
self.capture_state.inner_attr_parser_ranges.remove(&attr.id)
307+
{
308+
inner_attr_parser_replacements.push((inner_attr_parser_range, None));
299309
} else {
300310
self.dcx().span_delayed_bug(attr.span, "Missing token range for attribute");
301311
}
@@ -304,37 +314,41 @@ impl<'a> Parser<'a> {
304314

305315
// This is hot enough for `deep-vector` that checking the conditions for an empty iterator
306316
// is measurably faster than actually executing the iterator.
307-
let replace_ranges: Box<[ReplaceRange]> =
308-
if replace_ranges_start == replace_ranges_end && inner_attr_replace_ranges.is_empty() {
309-
Box::new([])
310-
} else {
311-
// Grab any replace ranges that occur *inside* the current AST node. We will
312-
// perform the actual replacement only when we convert the `LazyAttrTokenStream` to
313-
// an `AttrTokenStream`.
314-
self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]
315-
.iter()
316-
.cloned()
317-
.chain(inner_attr_replace_ranges.iter().cloned())
318-
.map(|(range, data)| ((range.start - start_pos)..(range.end - start_pos), data))
319-
.collect()
320-
};
317+
let node_replacements: Box<[_]> = if parser_replacements_start == parser_replacements_end
318+
&& inner_attr_parser_replacements.is_empty()
319+
{
320+
Box::new([])
321+
} else {
322+
// Grab any replace ranges that occur *inside* the current AST node. Convert them
323+
// from `ParserRange` form to `NodeRange` form. We will perform the actual
324+
// replacement only when we convert the `LazyAttrTokenStream` to an
325+
// `AttrTokenStream`.
326+
self.capture_state.parser_replacements
327+
[parser_replacements_start..parser_replacements_end]
328+
.iter()
329+
.cloned()
330+
.chain(inner_attr_parser_replacements.iter().cloned())
331+
.map(|(parser_range, data)| (NodeRange::new(parser_range, start_pos), data))
332+
.collect()
333+
};
321334

322335
// What is the status here when parsing the example code at the top of this method?
323336
//
324337
// When parsing `g`:
325338
// - `start_pos..end_pos` is `12..33` (`fn g { ... }`, excluding the outer attr).
326-
// - `inner_attr_replace_ranges` has one entry (`5..15`, when counting from `fn`), to
339+
// - `inner_attr_parser_replacements` has one entry (`ParserRange(17..27)`), to
327340
// delete the inner attr's tokens.
328-
// - This entry is put into the lazy tokens for `g`, i.e. deleting the inner attr from
329-
// those tokens (if they get evaluated).
341+
// - This entry is converted to `NodeRange(5..15)` (relative to the `fn`) and put into
342+
// the lazy tokens for `g`, i.e. deleting the inner attr from those tokens (if they get
343+
// evaluated).
330344
// - Those lazy tokens are also put into an `AttrsTarget` that is appended to `self`'s
331345
// replace ranges at the bottom of this function, for processing when parsing `m`.
332-
// - `replace_ranges_start..replace_ranges_end` is empty.
346+
// - `parser_replacements_start..parser_replacements_end` is empty.
333347
//
334348
// When parsing `m`:
335349
// - `start_pos..end_pos` is `0..34` (`mod m`, excluding the `#[cfg_eval]` attribute).
336-
// - `inner_attr_replace_ranges` is empty.
337-
// - `replace_range_start..replace_ranges_end` has one entry.
350+
// - `inner_attr_parser_replacements` is empty.
351+
// - `parser_replacements_start..parser_replacements_end` has one entry.
338352
// - One `AttrsTarget` (added below when parsing `g`) to replace all of `g` (`3..33`,
339353
// including its outer attribute), with:
340354
// - `attrs`: includes the outer and the inner attr.
@@ -345,7 +359,7 @@ impl<'a> Parser<'a> {
345359
num_calls,
346360
cursor_snapshot,
347361
break_last_token: self.break_last_token,
348-
replace_ranges,
362+
node_replacements,
349363
});
350364

351365
// If we support tokens and don't already have them, store the newly captured tokens.
@@ -366,7 +380,7 @@ impl<'a> Parser<'a> {
366380
// What is the status here when parsing the example code at the top of this method?
367381
//
368382
// When parsing `g`, we add one entry:
369-
// - The `start_pos..end_pos` (`3..33`) entry has a new `AttrsTarget` with:
383+
// - The pushed entry (`ParserRange(3..33)`) has a new `AttrsTarget` with:
370384
// - `attrs`: includes the outer and the inner attr.
371385
// - `tokens`: lazy tokens for `g` (with its inner attr deleted).
372386
//
@@ -377,12 +391,14 @@ impl<'a> Parser<'a> {
377391
// cfg-expand this AST node.
378392
let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };
379393
let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
380-
self.capture_state.replace_ranges.push((start_pos..end_pos, Some(target)));
394+
self.capture_state
395+
.parser_replacements
396+
.push((ParserRange(start_pos..end_pos), Some(target)));
381397
} else if matches!(self.capture_state.capturing, Capturing::No) {
382398
// Only clear the ranges once we've finished capturing entirely, i.e. we've finished
383399
// the outermost call to this method.
384-
self.capture_state.replace_ranges.clear();
385-
self.capture_state.inner_attr_ranges.clear();
400+
self.capture_state.parser_replacements.clear();
401+
self.capture_state.inner_attr_parser_ranges.clear();
386402
}
387403
Ok(ret)
388404
}

compiler/rustc_parse/src/parser/mod.rs

+50-20
Original file line numberDiff line numberDiff line change
@@ -192,24 +192,54 @@ struct ClosureSpans {
192192
body: Span,
193193
}
194194

195-
/// Indicates a range of tokens that should be replaced by
196-
/// the tokens in the provided `AttrsTarget`. This is used in two
197-
/// places during token collection:
195+
/// A token range within a `Parser`'s full token stream.
196+
#[derive(Clone, Debug)]
197+
struct ParserRange(Range<u32>);
198+
199+
/// A token range within an individual AST node's (lazy) token stream, i.e.
200+
/// relative to that node's first token. Distinct from `ParserRange` so the two
201+
/// kinds of range can't be mixed up.
202+
#[derive(Clone, Debug)]
203+
struct NodeRange(Range<u32>);
204+
205+
/// Indicates a range of tokens that should be replaced by an `AttrsTarget`
206+
/// (replacement) or be replaced by nothing (deletion). This is used in two
207+
/// places during token collection.
208+
///
209+
/// 1. Replacement. During the parsing of an AST node that may have a
210+
/// `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]`
211+
/// or `#[cfg_attr]`, we replace the entire inner AST node with
212+
/// `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an
213+
/// `AttrTokenStream`.
198214
///
199-
/// 1. During the parsing of an AST node that may have a `#[derive]`
200-
/// attribute, we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`
201-
/// In this case, we use a `ReplaceRange` to replace the entire inner AST node
202-
/// with `FlatToken::AttrsTarget`, allowing us to perform eager cfg-expansion
203-
/// on an `AttrTokenStream`.
215+
/// 2. Deletion. We delete inner attributes from all collected token streams,
216+
/// and instead track them through the `attrs` field on the AST node. This
217+
/// lets us manipulate them similarly to outer attributes. When we create a
218+
/// `TokenStream`, the inner attributes are inserted into the proper place
219+
/// in the token stream.
204220
///
205-
/// 2. When we parse an inner attribute while collecting tokens. We
206-
/// remove inner attributes from the token stream entirely, and
207-
/// instead track them through the `attrs` field on the AST node.
208-
/// This allows us to easily manipulate them (for example, removing
209-
/// the first macro inner attribute to invoke a proc-macro).
210-
/// When create a `TokenStream`, the inner attributes get inserted
211-
/// into the proper place in the token stream.
212-
type ReplaceRange = (Range<u32>, Option<AttrsTarget>);
221+
/// Each replacement starts off in `ParserReplacement` form but is converted to
222+
/// `NodeReplacement` form when it is attached to a single AST node, via
223+
/// `LazyAttrTokenStreamImpl`.
224+
type ParserReplacement = (ParserRange, Option<AttrsTarget>);
225+
226+
/// See the comment on `ParserReplacement`.
227+
type NodeReplacement = (NodeRange, Option<AttrsTarget>);
228+
229+
impl NodeRange {
230+
// Converts a range within a parser's tokens to a range within a
231+
// node's tokens beginning at `start_pos`.
232+
//
233+
// For example, imagine a parser with 50 tokens in its token stream, a
234+
// function that spans `ParserRange(20..40)` and an inner attribute within
235+
// that function that spans `ParserRange(30..35)`. We would find the inner
236+
// attribute's range within the function's tokens by subtracting 20, which
237+
// is the position of the function's start token. This gives
238+
// `NodeRange(10..15)`.
239+
fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
240+
NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
241+
}
242+
}
213243

214244
/// Controls how we capture tokens. Capturing can be expensive,
215245
/// so we try to avoid performing capturing in cases where
@@ -226,8 +256,8 @@ enum Capturing {
226256
#[derive(Clone, Debug)]
227257
struct CaptureState {
228258
capturing: Capturing,
229-
replace_ranges: Vec<ReplaceRange>,
230-
inner_attr_ranges: FxHashMap<AttrId, Range<u32>>,
259+
parser_replacements: Vec<ParserReplacement>,
260+
inner_attr_parser_ranges: FxHashMap<AttrId, ParserRange>,
231261
}
232262

233263
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
@@ -417,8 +447,8 @@ impl<'a> Parser<'a> {
417447
subparser_name,
418448
capture_state: CaptureState {
419449
capturing: Capturing::No,
420-
replace_ranges: Vec::new(),
421-
inner_attr_ranges: Default::default(),
450+
parser_replacements: Vec::new(),
451+
inner_attr_parser_ranges: Default::default(),
422452
},
423453
current_closure: None,
424454
recovery: Recovery::Allowed,

0 commit comments

Comments
 (0)