@@ -17,12 +17,12 @@ use std::{iter, mem};
17
17
///
18
18
/// This wrapper prevents direct access to the underlying `ast::AttrVec`.
19
19
/// Parsing code can only get access to the underlying attributes
20
- /// by passing an `AttrWrapper` to `collect_tokens_trailing_tokens `.
20
+ /// by passing an `AttrWrapper` to `collect_tokens_trailing_token `.
21
21
/// This makes it difficult to accidentally construct an AST node
22
22
/// (which stores an `ast::AttrVec`) without first collecting tokens.
23
23
///
24
24
/// This struct has its own module, to ensure that the parser code
25
- /// cannot directly access the `attrs` field
25
+ /// cannot directly access the `attrs` field.
26
26
#[ derive( Debug , Clone ) ]
27
27
pub struct AttrWrapper {
28
28
attrs : AttrVec ,
@@ -76,14 +76,13 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
76
76
} )
77
77
}
78
78
79
- // Produces a `TokenStream` on-demand. Using `cursor_snapshot`
80
- // and `num_calls`, we can reconstruct the `TokenStream` seen
81
- // by the callback. This allows us to avoid producing a `TokenStream`
82
- // if it is never needed - for example, a captured `macro_rules!`
83
- // argument that is never passed to a proc macro.
84
- // In practice token stream creation happens rarely compared to
85
- // calls to `collect_tokens` (see some statistics in #78736),
86
- // so we are doing as little up-front work as possible.
79
+ // From a value of this type we can reconstruct the `TokenStream` seen by the
80
+ // `f` callback passed to a call to `Parser::collect_tokens_trailing_token`, by
81
+ // replaying the getting of the tokens. This saves us producing a `TokenStream`
82
+ // if it is never needed, e.g. a captured `macro_rules!` argument that is never
83
+ // passed to a proc macro. In practice, token stream creation happens rarely
84
+ // compared to calls to `collect_tokens` (see some statistics in #78736) so we
85
+ // are doing as little up-front work as possible.
87
86
//
88
87
// This also makes `Parser` very cheap to clone, since
89
88
// there is no intermediate collection buffer to clone.
@@ -163,46 +162,55 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
163
162
}
164
163
165
164
impl < ' a > Parser < ' a > {
166
- /// Records all tokens consumed by the provided callback,
167
- /// including the current token. These tokens are collected
168
- /// into a `LazyAttrTokenStream`, and returned along with the first part of
169
- /// the callback's result. The second (bool) part of the callback's result
170
- /// indicates if an extra token should be captured, e.g. a comma or
165
+ /// Parses code with `f`. If appropriate, it records the tokens (in
166
+ /// `LazyAttrTokenStream` form) that were parsed in the result, accessible
167
+ /// via the `HasTokens` trait. The second (bool) part of the callback's
168
+ /// result indicates if an extra token should be captured, e.g. a comma or
171
169
/// semicolon.
172
170
///
173
171
/// The `attrs` passed in are in `AttrWrapper` form, which is opaque. The
174
172
/// `AttrVec` within is passed to `f`. See the comment on `AttrWrapper` for
175
173
/// details.
176
174
///
177
- /// Note: If your callback consumes an opening delimiter
178
- /// (including the case where you call `collect_tokens`
179
- /// when the current token is an opening delimiter),
180
- /// you must also consume the corresponding closing delimiter.
175
+ /// Note: If your callback consumes an opening delimiter (including the
176
+ /// case where `self.token` is an opening delimiter on entry to this
177
+ /// function), you must also consume the corresponding closing delimiter.
178
+ /// E.g. you can consume `something ([{ }])` or `([{}])`, but not `([{}]`.
179
+ /// This restriction isn't a problem in practice, because parsed AST items
180
+ /// always have matching delimiters.
181
181
///
182
- /// That is, you can consume
183
- /// `something ([{ }])` or `([{}])`, but not `([{}]`
184
- ///
185
- /// This restriction shouldn't be an issue in practice,
186
- /// since this function is used to record the tokens for
187
- /// a parsed AST item, which always has matching delimiters.
182
+ /// The following example code will be used to explain things in comments
183
+ /// below. It has an outer attribute and an inner attribute. Parsing it
184
+ /// involves two calls to this method, one of which is indirectly
185
+ /// recursive.
186
+ /// ```ignore (fake attributes)
187
+ /// #[cfg_eval] // token pos
188
+ /// mod m { // 0.. 3
189
+ /// #[cfg_attr(cond1, attr1)] // 3..12
190
+ /// fn g() { // 12..17
191
+ /// #![cfg_attr(cond2, attr2)] // 17..27
192
+ /// let _x = 3; // 27..32
193
+ /// } // 32..33
194
+ /// } // 33..34
195
+ /// ```
188
196
pub fn collect_tokens_trailing_token < R : HasAttrs + HasTokens > (
189
197
& mut self ,
190
198
attrs : AttrWrapper ,
191
199
force_collect : ForceCollect ,
192
200
f : impl FnOnce ( & mut Self , ast:: AttrVec ) -> PResult < ' a , ( R , bool ) > ,
193
201
) -> PResult < ' a , R > {
194
- // We only bail out when nothing could possibly observe the collected tokens:
195
- // 1. We cannot be force collecting tokens (since force-collecting requires tokens
196
- // by definition
202
+ // Skip collection when nothing could observe the collected tokens, i.e.
203
+ // all of the following conditions hold.
204
+ // - We are not force collecting tokens (because force collection
205
+ // requires tokens by definition).
197
206
if matches ! ( force_collect, ForceCollect :: No )
198
- // None of our outer attributes can require tokens (e.g. a proc-macro)
207
+ // - None of our outer attributes require tokens.
199
208
&& attrs. is_complete ( )
200
- // If our target supports custom inner attributes, then we cannot bail
201
- // out early, since we may need to capture tokens for a custom inner attribute
202
- // invocation.
209
+ // - Our target doesn't support custom inner attributes (custom
210
+ // inner attribute invocation might require token capturing).
203
211
&& !R :: SUPPORTS_CUSTOM_INNER_ATTRS
204
- // Never bail out early in `capture_cfg` mode, since there might be `#[cfg]`
205
- // or `#[cfg_attr]` attributes.
212
+ // - We are not in `capture_cfg` mode (which requires tokens if
213
+ // the parsed node has `#[cfg]` or `#[cfg_attr]` attributes) .
206
214
&& !self . capture_cfg
207
215
{
208
216
return Ok ( f ( self , attrs. attrs ) ?. 0 ) ;
@@ -214,58 +222,59 @@ impl<'a> Parser<'a> {
214
222
let has_outer_attrs = !attrs. attrs . is_empty ( ) ;
215
223
let replace_ranges_start = self . capture_state . replace_ranges . len ( ) ;
216
224
225
+ // We set and restore `Capturing::Yes` on either side of the call to
226
+ // `f`, so we can distinguish the outermost call to
227
+ // `collect_tokens_trailing_token` (e.g. parsing `m` in the example
228
+ // above) from any inner (indirectly recursive) calls (e.g. parsing `g`
229
+ // in the example above). This distinction is used below and in
230
+ // `Parser::parse_inner_attributes`.
217
231
let ( mut ret, capture_trailing) = {
218
232
let prev_capturing = mem:: replace ( & mut self . capture_state . capturing , Capturing :: Yes ) ;
219
233
let ret_and_trailing = f ( self , attrs. attrs ) ;
220
234
self . capture_state . capturing = prev_capturing;
221
235
ret_and_trailing?
222
236
} ;
223
237
224
- // When we're not in `capture-cfg` mode, then bail out early if:
225
- // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`)
226
- // so there's nothing for us to do.
227
- // 2. Our target already has tokens set (e.g. we've parsed something
228
- // like `#[my_attr] $item`). The actual parsing code takes care of
229
- // prepending any attributes to the nonterminal, so we don't need to
230
- // modify the already captured tokens.
231
- // Note that this check is independent of `force_collect`- if we already
232
- // have tokens, or can't even store them, then there's never a need to
233
- // force collection of new tokens.
238
+ // When we're not in `capture_cfg` mode, then skip collecting and
239
+ // return early if either of the following conditions hold.
240
+ // - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
241
+ // - `Some(Some(_))`: Our target already has tokens set (e.g. we've
242
+ // parsed something like `#[my_attr] $item`). The actual parsing code
243
+ // takes care of prepending any attributes to the nonterminal, so we
244
+ // don't need to modify the already captured tokens.
245
+ //
246
+ // Note that this check is independent of `force_collect`. There's no
247
+ // need to collect tokens when we don't support tokens or already have
248
+ // tokens.
234
249
if !self . capture_cfg && matches ! ( ret. tokens_mut( ) , None | Some ( Some ( _) ) ) {
235
250
return Ok ( ret) ;
236
251
}
237
252
238
- // This is very similar to the bail out check at the start of this function.
239
- // Now that we've parsed an AST node, we have more information available.
253
+ // This is similar to the "skip collection" check at the start of this
254
+ // function, but now that we've parsed an AST node we have more
255
+ // information available. (If we return early here that means the
256
+ // setup, such as cloning the token cursor, was unnecessary. That's
257
+ // hard to avoid.)
258
+ //
259
+ // Skip collection when nothing could observe the collected tokens, i.e.
260
+ // all of the following conditions hold.
261
+ // - We are not force collecting tokens.
240
262
if matches ! ( force_collect, ForceCollect :: No )
241
- // We now have inner attributes available, so this check is more precise
242
- // than `attrs.is_complete()` at the start of the function.
243
- // As a result, we don't need to check `R::SUPPORTS_CUSTOM_INNER_ATTRS`
263
+ // - None of our outer *or* inner attributes require tokens.
264
+ // (`attrs` was just outer attributes, but `ret.attrs()` is outer
265
+ // and inner attributes. That makes this check more precise than
266
+ // `attrs.is_complete()` at the start of the function, and we can
267
+ // skip the subsequent check on `R::SUPPORTS_CUSTOM_INNER_ATTRS`.
244
268
&& crate :: parser:: attr:: is_complete ( ret. attrs ( ) )
245
- // Subtle: We call `has_cfg_or_cfg_attr` with the attrs from `ret`.
246
- // This ensures that we consider inner attributes (e.g. `#![cfg]`),
247
- // which require us to have tokens available
248
- // We also call `has_cfg_or_cfg_attr` at the beginning of this function,
249
- // but we only bail out if there's no possibility of inner attributes
250
- // (!R::SUPPORTS_CUSTOM_INNER_ATTRS)
251
- // We only capture about `#[cfg]` or `#[cfg_attr]` in `capture_cfg`
252
- // mode - during normal parsing, we don't need any special capturing
253
- // for those attributes, since they're builtin.
254
- && !( self . capture_cfg && has_cfg_or_cfg_attr ( ret. attrs ( ) ) )
269
+ // - We are not in `capture_cfg` mode, or we are but there are no
270
+ // `#[cfg]` or `#[cfg_attr]` attributes. (During normal
271
+ // non-`capture_cfg` parsing, we don't need any special capturing
272
+ // for those attributes, because they're builtin.)
273
+ && ( !self . capture_cfg || !has_cfg_or_cfg_attr ( ret. attrs ( ) ) )
255
274
{
256
275
return Ok ( ret) ;
257
276
}
258
277
259
- let mut inner_attr_replace_ranges = Vec :: new ( ) ;
260
- // Take the captured ranges for any inner attributes that we parsed.
261
- for inner_attr in ret. attrs ( ) . iter ( ) . filter ( |a| a. style == ast:: AttrStyle :: Inner ) {
262
- if let Some ( attr_range) = self . capture_state . inner_attr_ranges . remove ( & inner_attr. id ) {
263
- inner_attr_replace_ranges. push ( attr_range) ;
264
- } else {
265
- self . dcx ( ) . span_delayed_bug ( inner_attr. span , "Missing token range for attribute" ) ;
266
- }
267
- }
268
-
269
278
let replace_ranges_end = self . capture_state . replace_ranges . len ( ) ;
270
279
271
280
assert ! (
@@ -283,15 +292,28 @@ impl<'a> Parser<'a> {
283
292
284
293
let num_calls = end_pos - start_pos;
285
294
295
+ // Take the captured ranges for any inner attributes that we parsed in
296
+ // `Parser::parse_inner_attributes`, and pair them in a `ReplaceRange`
297
+ // with `None`, which means the relevant tokens will be removed. (More
298
+ // details below.)
299
+ let mut inner_attr_replace_ranges = Vec :: new ( ) ;
300
+ for inner_attr in ret. attrs ( ) . iter ( ) . filter ( |a| a. style == ast:: AttrStyle :: Inner ) {
301
+ if let Some ( attr_range) = self . capture_state . inner_attr_ranges . remove ( & inner_attr. id ) {
302
+ inner_attr_replace_ranges. push ( ( attr_range, None ) ) ;
303
+ } else {
304
+ self . dcx ( ) . span_delayed_bug ( inner_attr. span , "Missing token range for attribute" ) ;
305
+ }
306
+ }
307
+
286
308
// This is hot enough for `deep-vector` that checking the conditions for an empty iterator
287
309
// is measurably faster than actually executing the iterator.
288
310
let replace_ranges: Box < [ ReplaceRange ] > =
289
311
if replace_ranges_start == replace_ranges_end && inner_attr_replace_ranges. is_empty ( ) {
290
312
Box :: new ( [ ] )
291
313
} else {
292
- // Grab any replace ranges that occur *inside* the current AST node.
293
- // We will perform the actual replacement when we convert the `LazyAttrTokenStream`
294
- // to an `AttrTokenStream`.
314
+ // Grab any replace ranges that occur *inside* the current AST node. We will
315
+ // perform the actual replacement only when we convert the `LazyAttrTokenStream` to
316
+ // an `AttrTokenStream`.
295
317
self . capture_state . replace_ranges [ replace_ranges_start..replace_ranges_end]
296
318
. iter ( )
297
319
. cloned ( )
@@ -300,6 +322,28 @@ impl<'a> Parser<'a> {
300
322
. collect ( )
301
323
} ;
302
324
325
+ // What is the status here when parsing the example code at the top of this method?
326
+ //
327
+ // When parsing `g`:
328
+ // - `start_pos..end_pos` is `12..33` (`fn g { ... }`, excluding the outer attr).
329
+ // - `inner_attr_replace_ranges` has one entry (`5..15`, when counting from `fn`), to
330
+ // delete the inner attr's tokens.
331
+ // - This entry is put into the lazy tokens for `g`, i.e. deleting the inner attr from
332
+ // those tokens (if they get evaluated).
333
+ // - Those lazy tokens are also put into an `AttrsTarget` that is appended to `self`'s
334
+ // replace ranges at the bottom of this function, for processing when parsing `m`.
335
+ // - `replace_ranges_start..replace_ranges_end` is empty.
336
+ //
337
+ // When parsing `m`:
338
+ // - `start_pos..end_pos` is `0..34` (`mod m`, excluding the `#[cfg_eval]` attribute).
339
+ // - `inner_attr_replace_ranges` is empty.
340
+ // - `replace_range_start..replace_ranges_end` has two entries.
341
+ // - One to delete the inner attribute (`17..27`), obtained when parsing `g` (see above).
342
+ // - One `AttrsTarget` (added below when parsing `g`) to replace all of `g` (`3..33`,
343
+ // including its outer attribute), with:
344
+ // - `attrs`: includes the outer and the inner attr.
345
+ // - `tokens`: lazy tokens for `g` (with its inner attr deleted).
346
+
303
347
let tokens = LazyAttrTokenStream :: new ( LazyAttrTokenStreamImpl {
304
348
start_token,
305
349
num_calls,
@@ -313,27 +357,37 @@ impl<'a> Parser<'a> {
313
357
* target_tokens = Some ( tokens. clone ( ) ) ;
314
358
}
315
359
316
- let final_attrs = ret. attrs ( ) ;
317
-
318
360
// If `capture_cfg` is set and we're inside a recursive call to
319
361
// `collect_tokens_trailing_token`, then we need to register a replace range
320
362
// if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
321
363
// on the captured token stream.
322
364
if self . capture_cfg
323
365
&& matches ! ( self . capture_state. capturing, Capturing :: Yes )
324
- && has_cfg_or_cfg_attr ( final_attrs )
366
+ && has_cfg_or_cfg_attr ( ret . attrs ( ) )
325
367
{
326
368
assert ! ( !self . break_last_token, "Should not have unglued last token with cfg attr" ) ;
327
369
328
- // Replace the entire AST node that we just parsed, including attributes, with
329
- // `target`. If this AST node is inside an item that has `#[derive]`, then this will
330
- // allow us to cfg-expand this AST node.
370
+ // What is the status here when parsing the example code at the top of this method?
371
+ //
372
+ // When parsing `g`, we add two entries:
373
+ // - The `start_pos..end_pos` (`3..33`) entry has a new `AttrsTarget` with:
374
+ // - `attrs`: includes the outer and the inner attr.
375
+ // - `tokens`: lazy tokens for `g` (with its inner attr deleted).
376
+ // - `inner_attr_replace_ranges` contains the one entry to delete the inner attr's
377
+ // tokens (`17..27`).
378
+ //
379
+ // When parsing `m`, we do nothing here.
380
+
381
+ // Set things up so that the entire AST node that we just parsed, including attributes,
382
+ // will be replaced with `target` in the lazy token stream. This will allow us to
383
+ // cfg-expand this AST node.
331
384
let start_pos = if has_outer_attrs { attrs. start_pos } else { start_pos } ;
332
- let target = AttrsTarget { attrs : final_attrs . iter ( ) . cloned ( ) . collect ( ) , tokens } ;
385
+ let target = AttrsTarget { attrs : ret . attrs ( ) . iter ( ) . cloned ( ) . collect ( ) , tokens } ;
333
386
self . capture_state . replace_ranges . push ( ( start_pos..end_pos, Some ( target) ) ) ;
334
387
self . capture_state . replace_ranges . extend ( inner_attr_replace_ranges) ;
335
388
} else if matches ! ( self . capture_state. capturing, Capturing :: No ) {
336
- // Only clear the ranges once we've finished capturing entirely.
389
+ // Only clear the ranges once we've finished capturing entirely, i.e. we've finished
390
+ // the outermost call to this method.
337
391
self . capture_state . replace_ranges . clear ( ) ;
338
392
self . capture_state . inner_attr_ranges . clear ( ) ;
339
393
}
0 commit comments