Skip to content

Commit 524d21b

Browse files
committed
Overhaul how matches are recorded.
Currently, matches within a sequence are recorded in a new, empty `matches` vector; when the sequence finishes, those matches are merged into the `matches` vector of the parent. This commit changes things so that a sequence `MatcherPos` (mp) inherits the matches made so far, which means that additional matches from the sequence don't need to be merged into the parent. `push_match` becomes more complicated, and the current sequence depth needs to be tracked, but it's a sizeable performance win because it avoids one or more `push_match` calls on every iteration of a sequence. The commit also removes `match_hi`, which is no longer necessary.
1 parent a1b140c commit 524d21b

File tree

1 file changed

+55
-48
lines changed

1 file changed

+55
-48
lines changed

compiler/rustc_expand/src/mbe/macro_parser.rs

+55-48
Original file line numberDiff line numberDiff line change
@@ -125,33 +125,23 @@ struct MatcherPos<'tt> {
125125
/// The "dot" position within the current submatcher, i.e. the index into `tts`.
126126
idx: usize,
127127

128-
/// This boxed slice has one element per metavar in the *top-level* matcher, even when this
128+
/// This vector ends up with one element per metavar in the *top-level* matcher, even when this
129129
/// `MatcherPos` is for a submatcher. Each element records token trees matched against the
130-
/// relevant metavar by the black box parser.
131-
///
132-
/// In a top-level `MatcherPos` each `NamedMatchVec` will have zero elements before processing
133-
/// and one element after processing; the one element will be a `MatchedSeq` if the
130+
/// relevant metavar by the black box parser. The element will be a `MatchedSeq` if the
134131
/// corresponding metavar is within a sequence.
135-
///
136-
/// In a sequence submatcher each `NamedMatchVec` will have zero elements before processing and
137-
/// any number of elements after processing (as allowed by the sequence's Kleene op, i.e.
138-
/// zero-or-one, zero-or-more, one-or-more). After processing these elements will be merged
139-
/// into the parent `MatcherPos`'s matches (within a `MatchedSeq`).
140-
matches: Box<[Lrc<NamedMatchVec>]>,
132+
matches: Lrc<NamedMatchVec>,
133+
134+
/// The number of sequences this mp is within.
135+
seq_depth: usize,
141136

142137
/// The position in `matches` of the first metavar in this (sub)matcher. Zero if there are
143138
/// no metavars.
144139
match_lo: usize,
145140

146141
/// The position in `matches` of the next metavar to be matched against the source token
147-
/// stream. `match_lo <= match_cur <= match_hi`. Should not be used if there are no metavars,
148-
/// i.e. `match_lo == match_hi`.
142+
/// stream. Should not be used if there are no metavars.
149143
match_cur: usize,
150144

151-
/// The position in `matches` one past the last metavar in this (sub)matcher. Equal to
152-
/// `match_lo` if there are no metavars.
153-
match_hi: usize,
154-
155145
/// This field is only used if we are matching a sequence.
156146
sequence: Option<MatcherPosSequence<'tt>>,
157147

@@ -162,50 +152,73 @@ struct MatcherPos<'tt> {
162152

163153
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
164154
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
165-
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 112);
155+
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 104);
166156

167157
impl<'tt> MatcherPos<'tt> {
168-
fn empty_matches(len: usize) -> Box<[Lrc<NamedMatchVec>]> {
169-
if len == 0 {
170-
vec![]
171-
} else {
172-
let empty_matches = Lrc::new(SmallVec::new());
173-
vec![empty_matches; len]
174-
}
175-
.into_boxed_slice()
176-
}
177-
178158
fn top_level(matcher: &'tt [TokenTree]) -> Self {
179-
let match_idx_hi = count_metavar_decls(matcher);
180159
MatcherPos {
181160
tts: matcher,
182161
idx: 0,
183-
matches: Self::empty_matches(match_idx_hi),
162+
matches: Lrc::new(smallvec![]),
163+
seq_depth: 0,
184164
match_lo: 0,
185165
match_cur: 0,
186-
match_hi: match_idx_hi,
187166
stack: smallvec![],
188167
sequence: None,
189168
}
190169
}
191170

192171
fn sequence(parent: Box<MatcherPos<'tt>>, seq: &'tt SequenceRepetition) -> Self {
193-
MatcherPos {
172+
let mut mp = MatcherPos {
194173
tts: &seq.tts,
195174
idx: 0,
196-
matches: Self::empty_matches(parent.matches.len()),
175+
matches: parent.matches.clone(),
176+
seq_depth: parent.seq_depth,
197177
match_lo: parent.match_cur,
198178
match_cur: parent.match_cur,
199-
match_hi: parent.match_cur + seq.num_captures,
200179
sequence: Some(MatcherPosSequence { parent, seq }),
201180
stack: smallvec![],
181+
};
182+
// Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth`
183+
// must have the parent's depth at this point for these `push_match` calls to work.
184+
for idx in mp.match_lo..mp.match_lo + seq.num_captures {
185+
mp.push_match(idx, MatchedSeq(Lrc::new(smallvec![])));
202186
}
187+
mp.seq_depth += 1;
188+
mp
203189
}
204190

205191
/// Adds `m` as a named match for the `idx`-th metavar.
206192
fn push_match(&mut self, idx: usize, m: NamedMatch) {
207-
let matches = Lrc::make_mut(&mut self.matches[idx]);
208-
matches.push(m);
193+
let matches = Lrc::make_mut(&mut self.matches);
194+
match self.seq_depth {
195+
0 => {
196+
// We are not within a sequence. Just append `m`.
197+
assert_eq!(idx, matches.len());
198+
matches.push(m);
199+
}
200+
_ => {
201+
// We are within a sequence. Find the final `MatchedSeq` at the appropriate depth
202+
// and append `m` to its vector.
203+
let mut curr = &mut matches[idx];
204+
for _ in 0..self.seq_depth - 1 {
205+
match curr {
206+
MatchedSeq(seq) => {
207+
let seq = Lrc::make_mut(seq);
208+
curr = seq.last_mut().unwrap();
209+
}
210+
_ => unreachable!(),
211+
}
212+
}
213+
match curr {
214+
MatchedSeq(seq) => {
215+
let seq = Lrc::make_mut(seq);
216+
seq.push(m);
217+
}
218+
_ => unreachable!(),
219+
}
220+
}
221+
}
209222
}
210223
}
211224

@@ -528,11 +541,8 @@ impl<'tt> TtParser<'tt> {
528541
// sequence in `parent`. This allows for the case where the sequence matching
529542
// is finished.
530543
let mut new_mp = sequence.parent.clone();
531-
for idx in mp.match_lo..mp.match_hi {
532-
let sub = mp.matches[idx].clone();
533-
new_mp.push_match(idx, MatchedSeq(sub));
534-
}
535-
new_mp.match_cur = mp.match_hi;
544+
new_mp.matches = mp.matches.clone();
545+
new_mp.match_cur = mp.match_lo + sequence.seq.num_captures;
536546
new_mp.idx += 1;
537547
self.cur_mps.push(new_mp);
538548
}
@@ -577,13 +587,10 @@ impl<'tt> TtParser<'tt> {
577587
if *token == token::Eof {
578588
Some(match eof_mps {
579589
EofMatcherPositions::One(mut eof_mp) => {
580-
let matches = eof_mp.matches.iter_mut().map(|dv| {
581-
// Top-level metavars only ever get one match. (Sub-matchers can get
582-
// multiple matches, which get aggregated into a `MatchedSeq` before being
583-
// put into the top-level.)
584-
debug_assert_eq!(dv.len(), 1);
585-
Lrc::make_mut(dv).pop().unwrap()
586-
});
590+
assert_eq!(eof_mp.matches.len(), count_metavar_decls(matcher));
591+
// Need to take ownership of the matches from within the `Lrc`.
592+
Lrc::make_mut(&mut eof_mp.matches);
593+
let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter();
587594
nameize(sess, matcher, matches)
588595
}
589596
EofMatcherPositions::Multiple => {

0 commit comments

Comments
 (0)