
Commit 10fa210

Auto merge of rust-lang#131650 - saethlin:post-mono-mir-opts, r=<try>
Add post-mono MIR optimizations

Before this PR, all MIR passes had to operate on polymorphic MIR. Thus any MIR transform may be unable to determine the type of an argument or local (because it's still generic), or it may be unable to determine which function a Call terminator is calling (because it's still generic). MIR transforms are a highly maintainable solution to a number of compiler problems, but this polymorphic limitation means they cannot solve some of the problems we'd like them to; the most recent examples that come to mind are rust-lang#134082, which has extra limitations because of the polymorphic inliner, and rust-lang#139088, which is explicitly waiting for post-mono MIR passes to happen.

In addition, the lack of post-mono MIR optimization means that MIR optimizations simply miss out on profitable opportunities, which are so valuable that we've added kludges like rust-lang#121421 (a MIR traversal that you had better only run at mono time). Furthermore, rustc_codegen_ssa is riddled with on-the-fly monomorphization and optimization; in my experience the logic for these tricks that we do in codegen is hard to maintain, and I would much rather have it implemented as MIR transforms.

So this PR adds a new query `codegen_mir` (the MIR for codegen, not that I like the name). I've then replaced _some_ of the kludges in rustc_codegen_ssa with `PostMono` variants of existing MIR transforms. I've also un-querified `check_mono_item` and put it at the end of the post-mono pass list. Those checks should be post-mono passes too, but I've tried to keep this PR to a reviewable size. It's easy to imagine lots of other places to use post-mono MIR opts, and I want the usefulness of this to be clear while the diff stays manageable.

---

This PR has a perf regression. I've hammered on the perf in a number of ways to get it down to what it is. incr-full builds suffer the most because they need to clone, intern, and cache a monomorphized copy of every MIR body. Results are mixed for every other build scenario. In almost all cases, binary sizes improve.
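To make the motivation concrete, here is a small, self-contained Rust illustration (illustrative only, not code from this PR): before monomorphization, the MIR for `run` calls `<T as Op>::apply` through a still-generic callee and reads a still-generic constant `<T as Op>::SHIFT`, so polymorphic MIR passes such as inlining or constant propagation cannot resolve either; the post-mono MIR for `run::<Double>` sees both concretely.

// Hypothetical example, not from this PR: pre-mono MIR for `run` has a Call
// terminator whose callee and a constant that are both still generic; after
// substituting `T = Double` they become a concrete function and a concrete value.
trait Op {
    const SHIFT: u32;
    fn apply(&self, x: u32) -> u32;
}

struct Double;

impl Op for Double {
    const SHIFT: u32 = 1;
    fn apply(&self, x: u32) -> u32 {
        x << Self::SHIFT
    }
}

fn run<T: Op>(op: &T, x: u32) -> u32 {
    // Polymorphic MIR: unknown callee and unknown T::SHIFT.
    // Post-mono MIR of run::<Double>: both are known and trivially optimizable.
    op.apply(x) + T::SHIFT
}

fn main() {
    assert_eq!(run(&Double, 3), 7);
}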
2 parents 5ae50d3 + 6a7b022

Note: large commits have some content hidden by default, so only a subset of the changed files appears below.

51 files changed: +400, -659 lines

Cargo.lock (-1)

@@ -4135,7 +4135,6 @@ dependencies = [
 name = "rustc_monomorphize"
 version = "0.0.0"
 dependencies = [
- "rustc_abi",
  "rustc_ast",
  "rustc_attr_parsing",
  "rustc_data_structures",

compiler/rustc_codegen_cranelift/src/base.rs (+1 -10)

@@ -43,7 +43,7 @@ pub(crate) fn codegen_fn<'tcx>(
     let symbol_name = tcx.symbol_name(instance).name.to_string();
     let _timer = tcx.prof.generic_activity_with_arg("codegen fn", &*symbol_name);

-    let mir = tcx.instance_mir(instance.def);
+    let mir = tcx.codegen_mir(instance);
     let _mir_guard = crate::PrintOnPanic(|| {
         let mut buf = Vec::new();
         with_no_trimmed_paths!({
@@ -305,19 +305,10 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
         .generic_activity("codegen prelude")
         .run(|| crate::abi::codegen_fn_prelude(fx, start_block));

-    let reachable_blocks = traversal::mono_reachable_as_bitset(fx.mir, fx.tcx, fx.instance);
-
     for (bb, bb_data) in fx.mir.basic_blocks.iter_enumerated() {
         let block = fx.get_block(bb);
         fx.bcx.switch_to_block(block);

-        if !reachable_blocks.contains(bb) {
-            // We want to skip this block, because it's not reachable. But we still create
-            // the block so terminators in other blocks can reference it.
-            fx.bcx.ins().trap(TrapCode::user(1 /* unreachable */).unwrap());
-            continue;
-        }
-
         if bb_data.is_cleanup {
             // Unwinding after panicking is not supported
             continue;

compiler/rustc_codegen_ssa/src/base.rs (+1 -1)

@@ -414,7 +414,7 @@ pub(crate) fn codegen_instance<'a, 'tcx: 'a, Bx: BuilderMethods<'a, 'tcx>>(
     // release builds.
     info!("codegen_instance({})", instance);

-    mir::codegen_mir::<Bx>(cx, instance);
+    mir::lower_mir::<Bx>(cx, instance);
 }

 /// Creates the `main` function which will initialize the rust runtime and call

compiler/rustc_codegen_ssa/src/mir/block.rs (-10)

@@ -1318,16 +1318,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         }
     }

-    pub(crate) fn codegen_block_as_unreachable(&mut self, bb: mir::BasicBlock) {
-        let llbb = match self.try_llbb(bb) {
-            Some(llbb) => llbb,
-            None => return,
-        };
-        let bx = &mut Bx::build(self.cx, llbb);
-        debug!("codegen_block_as_unreachable({:?})", bb);
-        bx.unreachable();
-    }
-
     fn codegen_terminator(
         &mut self,
         bx: &mut Bx,

compiler/rustc_codegen_ssa/src/mir/mod.rs (+5 -20)

@@ -126,12 +126,7 @@
     where
         T: Copy + TypeFoldable<TyCtxt<'tcx>>,
     {
-        debug!("monomorphize: self.instance={:?}", self.instance);
-        self.instance.instantiate_mir_and_normalize_erasing_regions(
-            self.cx.tcx(),
-            self.cx.typing_env(),
-            ty::EarlyBinder::bind(value),
-        )
+        value
     }
 }

@@ -164,7 +159,7 @@ impl<'tcx, V: CodegenObject> LocalRef<'tcx, V> {
 ///////////////////////////////////////////////////////////////////////////

 #[instrument(level = "debug", skip(cx))]
-pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
+pub fn lower_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     cx: &'a Bx::CodegenCx,
     instance: Instance<'tcx>,
 ) {
@@ -173,7 +168,7 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     let tcx = cx.tcx();
     let llfn = cx.get_fn(instance);

-    let mut mir = tcx.instance_mir(instance.def);
+    let mut mir = tcx.codegen_mir(instance);

     let fn_abi = cx.fn_abi_of_instance(instance, ty::List::empty());
     debug!("fn_abi: {:?}", fn_abi);
@@ -243,7 +238,8 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         fx.compute_per_local_var_debug_info(&mut start_bx).unzip();
     fx.per_local_var_debug_info = per_local_var_debug_info;

-    let traversal_order = traversal::mono_reachable_reverse_postorder(mir, tcx, instance);
+    let traversal_order: Vec<_> =
+        traversal::reverse_postorder(mir).map(|(block, _data)| block).collect();
     let memory_locals = analyze::non_ssa_locals(&fx, &traversal_order);

     // Allocate variable and temp allocas
@@ -303,20 +299,9 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     // So drop the builder of `start_llbb` to avoid having two at the same time.
     drop(start_bx);

-    let mut unreached_blocks = DenseBitSet::new_filled(mir.basic_blocks.len());
     // Codegen the body of each reachable block using our reverse postorder list.
     for bb in traversal_order {
         fx.codegen_block(bb);
-        unreached_blocks.remove(bb);
-    }
-
-    // FIXME: These empty unreachable blocks are *mostly* a waste. They are occasionally
-    // targets for a SwitchInt terminator, but the reimplementation of the mono-reachable
-    // simplification in SwitchInt lowering sometimes misses cases that
-    // mono_reachable_reverse_postorder manages to figure out.
-    // The solution is to do something like post-mono GVN. But for now we have this hack.
-    for bb in unreached_blocks.iter() {
-        fx.codegen_block_as_unreachable(bb);
     }
 }

compiler/rustc_middle/src/mir/basic_blocks.rs (+1 -1)

@@ -80,7 +80,7 @@
     #[inline]
     pub fn reverse_postorder(&self) -> &[BasicBlock] {
         self.cache.reverse_postorder.get_or_init(|| {
-            let mut rpo: Vec<_> = Postorder::new(&self.basic_blocks, START_BLOCK, None).collect();
+            let mut rpo: Vec<_> = Postorder::new(&self.basic_blocks, START_BLOCK).collect();
             rpo.reverse();
             rpo
         })

compiler/rustc_middle/src/mir/mod.rs (+2 -83)

@@ -33,8 +33,8 @@ use crate::mir::interpret::{AllocRange, Scalar};
 use crate::ty::codec::{TyDecoder, TyEncoder};
 use crate::ty::print::{FmtPrinter, Printer, pretty_print_const, with_no_trimmed_paths};
 use crate::ty::{
-    self, GenericArg, GenericArgsRef, Instance, InstanceKind, List, Ty, TyCtxt, TypeVisitableExt,
-    TypingEnv, UserTypeAnnotationIndex,
+    self, GenericArg, GenericArgsRef, InstanceKind, List, Ty, TyCtxt, TypeVisitableExt, TypingEnv,
+    UserTypeAnnotationIndex,
 };

 mod basic_blocks;
@@ -605,74 +605,6 @@
         self.injection_phase.is_some()
     }

-    /// If this basic block ends with a [`TerminatorKind::SwitchInt`] for which we can evaluate the
-    /// discriminant in monomorphization, we return the discriminant bits and the
-    /// [`SwitchTargets`], just so the caller doesn't also have to match on the terminator.
-    fn try_const_mono_switchint<'a>(
-        tcx: TyCtxt<'tcx>,
-        instance: Instance<'tcx>,
-        block: &'a BasicBlockData<'tcx>,
-    ) -> Option<(u128, &'a SwitchTargets)> {
-        // There are two places here we need to evaluate a constant.
-        let eval_mono_const = |constant: &ConstOperand<'tcx>| {
-            // FIXME(#132279): what is this, why are we using an empty environment here.
-            let typing_env = ty::TypingEnv::fully_monomorphized();
-            let mono_literal = instance.instantiate_mir_and_normalize_erasing_regions(
-                tcx,
-                typing_env,
-                crate::ty::EarlyBinder::bind(constant.const_),
-            );
-            mono_literal.try_eval_bits(tcx, typing_env)
-        };
-
-        let TerminatorKind::SwitchInt { discr, targets } = &block.terminator().kind else {
-            return None;
-        };
-
-        // If this is a SwitchInt(const _), then we can just evaluate the constant and return.
-        let discr = match discr {
-            Operand::Constant(constant) => {
-                let bits = eval_mono_const(constant)?;
-                return Some((bits, targets));
-            }
-            Operand::Move(place) | Operand::Copy(place) => place,
-        };
-
-        // MIR for `if false` actually looks like this:
-        // _1 = const _
-        // SwitchInt(_1)
-        //
-        // And MIR for if intrinsics::ub_checks() looks like this:
-        // _1 = UbChecks()
-        // SwitchInt(_1)
-        //
-        // So we're going to try to recognize this pattern.
-        //
-        // If we have a SwitchInt on a non-const place, we find the most recent statement that
-        // isn't a storage marker. If that statement is an assignment of a const to our
-        // discriminant place, we evaluate and return the const, as if we've const-propagated it
-        // into the SwitchInt.
-
-        let last_stmt = block.statements.iter().rev().find(|stmt| {
-            !matches!(stmt.kind, StatementKind::StorageDead(_) | StatementKind::StorageLive(_))
-        })?;
-
-        let (place, rvalue) = last_stmt.kind.as_assign()?;
-
-        if discr != place {
-            return None;
-        }
-
-        match rvalue {
-            Rvalue::NullaryOp(NullOp::UbChecks, _) => Some((tcx.sess.ub_checks() as u128, targets)),
-            Rvalue::Use(Operand::Constant(constant)) => {
-                let bits = eval_mono_const(constant)?;
-                Some((bits, targets))
-            }
-            _ => None,
-        }
-    }
-
     /// For a `Location` in this scope, determine what the "caller location" at that point is. This
     /// is interesting because of inlining: the `#[track_caller]` attribute of inlined functions
     /// must be honored. Falls back to the `tracked_caller` value for `#[track_caller]` functions,
@@ -1353,19 +1285,6 @@
     pub fn is_empty_unreachable(&self) -> bool {
         self.statements.is_empty() && matches!(self.terminator().kind, TerminatorKind::Unreachable)
     }
-
-    /// Like [`Terminator::successors`] but tries to use information available from the [`Instance`]
-    /// to skip successors like the `false` side of an `if const {`.
-    ///
-    /// This is used to implement [`traversal::mono_reachable`] and
-    /// [`traversal::mono_reachable_reverse_postorder`].
-    pub fn mono_successors(&self, tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Successors<'_> {
-        if let Some((bits, targets)) = Body::try_const_mono_switchint(tcx, instance, self) {
-            targets.successors_for_value(bits)
-        } else {
-            self.terminator().successors()
-        }
-    }
 }

 ///////////////////////////////////////////////////////////////////////////

compiler/rustc_middle/src/mir/traversal.rs (+1 -111)

@@ -98,21 +98,17 @@ pub struct Postorder<'a, 'tcx> {
     basic_blocks: &'a IndexSlice<BasicBlock, BasicBlockData<'tcx>>,
     visited: DenseBitSet<BasicBlock>,
     visit_stack: Vec<(BasicBlock, Successors<'a>)>,
-    /// A non-empty `extra` allows for a precise calculation of the successors.
-    extra: Option<(TyCtxt<'tcx>, Instance<'tcx>)>,
 }

 impl<'a, 'tcx> Postorder<'a, 'tcx> {
     pub fn new(
         basic_blocks: &'a IndexSlice<BasicBlock, BasicBlockData<'tcx>>,
         root: BasicBlock,
-        extra: Option<(TyCtxt<'tcx>, Instance<'tcx>)>,
     ) -> Postorder<'a, 'tcx> {
         let mut po = Postorder {
             basic_blocks,
             visited: DenseBitSet::new_empty(basic_blocks.len()),
             visit_stack: Vec::new(),
-            extra,
         };

         po.visit(root);
@@ -126,11 +122,7 @@
             return;
         }
         let data = &self.basic_blocks[bb];
-        let successors = if let Some(extra) = self.extra {
-            data.mono_successors(extra.0, extra.1)
-        } else {
-            data.terminator().successors()
-        };
+        let successors = data.terminator().successors();
         self.visit_stack.push((bb, successors));
     }

@@ -225,20 +217,6 @@
     reverse_postorder(body).rev()
 }

-pub fn mono_reachable_reverse_postorder<'a, 'tcx>(
-    body: &'a Body<'tcx>,
-    tcx: TyCtxt<'tcx>,
-    instance: Instance<'tcx>,
-) -> Vec<BasicBlock> {
-    let mut iter = Postorder::new(&body.basic_blocks, START_BLOCK, Some((tcx, instance)));
-    let mut items = Vec::with_capacity(body.basic_blocks.len());
-    while let Some(block) = iter.next() {
-        items.push(block);
-    }
-    items.reverse();
-    items
-}
-
 /// Returns an iterator over all basic blocks reachable from the `START_BLOCK` in no particular
 /// order.
 ///
@@ -286,91 +264,3 @@ pub fn reverse_postorder<'a, 'tcx>(
 {
     body.basic_blocks.reverse_postorder().iter().map(|&bb| (bb, &body.basic_blocks[bb]))
 }
-
-/// Traversal of a [`Body`] that tries to avoid unreachable blocks in a monomorphized [`Instance`].
-///
-/// This is allowed to have false positives; blocks may be visited even if they are not actually
-/// reachable.
-///
-/// Such a traversal is mostly useful because it lets us skip lowering the `false` side
-/// of `if <T as Trait>::CONST`, as well as [`NullOp::UbChecks`].
-///
-/// [`NullOp::UbChecks`]: rustc_middle::mir::NullOp::UbChecks
-pub fn mono_reachable<'a, 'tcx>(
-    body: &'a Body<'tcx>,
-    tcx: TyCtxt<'tcx>,
-    instance: Instance<'tcx>,
-) -> MonoReachable<'a, 'tcx> {
-    MonoReachable::new(body, tcx, instance)
-}
-
-/// [`MonoReachable`] internally accumulates a [`DenseBitSet`] of visited blocks. This is just a
-/// convenience function to run that traversal then extract its set of reached blocks.
-pub fn mono_reachable_as_bitset<'a, 'tcx>(
-    body: &'a Body<'tcx>,
-    tcx: TyCtxt<'tcx>,
-    instance: Instance<'tcx>,
-) -> DenseBitSet<BasicBlock> {
-    let mut iter = mono_reachable(body, tcx, instance);
-    while let Some(_) = iter.next() {}
-    iter.visited
-}
-
-pub struct MonoReachable<'a, 'tcx> {
-    body: &'a Body<'tcx>,
-    tcx: TyCtxt<'tcx>,
-    instance: Instance<'tcx>,
-    visited: DenseBitSet<BasicBlock>,
-    // Other traversers track their worklist in a Vec. But we don't care about order, so we can
-    // store ours in a DenseBitSet and thus save allocations because DenseBitSet has a small size
-    // optimization.
-    worklist: DenseBitSet<BasicBlock>,
-}
-
-impl<'a, 'tcx> MonoReachable<'a, 'tcx> {
-    pub fn new(
-        body: &'a Body<'tcx>,
-        tcx: TyCtxt<'tcx>,
-        instance: Instance<'tcx>,
-    ) -> MonoReachable<'a, 'tcx> {
-        let mut worklist = DenseBitSet::new_empty(body.basic_blocks.len());
-        worklist.insert(START_BLOCK);
-        MonoReachable {
-            body,
-            tcx,
-            instance,
-            visited: DenseBitSet::new_empty(body.basic_blocks.len()),
-            worklist,
-        }
-    }
-
-    fn add_work(&mut self, blocks: impl IntoIterator<Item = BasicBlock>) {
-        for block in blocks.into_iter() {
-            if !self.visited.contains(block) {
-                self.worklist.insert(block);
-            }
-        }
-    }
-}
-
-impl<'a, 'tcx> Iterator for MonoReachable<'a, 'tcx> {
-    type Item = (BasicBlock, &'a BasicBlockData<'tcx>);
-
-    fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> {
-        while let Some(idx) = self.worklist.iter().next() {
-            self.worklist.remove(idx);
-            if !self.visited.insert(idx) {
-                continue;
-            }
-
-            let data = &self.body[idx];
-
-            let targets = data.mono_successors(self.tcx, self.instance);
-            self.add_work(targets);
-
-            return Some((idx, data));
-        }
-
-        None
-    }
-}
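With the instance-aware pruning removed, `Postorder` is an ordinary graph traversal again. As a reference point, here is a minimal standalone sketch of reverse postorder over a toy CFG (illustrative only; the recursive helper and adjacency-list representation are not rustc's):

// Toy reverse-postorder over an adjacency-list CFG (illustrative only, not
// rustc code). Block 0 plays the role of START_BLOCK.
fn postorder(cfg: &[Vec<usize>], node: usize, seen: &mut [bool], out: &mut Vec<usize>) {
    if std::mem::replace(&mut seen[node], true) {
        return; // already visited
    }
    for &succ in &cfg[node] {
        postorder(cfg, succ, seen, out);
    }
    out.push(node); // emit after all successors: postorder
}

fn reverse_postorder(cfg: &[Vec<usize>]) -> Vec<usize> {
    let mut seen = vec![false; cfg.len()];
    let mut out = Vec::new();
    postorder(cfg, 0, &mut seen, &mut out);
    out.reverse(); // each block now appears before its non-back-edge successors
    out
}

fn main() {
    // 0 -> {1, 2}, 1 -> {3}, 2 -> {3}, 3 -> {}
    let cfg = vec![vec![1, 2], vec![3], vec![3], vec![]];
    let rpo = reverse_postorder(&cfg);
    assert_eq!(rpo[0], 0); // the start block comes first
    assert_eq!(*rpo.last().unwrap(), 3); // the join block comes after both predecessors
}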
