Skip to content

Allow more top-down inlining for single-BB callees #138157

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 26 additions & 21 deletions compiler/rustc_mir_transform/src/cost_checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,29 +37,11 @@ impl<'b, 'tcx> CostChecker<'b, 'tcx> {
/// and even the full `Inline` doesn't call `visit_body`, so there's nowhere
/// to put this logic in the visitor.
pub(super) fn add_function_level_costs(&mut self) {
fn is_call_like(bbd: &BasicBlockData<'_>) -> bool {
use TerminatorKind::*;
match bbd.terminator().kind {
Call { .. } | TailCall { .. } | Drop { .. } | Assert { .. } | InlineAsm { .. } => {
true
}

Goto { .. }
| SwitchInt { .. }
| UnwindResume
| UnwindTerminate(_)
| Return
| Unreachable => false,

Yield { .. } | CoroutineDrop | FalseEdge { .. } | FalseUnwind { .. } => {
unreachable!()
}
}
}

// If the only has one Call (or similar), inlining isn't increasing the total
// number of calls, so give extra encouragement to inlining that.
if self.callee_body.basic_blocks.iter().filter(|bbd| is_call_like(bbd)).count() == 1 {
if self.callee_body.basic_blocks.iter().filter(|bbd| is_call_like(bbd.terminator())).count()
== 1
{
self.bonus += CALL_PENALTY;
}
}
Expand Down Expand Up @@ -193,3 +175,26 @@ impl<'tcx> Visitor<'tcx> for CostChecker<'_, 'tcx> {
}
}
}

/// A terminator that's more call-like (might do a bunch of work, might panic, etc)
/// than it is goto-/return-like (no side effects, etc).
///
/// Used to treat multi-call functions (which could inline exponentially)
/// different from those that only do one or none of these "complex" things.
pub(super) fn is_call_like(terminator: &Terminator<'_>) -> bool {
use TerminatorKind::*;
match terminator.kind {
Call { .. } | TailCall { .. } | Drop { .. } | Assert { .. } | InlineAsm { .. } => true,

Goto { .. }
| SwitchInt { .. }
| UnwindResume
| UnwindTerminate(_)
| Return
| Unreachable => false,

Yield { .. } | CoroutineDrop | FalseEdge { .. } | FalseUnwind { .. } => {
unreachable!()
}
}
}
94 changes: 57 additions & 37 deletions compiler/rustc_mir_transform/src/inline.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Inlining pass for MIR functions.

use std::assert_matches::debug_assert_matches;
use std::iter;
use std::ops::{Range, RangeFrom};

Expand All @@ -18,14 +19,15 @@ use rustc_session::config::{DebugInfo, OptLevel};
use rustc_span::source_map::Spanned;
use tracing::{debug, instrument, trace, trace_span};

use crate::cost_checker::CostChecker;
use crate::cost_checker::{CostChecker, is_call_like};
use crate::deref_separator::deref_finder;
use crate::simplify::simplify_cfg;
use crate::validate::validate_types;
use crate::{check_inline, util};

pub(crate) mod cycle;

const HISTORY_DEPTH_LIMIT: usize = 20;
const TOP_DOWN_DEPTH_LIMIT: usize = 5;

#[derive(Clone, Debug)]
Expand Down Expand Up @@ -117,6 +119,11 @@ trait Inliner<'tcx> {
/// Should inlining happen for a given callee?
fn should_inline_for_callee(&self, def_id: DefId) -> bool;

fn check_codegen_attributes_extra(
&self,
callee_attrs: &CodegenFnAttrs,
) -> Result<(), &'static str>;

fn check_caller_mir_body(&self, body: &Body<'tcx>) -> bool;

/// Returns inlining decision that is based on the examination of callee MIR body.
Expand All @@ -128,10 +135,6 @@ trait Inliner<'tcx> {
callee_attrs: &CodegenFnAttrs,
) -> Result<(), &'static str>;

// How many callsites in a body are we allowed to inline? We need to limit this in order
// to prevent super-linear growth in MIR size.
fn inline_limit_for_block(&self) -> Option<usize>;

/// Called when inlining succeeds.
fn on_inline_success(
&mut self,
Expand All @@ -142,9 +145,6 @@ trait Inliner<'tcx> {

/// Called when inlining failed or was not performed.
fn on_inline_failure(&self, callsite: &CallSite<'tcx>, reason: &'static str);

/// Called when the inline limit for a body is reached.
fn on_inline_limit_reached(&self) -> bool;
}

struct ForceInliner<'tcx> {
Expand Down Expand Up @@ -191,6 +191,14 @@ impl<'tcx> Inliner<'tcx> for ForceInliner<'tcx> {
ForceInline::should_run_pass_for_callee(self.tcx(), def_id)
}

fn check_codegen_attributes_extra(
&self,
callee_attrs: &CodegenFnAttrs,
) -> Result<(), &'static str> {
debug_assert_matches!(callee_attrs.inline, InlineAttr::Force { .. });
Ok(())
}

fn check_caller_mir_body(&self, _: &Body<'tcx>) -> bool {
true
}
Expand Down Expand Up @@ -224,10 +232,6 @@ impl<'tcx> Inliner<'tcx> for ForceInliner<'tcx> {
}
}

fn inline_limit_for_block(&self) -> Option<usize> {
Some(usize::MAX)
}

fn on_inline_success(
&mut self,
callsite: &CallSite<'tcx>,
Expand Down Expand Up @@ -261,10 +265,6 @@ impl<'tcx> Inliner<'tcx> for ForceInliner<'tcx> {
justification: justification.map(|sym| crate::errors::ForceInlineJustification { sym }),
});
}

fn on_inline_limit_reached(&self) -> bool {
false
}
}

struct NormalInliner<'tcx> {
Expand All @@ -278,13 +278,23 @@ struct NormalInliner<'tcx> {
/// The number of `DefId`s is finite, so checking history is enough
/// to ensure that we do not loop endlessly while inlining.
history: Vec<DefId>,
/// How many (multi-call) callsites have we inlined for the top-level call?
///
/// We need to limit this in order to prevent super-linear growth in MIR size.
top_down_counter: usize,
/// Indicates that the caller body has been modified.
changed: bool,
/// Indicates that the caller is #[inline] and just calls another function,
/// and thus we can inline less into it as it'll be inlined itself.
caller_is_inline_forwarder: bool,
}

impl<'tcx> NormalInliner<'tcx> {
fn past_depth_limit(&self) -> bool {
self.history.len() > HISTORY_DEPTH_LIMIT || self.top_down_counter > TOP_DOWN_DEPTH_LIMIT
}
}

impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
fn new(tcx: TyCtxt<'tcx>, def_id: DefId, body: &Body<'tcx>) -> Self {
let typing_env = body.typing_env(tcx);
Expand All @@ -295,6 +305,7 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
typing_env,
def_id,
history: Vec::new(),
top_down_counter: 0,
changed: false,
caller_is_inline_forwarder: matches!(
codegen_fn_attrs.inline,
Expand Down Expand Up @@ -327,6 +338,17 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
true
}

fn check_codegen_attributes_extra(
&self,
callee_attrs: &CodegenFnAttrs,
) -> Result<(), &'static str> {
if self.past_depth_limit() && matches!(callee_attrs.inline, InlineAttr::None) {
Err("Past depth limit so not inspecting unmarked callee")
} else {
Ok(())
}
}

fn check_caller_mir_body(&self, body: &Body<'tcx>) -> bool {
// Avoid inlining into coroutines, since their `optimized_mir` is used for layout computation,
// which can create a cycle, even when no attempt is made to inline the function in the other
Expand All @@ -351,7 +373,11 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
return Err("body has errors");
}

let mut threshold = if self.caller_is_inline_forwarder {
if self.past_depth_limit() && callee_body.basic_blocks.len() > 1 {
return Err("Not inlining multi-block body as we're past a depth limit");
}

let mut threshold = if self.caller_is_inline_forwarder || self.past_depth_limit() {
tcx.sess.opts.unstable_opts.inline_mir_forwarder_threshold.unwrap_or(30)
} else if tcx.cross_crate_inlinable(callsite.callee.def_id()) {
tcx.sess.opts.unstable_opts.inline_mir_hint_threshold.unwrap_or(100)
Expand Down Expand Up @@ -431,14 +457,6 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
}
}

fn inline_limit_for_block(&self) -> Option<usize> {
match self.history.len() {
0 => Some(usize::MAX),
1..=TOP_DOWN_DEPTH_LIMIT => Some(1),
_ => None,
}
}

fn on_inline_success(
&mut self,
callsite: &CallSite<'tcx>,
Expand All @@ -447,13 +465,21 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
) {
self.changed = true;

let new_calls_count = new_blocks
.clone()
.filter(|&bb| is_call_like(caller_body.basic_blocks[bb].terminator()))
.count();
if new_calls_count > 1 {
self.top_down_counter += 1;
}

self.history.push(callsite.callee.def_id());
process_blocks(self, caller_body, new_blocks);
self.history.pop();
}

fn on_inline_limit_reached(&self) -> bool {
true
if self.history.is_empty() {
self.top_down_counter = 0;
}
}

fn on_inline_failure(&self, _: &CallSite<'tcx>, _: &'static str) {}
Expand Down Expand Up @@ -482,8 +508,6 @@ fn process_blocks<'tcx, I: Inliner<'tcx>>(
caller_body: &mut Body<'tcx>,
blocks: Range<BasicBlock>,
) {
let Some(inline_limit) = inliner.inline_limit_for_block() else { return };
let mut inlined_count = 0;
for bb in blocks {
let bb_data = &caller_body[bb];
if bb_data.is_cleanup {
Expand All @@ -505,13 +529,6 @@ fn process_blocks<'tcx, I: Inliner<'tcx>>(
Ok(new_blocks) => {
debug!("inlined {}", callsite.callee);
inliner.on_inline_success(&callsite, caller_body, new_blocks);

inlined_count += 1;
if inlined_count == inline_limit {
if inliner.on_inline_limit_reached() {
return;
}
}
}
}
}
Expand Down Expand Up @@ -584,6 +601,7 @@ fn try_inlining<'tcx, I: Inliner<'tcx>>(
let callee_attrs = tcx.codegen_fn_attrs(callsite.callee.def_id());
check_inline::is_inline_valid_on_fn(tcx, callsite.callee.def_id())?;
check_codegen_attributes(inliner, callsite, callee_attrs)?;
inliner.check_codegen_attributes_extra(callee_attrs)?;

let terminator = caller_body[callsite.block].terminator.as_ref().unwrap();
let TerminatorKind::Call { args, destination, .. } = &terminator.kind else { bug!() };
Expand Down Expand Up @@ -770,6 +788,8 @@ fn check_codegen_attributes<'tcx, I: Inliner<'tcx>>(
return Err("has DoNotOptimize attribute");
}

inliner.check_codegen_attributes_extra(callee_attrs)?;

// Reachability pass defines which functions are eligible for inlining. Generally inlining
// other functions is incorrect because they could reference symbols that aren't exported.
let is_generic = callsite.callee.args.non_erasable_generics().next().is_some();
Expand Down
9 changes: 7 additions & 2 deletions library/core/src/mem/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -856,8 +856,13 @@ pub const fn replace<T>(dest: &mut T, src: T) -> T {
// such that the old value is not duplicated. Nothing is dropped and
// nothing here can panic.
unsafe {
let result = ptr::read(dest);
ptr::write(dest, src);
// Ideally we wouldn't use the intrinsics here, but going through the
// `ptr` methods introduces two unnecessary UbChecks, so until we can
// remove those for pointers that come from references, this uses the
// intrinsics instead so this stays very cheap in MIR (and debug).

let result = crate::intrinsics::read_via_copy(dest);
crate::intrinsics::write_via_move(dest, src);
result
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
error: Undefined Behavior: write access through <TAG> at ALLOC[0x0] is forbidden
--> RUSTLIB/core/src/mem/mod.rs:LL:CC
|
LL | ptr::write(dest, src);
| ^^^^^^^^^^^^^^^^^^^^^ write access through <TAG> at ALLOC[0x0] is forbidden
LL | crate::intrinsics::write_via_move(dest, src);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ write access through <TAG> at ALLOC[0x0] is forbidden
|
= help: this indicates a potential bug in the program: it performed an invalid operation, but the Tree Borrows rules it violated are still experimental
= help: the accessed tag <TAG> is foreign to the protected tag <TAG> (i.e., it is not a child)
Expand Down
44 changes: 44 additions & 0 deletions tests/codegen/range-loop.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//@ ignore-std-debug-assertions
//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes

#![crate_type = "lib"]

// Ensure that MIR optimizations have cleaned things up enough that the IR we
// emit is good even without running the LLVM optimizations.

// CHECK-NOT: define

// CHECK-LABEL: define{{.+}}void @call_for_zero_to_n
#[no_mangle]
pub fn call_for_zero_to_n(n: u32, f: fn(u32)) {
// CHECK: start:
// CHECK-NOT: alloca
// CHECK: %[[IND:.+]] = alloca [4 x i8]
// CHECK-NEXT: %[[ALWAYS_SOME_OPTION:.+]] = alloca
// CHECK-NOT: alloca
// CHECK: store i32 0, ptr %[[IND]],
// CHECK: br label %[[HEAD:.+]]

// CHECK: [[HEAD]]:
// CHECK: %[[T1:.+]] = load i32, ptr %[[IND]],
// CHECK: %[[NOT_DONE:.+]] = icmp ult i32 %[[T1]], %n
// CHECK: br i1 %[[NOT_DONE]], label %[[BODY:.+]], label %[[BREAK:.+]]

// CHECK: [[BREAK]]:
// CHECK: ret void

// CHECK: [[BODY]]:
// CHECK: %[[T2:.+]] = load i32, ptr %[[IND]],
// CHECK: %[[T3:.+]] = add nuw i32 %[[T2]], 1
// CHECK: store i32 %[[T3]], ptr %[[IND]],

// CHECK: store i32 %[[T2]]
// CHECK: %[[T4:.+]] = load i32
// CHECK: call void %f(i32{{.+}}%[[T4]])

for i in 0..n {
f(i);
}
}

// CHECK-NOT: define
5 changes: 5 additions & 0 deletions tests/mir-opt/inline/exponential_runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,15 @@ fn main() {
// CHECK-LABEL: fn main(
// CHECK-NOT: inlined
// CHECK: (inlined <() as G>::call)
// CHECK-NOT: inlined
// CHECK: (inlined <() as F>::call)
// CHECK-NOT: inlined
// CHECK: (inlined <() as E>::call)
// CHECK-NOT: inlined
// CHECK: (inlined <() as D>::call)
// CHECK-NOT: inlined
// CHECK: (inlined <() as C>::call)
// CHECK-NOT: inlined
// CHECK: (inlined <() as B>::call)
// CHECK-NOT: inlined
<() as G>::call();
Expand Down
Loading
Loading