Skip to content

Commit 7f79e98

Browse files
committed
Auto merge of #72205 - ecstatic-morse:nrvo, r=oli-obk
Dumb NRVO This is a very simple version of an NRVO pass, which scans backwards from the `return` terminator to see if there is an an assignment like `_0 = _1`. If a basic block with two or more predecessors is encountered during this scan without first seeing an assignment to the return place, we bail out. This avoids running a full "reaching definitions" dataflow analysis. I wanted to see how much `rustc` would benefit from even a very limited version of this optimization. We should be able to use this as a point of comparison for more advanced versions that are based on live ranges. r? @ghost
2 parents 963bf52 + f509862 commit 7f79e98

File tree

13 files changed

+350
-48
lines changed

13 files changed

+350
-48
lines changed

src/librustc_codegen_ssa/mir/debuginfo.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
115115
let full_debug_info = bx.sess().opts.debuginfo == DebugInfo::Full;
116116

117117
// FIXME(eddyb) maybe name the return place as `_0` or `return`?
118-
if local == mir::RETURN_PLACE {
118+
if local == mir::RETURN_PLACE && !self.mir.local_decls[mir::RETURN_PLACE].is_user_variable()
119+
{
119120
return;
120121
}
121122

src/librustc_mir/transform/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub mod generator;
2828
pub mod inline;
2929
pub mod instcombine;
3030
pub mod no_landing_pads;
31+
pub mod nrvo;
3132
pub mod promote_consts;
3233
pub mod qualify_min_const_fn;
3334
pub mod remove_noop_landing_pads;
@@ -324,6 +325,7 @@ fn run_optimization_passes<'tcx>(
324325
&remove_noop_landing_pads::RemoveNoopLandingPads,
325326
&simplify::SimplifyCfg::new("after-remove-noop-landing-pads"),
326327
&simplify::SimplifyCfg::new("final"),
328+
&nrvo::RenameReturnPlace,
327329
&simplify::SimplifyLocals,
328330
];
329331

src/librustc_mir/transform/nrvo.rs

+238
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
use rustc_hir::Mutability;
2+
use rustc_index::bit_set::HybridBitSet;
3+
use rustc_middle::mir::visit::{MutVisitor, PlaceContext, Visitor};
4+
use rustc_middle::mir::{self, BasicBlock, Local, Location};
5+
use rustc_middle::ty::TyCtxt;
6+
7+
use crate::transform::{MirPass, MirSource};
8+
9+
/// This pass looks for MIR that always copies the same local into the return place and eliminates
10+
/// the copy by renaming all uses of that local to `_0`.
11+
///
12+
/// This allows LLVM to perform an optimization similar to the named return value optimization
13+
/// (NRVO) that is guaranteed in C++. This avoids a stack allocation and `memcpy` for the
14+
/// relatively common pattern of allocating a buffer on the stack, mutating it, and returning it by
15+
/// value like so:
16+
///
17+
/// ```rust
18+
/// fn foo(init: fn(&mut [u8; 1024])) -> [u8; 1024] {
19+
/// let mut buf = [0; 1024];
20+
/// init(&mut buf);
21+
/// buf
22+
/// }
23+
/// ```
24+
///
25+
/// For now, this pass is very simple and only capable of eliminating a single copy. A more general
26+
/// version of copy propagation, such as the one based on non-overlapping live ranges in [#47954] and
27+
/// [#71003], could yield even more benefits.
28+
///
29+
/// [#47954]: https://github.com/rust-lang/rust/pull/47954
30+
/// [#71003]: https://github.com/rust-lang/rust/pull/71003
31+
pub struct RenameReturnPlace;
32+
33+
impl<'tcx> MirPass<'tcx> for RenameReturnPlace {
34+
fn run_pass(&self, tcx: TyCtxt<'tcx>, src: MirSource<'tcx>, body: &mut mir::Body<'tcx>) {
35+
if tcx.sess.opts.debugging_opts.mir_opt_level == 0 {
36+
return;
37+
}
38+
39+
let returned_local = match local_eligible_for_nrvo(body) {
40+
Some(l) => l,
41+
None => {
42+
debug!("`{:?}` was ineligible for NRVO", src.def_id());
43+
return;
44+
}
45+
};
46+
47+
// Sometimes, the return place is assigned a local of a different but coercable type, for
48+
// example `&T` instead of `&mut T`. Overwriting the `LocalInfo` for the return place would
49+
// result in it having an incorrect type. Although this doesn't seem to cause a problem in
50+
// codegen, bail out anyways since it happens so rarely.
51+
let ret_ty = body.local_decls[mir::RETURN_PLACE].ty;
52+
let assigned_ty = body.local_decls[returned_local].ty;
53+
if ret_ty != assigned_ty {
54+
debug!("`{:?}` was eligible for NRVO but for type mismatch", src.def_id());
55+
debug!("typeof(_0) != typeof({:?}); {:?} != {:?}", returned_local, ret_ty, assigned_ty);
56+
return;
57+
}
58+
59+
debug!(
60+
"`{:?}` was eligible for NRVO, making {:?} the return place",
61+
src.def_id(),
62+
returned_local
63+
);
64+
65+
RenameToReturnPlace { tcx, to_rename: returned_local }.visit_body(body);
66+
67+
// Clean up the `NOP`s we inserted for statements made useless by our renaming.
68+
for block_data in body.basic_blocks_mut() {
69+
block_data.statements.retain(|stmt| stmt.kind != mir::StatementKind::Nop);
70+
}
71+
72+
// Overwrite the debuginfo of `_0` with that of the renamed local.
73+
let (renamed_decl, ret_decl) =
74+
body.local_decls.pick2_mut(returned_local, mir::RETURN_PLACE);
75+
ret_decl.clone_from(renamed_decl);
76+
77+
// The return place is always mutable.
78+
ret_decl.mutability = Mutability::Mut;
79+
}
80+
}
81+
82+
/// MIR that is eligible for the NRVO must fulfill two conditions:
83+
/// 1. The return place must not be read prior to the `Return` terminator.
84+
/// 2. A simple assignment of a whole local to the return place (e.g., `_0 = _1`) must be the
85+
/// only definition of the return place reaching the `Return` terminator.
86+
///
87+
/// If the MIR fulfills both these conditions, this function returns the `Local` that is assigned
88+
/// to the return place along all possible paths through the control-flow graph.
89+
fn local_eligible_for_nrvo(body: &mut mir::Body<'_>) -> Option<Local> {
90+
if IsReturnPlaceRead::run(body) {
91+
return None;
92+
}
93+
94+
let mut copied_to_return_place = None;
95+
for block in body.basic_blocks().indices() {
96+
// Look for blocks with a `Return` terminator.
97+
if !matches!(body[block].terminator().kind, mir::TerminatorKind::Return) {
98+
continue;
99+
}
100+
101+
// Look for an assignment of a single local to the return place prior to the `Return`.
102+
let returned_local = find_local_assigned_to_return_place(block, body)?;
103+
match body.local_kind(returned_local) {
104+
// FIXME: Can we do this for arguments as well?
105+
mir::LocalKind::Arg => return None,
106+
107+
mir::LocalKind::ReturnPointer => bug!("Return place was assigned to itself?"),
108+
mir::LocalKind::Var | mir::LocalKind::Temp => {}
109+
}
110+
111+
// If multiple different locals are copied to the return place. We can't pick a
112+
// single one to rename.
113+
if copied_to_return_place.map_or(false, |old| old != returned_local) {
114+
return None;
115+
}
116+
117+
copied_to_return_place = Some(returned_local);
118+
}
119+
120+
return copied_to_return_place;
121+
}
122+
123+
fn find_local_assigned_to_return_place(
124+
start: BasicBlock,
125+
body: &mut mir::Body<'_>,
126+
) -> Option<Local> {
127+
let mut block = start;
128+
let mut seen = HybridBitSet::new_empty(body.basic_blocks().len());
129+
130+
// Iterate as long as `block` has exactly one predecessor that we have not yet visited.
131+
while seen.insert(block) {
132+
trace!("Looking for assignments to `_0` in {:?}", block);
133+
134+
let local = body[block].statements.iter().rev().find_map(as_local_assigned_to_return_place);
135+
if local.is_some() {
136+
return local;
137+
}
138+
139+
match body.predecessors()[block].as_slice() {
140+
&[pred] => block = pred,
141+
_ => return None,
142+
}
143+
}
144+
145+
return None;
146+
}
147+
148+
// If this statement is an assignment of an unprojected local to the return place,
149+
// return that local.
150+
fn as_local_assigned_to_return_place(stmt: &mir::Statement<'_>) -> Option<Local> {
151+
if let mir::StatementKind::Assign(box (lhs, rhs)) = &stmt.kind {
152+
if lhs.as_local() == Some(mir::RETURN_PLACE) {
153+
if let mir::Rvalue::Use(mir::Operand::Copy(rhs) | mir::Operand::Move(rhs)) = rhs {
154+
return rhs.as_local();
155+
}
156+
}
157+
}
158+
159+
None
160+
}
161+
162+
struct RenameToReturnPlace<'tcx> {
163+
to_rename: Local,
164+
tcx: TyCtxt<'tcx>,
165+
}
166+
167+
/// Replaces all uses of `self.to_rename` with `_0`.
168+
impl MutVisitor<'tcx> for RenameToReturnPlace<'tcx> {
169+
fn tcx(&self) -> TyCtxt<'tcx> {
170+
self.tcx
171+
}
172+
173+
fn visit_statement(&mut self, stmt: &mut mir::Statement<'tcx>, loc: Location) {
174+
// Remove assignments of the local being replaced to the return place, since it is now the
175+
// return place:
176+
// _0 = _1
177+
if as_local_assigned_to_return_place(stmt) == Some(self.to_rename) {
178+
stmt.kind = mir::StatementKind::Nop;
179+
return;
180+
}
181+
182+
// Remove storage annotations for the local being replaced:
183+
// StorageLive(_1)
184+
if let mir::StatementKind::StorageLive(local) | mir::StatementKind::StorageDead(local) =
185+
stmt.kind
186+
{
187+
if local == self.to_rename {
188+
stmt.kind = mir::StatementKind::Nop;
189+
return;
190+
}
191+
}
192+
193+
self.super_statement(stmt, loc)
194+
}
195+
196+
fn visit_terminator(&mut self, terminator: &mut mir::Terminator<'tcx>, loc: Location) {
197+
// Ignore the implicit "use" of the return place in a `Return` statement.
198+
if let mir::TerminatorKind::Return = terminator.kind {
199+
return;
200+
}
201+
202+
self.super_terminator(terminator, loc);
203+
}
204+
205+
fn visit_local(&mut self, l: &mut Local, _: PlaceContext, _: Location) {
206+
assert_ne!(*l, mir::RETURN_PLACE);
207+
if *l == self.to_rename {
208+
*l = mir::RETURN_PLACE;
209+
}
210+
}
211+
}
212+
213+
struct IsReturnPlaceRead(bool);
214+
215+
impl IsReturnPlaceRead {
216+
fn run(body: &mir::Body<'_>) -> bool {
217+
let mut vis = IsReturnPlaceRead(false);
218+
vis.visit_body(body);
219+
vis.0
220+
}
221+
}
222+
223+
impl Visitor<'tcx> for IsReturnPlaceRead {
224+
fn visit_local(&mut self, &l: &Local, ctxt: PlaceContext, _: Location) {
225+
if l == mir::RETURN_PLACE && ctxt.is_use() && !ctxt.is_place_assignment() {
226+
self.0 = true;
227+
}
228+
}
229+
230+
fn visit_terminator(&mut self, terminator: &mir::Terminator<'tcx>, loc: Location) {
231+
// Ignore the implicit "use" of the return place in a `Return` statement.
232+
if let mir::TerminatorKind::Return = terminator.kind {
233+
return;
234+
}
235+
236+
self.super_terminator(terminator, loc);
237+
}
238+
}

src/test/codegen/align-enum.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// compile-flags: -C no-prepopulate-passes
1+
// compile-flags: -C no-prepopulate-passes -Z mir-opt-level=0
22
// ignore-tidy-linelength
33

44
#![crate_type = "lib"]

src/test/codegen/align-struct.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// compile-flags: -C no-prepopulate-passes
1+
// compile-flags: -C no-prepopulate-passes -Z mir-opt-level=0
22
// ignore-tidy-linelength
33

44
#![crate_type = "lib"]

src/test/codegen/nrvo.rs

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// compile-flags: -O
2+
3+
#![crate_type = "lib"]
4+
5+
// Ensure that we do not call `memcpy` for the following function.
6+
// `memset` and `init` should be called directly on the return pointer.
7+
#[no_mangle]
8+
pub fn nrvo(init: fn(&mut [u8; 4096])) -> [u8; 4096] {
9+
// CHECK-LABEL: nrvo
10+
// CHECK: @llvm.memset
11+
// CHECK-NOT: @llvm.memcpy
12+
// CHECK: ret
13+
// CHECK-EMPTY
14+
let mut buf = [0; 4096];
15+
init(&mut buf);
16+
buf
17+
}

src/test/debuginfo/generic-function.rs

+9-30
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
// ignore-tidy-linelength
2-
31
// min-lldb-version: 310
42

53
// compile-flags:-g
@@ -12,31 +10,21 @@
1210
// gdb-check:$1 = 1
1311
// gdb-command:print *t1
1412
// gdb-check:$2 = 2.5
15-
// gdb-command:print ret
16-
// gdbg-check:$3 = {__0 = {__0 = 1, __1 = 2.5}, __1 = {__0 = 2.5, __1 = 1}}
17-
// gdbr-check:$3 = ((1, 2.5), (2.5, 1))
1813
// gdb-command:continue
1914

2015
// gdb-command:print *t0
21-
// gdb-check:$4 = 3.5
16+
// gdb-check:$3 = 3.5
2217
// gdb-command:print *t1
23-
// gdb-check:$5 = 4
24-
// gdb-command:print ret
25-
// gdbg-check:$6 = {__0 = {__0 = 3.5, __1 = 4}, __1 = {__0 = 4, __1 = 3.5}}
26-
// gdbr-check:$6 = ((3.5, 4), (4, 3.5))
18+
// gdb-check:$4 = 4
2719
// gdb-command:continue
2820

2921
// gdb-command:print *t0
30-
// gdb-check:$7 = 5
22+
// gdb-check:$5 = 5
3123
// gdb-command:print *t1
32-
// gdbg-check:$8 = {a = 6, b = 7.5}
33-
// gdbr-check:$8 = generic_function::Struct {a: 6, b: 7.5}
34-
// gdb-command:print ret
35-
// gdbg-check:$9 = {__0 = {__0 = 5, __1 = {a = 6, b = 7.5}}, __1 = {__0 = {a = 6, b = 7.5}, __1 = 5}}
36-
// gdbr-check:$9 = ((5, generic_function::Struct {a: 6, b: 7.5}), (generic_function::Struct {a: 6, b: 7.5}, 5))
24+
// gdbg-check:$6 = {a = 6, b = 7.5}
25+
// gdbr-check:$6 = generic_function::Struct {a: 6, b: 7.5}
3726
// gdb-command:continue
3827

39-
4028
// === LLDB TESTS ==================================================================================
4129

4230
// lldb-command:run
@@ -47,31 +35,22 @@
4735
// lldb-command:print *t1
4836
// lldbg-check:[...]$1 = 2.5
4937
// lldbr-check:(f64) *t1 = 2.5
50-
// lldb-command:print ret
51-
// lldbg-check:[...]$2 = ((1, 2.5), (2.5, 1))
52-
// lldbr-check:(((i32, f64), (f64, i32))) ret = { = { = 1 = 2.5 } = { = 2.5 = 1 } }
5338
// lldb-command:continue
5439

5540
// lldb-command:print *t0
56-
// lldbg-check:[...]$3 = 3.5
41+
// lldbg-check:[...]$2 = 3.5
5742
// lldbr-check:(f64) *t0 = 3.5
5843
// lldb-command:print *t1
59-
// lldbg-check:[...]$4 = 4
44+
// lldbg-check:[...]$3 = 4
6045
// lldbr-check:(u16) *t1 = 4
61-
// lldb-command:print ret
62-
// lldbg-check:[...]$5 = ((3.5, 4), (4, 3.5))
63-
// lldbr-check:(((f64, u16), (u16, f64))) ret = { = { = 3.5 = 4 } = { = 4 = 3.5 } }
6446
// lldb-command:continue
6547

6648
// lldb-command:print *t0
67-
// lldbg-check:[...]$6 = 5
49+
// lldbg-check:[...]$4 = 5
6850
// lldbr-check:(i32) *t0 = 5
6951
// lldb-command:print *t1
70-
// lldbg-check:[...]$7 = Struct { a: 6, b: 7.5 }
52+
// lldbg-check:[...]$5 = Struct { a: 6, b: 7.5 }
7153
// lldbr-check:(generic_function::Struct) *t1 = Struct { a: 6, b: 7.5 }
72-
// lldb-command:print ret
73-
// lldbg-check:[...]$8 = ((5, Struct { a: 6, b: 7.5 }), (Struct { a: 6, b: 7.5 }, 5))
74-
// lldbr-check:(((i32, generic_function::Struct), (generic_function::Struct, i32))) ret = { = { = 5 = Struct { a: 6, b: 7.5 } } = { = Struct { a: 6, b: 7.5 } = 5 } }
7554
// lldb-command:continue
7655

7756
#![feature(omit_gdb_pretty_printer_section)]

src/test/mir-opt/inline/issue-58867-inline-as-ref-as-mut/rustc.a.Inline.after.mir

+1-5
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,14 @@ fn a(_1: &mut [T]) -> &mut [T] {
88
let mut _4: &mut [T]; // in scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:6
99
scope 1 {
1010
debug self => _4; // in scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
11-
let mut _5: &mut [T]; // in scope 1 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:15
1211
}
1312

1413
bb0: {
1514
StorageLive(_2); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:15
1615
StorageLive(_3); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:15
1716
StorageLive(_4); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:6
1817
_4 = &mut (*_1); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:6
19-
StorageLive(_5); // scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
20-
_5 = _4; // scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
21-
_3 = _5; // scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
22-
StorageDead(_5); // scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
18+
_3 = _4; // scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
2319
_2 = &mut (*_3); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:15
2420
StorageDead(_4); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:14: 3:15
2521
_0 = &mut (*_2); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:15

0 commit comments

Comments
 (0)