Closed
Description
Note: I am aware of #91521 and related issues for this function (`map` on arrays), and I believe that this one has a separate cause.
I tried this code:
/// Turns an array of two raw `*const u8` pointers into an array of two
/// `&'a u8` references by dereferencing each element through `map`.
///
/// This is the minimal reproducer for the report: it is written with `map`
/// on purpose, because the code generated for that call is what is being
/// examined below.
///
/// NOTE(review): the function is declared safe, yet it dereferences the raw
/// pointers without any check of its own; callers are presumably expected to
/// pass non-null pointers that stay valid for `'a` — confirm before reusing
/// this outside of a reproducer.
pub fn foo<'a>(n: [*const u8; 2]) -> [&'a u8; 2] {
// Reborrow each raw pointer as a shared reference; no null check is written here.
n.map(|p| unsafe { &*p })
}
Assembly (`rustc -Copt-level=3 -Cdebuginfo=0 --edition 2021`, godbolt):
example::foo:
mov rax, rdi
mov rcx, qword ptr [rsi]
mov rdx, qword ptr [rsi + 8]
test rdx, rdx
mov rsi, rcx
cmove rsi, rdx
test rcx, rcx
cmove rsi, rcx
mov qword ptr [rdi], rsi
mov qword ptr [rdi + 8], rdx
ret
The interesting thing to note here is the comparisons (the `test`/`cmove` pairs), which should definitely not be there. This problem gets significantly worse as you increase the number of elements. I believe the source of these comparisons is that `map` is implemented via an iterator, which means that we build an `Option<&u8>` at some point. Still, the LLVM IR:
LLVM IR
define void @_ZN7example3foo17h2b4bd706abe6f6b0E([2 x i8*]* noalias nocapture noundef writeonly sret([2 x i8*]) dereferenceable(16) %0, [2 x i8*]* noalias nocapture noundef readonly dereferenceable(16) %n) unnamed_addr #0 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality {
%_2.sroa.0.0..sroa_cast = bitcast [2 x i8*]* %n to i64*
%_2.sroa.0.0.copyload = load i64, i64* %_2.sroa.0.0..sroa_cast, align 8
%_2.sroa.4.0..sroa_idx2 = getelementptr inbounds [2 x i8*], [2 x i8*]* %n, i64 0, i64 1
%_2.sroa.4.0..sroa_cast = bitcast i8** %_2.sroa.4.0..sroa_idx2 to i64*
%_2.sroa.4.0.copyload = load i64, i64* %_2.sroa.4.0..sroa_cast, align 8
%.not.i.i.i.not.i = icmp eq i64 %_2.sroa.0.0.copyload, 0
%.not.1.i.i.i.i = icmp eq i64 %_2.sroa.4.0.copyload, 0
%or.cond.i = select i1 %.not.i.i.i.not.i, i1 true, i1 %.not.1.i.i.i.i
%1 = inttoptr i64 %_2.sroa.4.0.copyload to i8*
%2 = inttoptr i64 %_2.sroa.0.0.copyload to {}*
%_2.sroa.6.0.i.i.i = select i1 %or.cond.i, i8* undef, i8* %1
%_2.sroa.0.0.i.i.i = select i1 %or.cond.i, {}* null, {}* %2
%3 = icmp ne {}* %_2.sroa.0.0.i.i.i, null
tail call void @llvm.assume(i1 %3) #3
%_4.sroa.0.0..sroa_cast.i.i = bitcast [2 x i8*]* %0 to {}**
store {}* %_2.sroa.0.0.i.i.i, {}** %_4.sroa.0.0..sroa_cast.i.i, align 8, !alias.scope !2, !noalias !7
%_4.sroa.4.0..sroa_idx4.i.i = getelementptr inbounds [2 x i8*], [2 x i8*]* %0, i64 0, i64 1
store i8* %_2.sroa.6.0.i.i.i, i8** %_4.sroa.4.0..sroa_idx4.i.i, align 8, !alias.scope !2, !noalias !7
ret void
}
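The LLVM IR above does appear to have all the information necessary to continue optimizing: the `llvm.assume` states that the first selected pointer is non-null, which can only hold if `%or.cond.i` is false, so both `select`s could be folded to the loaded values and the comparisons removed.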
Meta
`rustc --version --verbose`:
rustc 1.62.0-nightly (ec77f2524 2022-04-17)
binary: rustc
commit-hash: ec77f252434a532fdb5699ae4f21a3072d211edd
commit-date: 2022-04-17
host: x86_64-unknown-linux-gnu
release: 1.62.0-nightly
LLVM version: 14.0.0
@rustbot labels +A-llvm +I-slow