Skip to content

Commit 5d22b18

Browse files
committed
Improve codegen of align_offset when stride == 1
Previously, checking for `pmoda == 0` would get LLVM to generate branchy code, even though for `stride == 1` the offset can be computed without such a branch by effectively computing `-p % a`.

For well-known (constant) alignments, with the new ordering of these conditionals, we end up generating 2 to 3 cheap instructions on x86_64:

    movq %rdi, %rax
    negl %eax
    andl $7, %eax

instead of 5+ as previously.

For unknown alignments the new code also generates just 3 instructions:

    negq %rdi
    leaq -1(%rsi), %rax
    andq %rdi, %rax
1 parent e7271da commit 5d22b18

File tree

1 file changed

+11
-13
lines changed

1 file changed

+11
-13
lines changed

library/core/src/ptr/mod.rs

+11-13
Original file line numberDiff line numberDiff line change
@@ -1172,7 +1172,7 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
11721172

11731173
/// Calculate multiplicative modular inverse of `x` modulo `m`.
11741174
///
1175-
/// This implementation is tailored for align_offset and has following preconditions:
1175+
/// This implementation is tailored for `align_offset` and has following preconditions:
11761176
///
11771177
/// * `m` is a power-of-two;
11781178
/// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
@@ -1220,23 +1220,21 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
12201220
}
12211221

12221222
let stride = mem::size_of::<T>();
1223-
// SAFETY: `a` is a power-of-two, hence non-zero.
1223+
// SAFETY: `a` is a power-of-two, therefore non-zero.
12241224
let a_minus_one = unsafe { unchecked_sub(a, 1) };
1225-
let pmoda = p as usize & a_minus_one;
1225+
if stride == 1 {
1226+
// `stride == 1` case can be computed more efficiently through `-p (mod a)`.
1227+
return wrapping_sub(0, p as usize) & a_minus_one;
1228+
}
12261229

1230+
let pmoda = p as usize & a_minus_one;
12271231
if pmoda == 0 {
12281232
// Already aligned. Yay!
12291233
return 0;
1230-
}
1231-
1232-
if stride <= 1 {
1233-
return if stride == 0 {
1234-
// If the pointer is not aligned, and the element is zero-sized, then no amount of
1235-
// elements will ever align the pointer.
1236-
!0
1237-
} else {
1238-
wrapping_sub(a, pmoda)
1239-
};
1234+
} else if stride == 0 {
1235+
// If the pointer is not aligned, and the element is zero-sized, then no amount of
1236+
// elements will ever align the pointer.
1237+
return usize::MAX;
12401238
}
12411239

12421240
let smoda = stride & a_minus_one;

0 commit comments

Comments
 (0)