Skip to content

Commit 8dc554a

Browse files
committed
Auto merge of #54534 - nagisa:align-offset-simplification, r=alexcrichton
Simplify implementation of align_offset slightly
2 parents 567557f + 0b3e5eb commit 8dc554a

File tree

2 files changed

+13
-26
lines changed

2 files changed

+13
-26
lines changed

src/libcore/ptr.rs

+12-25
Original file line numberDiff line numberDiff line change
@@ -2370,13 +2370,13 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
23702370
///
23712371
/// Note that this table does not contain values where the inverse does not exist (e.g. for
23722372
/// `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.)
2373-
const INV_TABLE_MOD_16: [usize; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
2373+
const INV_TABLE_MOD_16: [u8; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
23742374
/// Modulo for which the `INV_TABLE_MOD_16` is intended.
23752375
const INV_TABLE_MOD: usize = 16;
23762376
/// INV_TABLE_MOD²
23772377
const INV_TABLE_MOD_SQUARED: usize = INV_TABLE_MOD * INV_TABLE_MOD;
23782378

2379-
let table_inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1];
2379+
let table_inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1] as usize;
23802380
if m <= INV_TABLE_MOD {
23812381
table_inverse & (m - 1)
23822382
} else {
@@ -2429,36 +2429,23 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
24292429
let gcdpow = intrinsics::cttz_nonzero(stride).min(intrinsics::cttz_nonzero(a));
24302430
let gcd = 1usize << gcdpow;
24312431

2432-
if gcd == 1 {
2433-
// This branch solves for the variable $o$ in following linear congruence equation:
2434-
//
2435-
// ⎰ p + o ≡ 0 (mod a) # $p + o$ must be aligned to specified alignment $a$
2436-
// ⎱ o ≡ 0 (mod s) # offset $o$ must be a multiple of stride $s$
2437-
//
2438-
// where
2432+
if p as usize & (gcd - 1) == 0 {
2433+
// This branch solves for the following linear congruence equation:
24392434
//
2440-
// * a, s are co-prime
2435+
// $$ p + so ≡ 0 mod a $$
24412436
//
2442-
// This gives us the formula below:
2437+
// $p$ here is the pointer value, $s$ – stride of `T`, $o$ – offset in `T`s, and $a$ – the
2438+
// requested alignment.
24432439
//
2444-
// o = (a - (p mod a)) * (s⁻¹ mod a) * s
2440+
// g = gcd(a, s)
2441+
// o = (a - (p mod a))/g * ((s/g)⁻¹ mod a)
24452442
//
24462443
// The first term is “the relative alignment of p to a”, the second term is “how does
2447-
// incrementing p by one s change the relative alignment of p”, the third term is
2448-
// translating change in units of s to a byte count.
2444+
// incrementing p by s bytes change the relative alignment of p”. Division by `g` is
2445+
// necessary to make this equation well formed if $a$ and $s$ are not co-prime.
24492446
//
24502447
// Furthermore, the result produced by this solution is not “minimal”, so it is necessary
2451-
// to take the result $o mod lcm(s, a)$. Since $s$ and $a$ are co-prime (i.e. $gcd(s, a) =
2452-
// 1$) and $lcm(s, a) = s * a / gcd(s, a)$, we can replace $lcm(s, a)$ with just a $s * a$.
2453-
//
2454-
// (Author note: we decided later on to express the offset in "elements" rather than bytes,
2455-
// which drops the multiplication by `s` on both sides of the modulo.)
2456-
return intrinsics::unchecked_rem(a.wrapping_sub(pmoda).wrapping_mul(mod_inv(smoda, a)), a);
2457-
}
2458-
2459-
if p as usize & (gcd - 1) == 0 {
2460-
// This can be aligned, but `a` and `stride` are not co-prime, so a somewhat adapted
2461-
// formula is used.
2448+
// to take the result $o mod lcm(s, a)$. Since $o$ is counted in `T`s, $lcm(s, a)$ becomes just $a / g$.
24622449
let j = a.wrapping_sub(pmoda) >> gcdpow;
24632450
let k = smoda >> gcdpow;
24642451
return intrinsics::unchecked_rem(j.wrapping_mul(mod_inv(k, a)), a >> gcdpow);

src/libcore/slice/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1932,7 +1932,7 @@ impl<T> [T] {
19321932
fn gcd(a: usize, b: usize) -> usize {
19331933
// iterative Stein’s algorithm
19341934
// We should still make this `const fn` (and revert to recursive algorithm if we do)
1935-
// because relying on llvm to consteval all this is… well, it makes me
1935+
// because relying on llvm to consteval all this is… well, it makes me uncomfortable.
19361936
let (ctz_a, mut ctz_b) = unsafe {
19371937
if a == 0 { return b; }
19381938
if b == 0 { return a; }

0 commit comments

Comments
 (0)