@@ -2370,13 +2370,13 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
2370
2370
///
2371
2371
/// Note that this table does not contain values for which the inverse does not exist (i.e., for
2372
2372
/// `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.)
2373
- const INV_TABLE_MOD_16 : [ usize ; 8 ] = [ 1 , 11 , 13 , 7 , 9 , 3 , 5 , 15 ] ;
2373
+ const INV_TABLE_MOD_16 : [ u8 ; 8 ] = [ 1 , 11 , 13 , 7 , 9 , 3 , 5 , 15 ] ;
2374
2374
/// Modulo for which the `INV_TABLE_MOD_16` is intended.
2375
2375
const INV_TABLE_MOD : usize = 16 ;
2376
2376
/// INV_TABLE_MOD²
2377
2377
const INV_TABLE_MOD_SQUARED : usize = INV_TABLE_MOD * INV_TABLE_MOD ;
2378
2378
2379
- let table_inverse = INV_TABLE_MOD_16 [ ( x & ( INV_TABLE_MOD - 1 ) ) >> 1 ] ;
2379
+ let table_inverse = INV_TABLE_MOD_16 [ ( x & ( INV_TABLE_MOD - 1 ) ) >> 1 ] as usize ;
2380
2380
if m <= INV_TABLE_MOD {
2381
2381
table_inverse & ( m - 1 )
2382
2382
} else {
@@ -2429,36 +2429,23 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
2429
2429
let gcdpow = intrinsics:: cttz_nonzero ( stride) . min ( intrinsics:: cttz_nonzero ( a) ) ;
2430
2430
let gcd = 1usize << gcdpow;
2431
2431
2432
- if gcd == 1 {
2433
- // This branch solves for the variable $o$ in following linear congruence equation:
2434
- //
2435
- // ⎰ p + o ≡ 0 (mod a) # $p + o$ must be aligned to specified alignment $a$
2436
- // ⎱ o ≡ 0 (mod s) # offset $o$ must be a multiple of stride $s$
2437
- //
2438
- // where
2432
+ if p as usize & ( gcd - 1 ) == 0 {
2433
+ // This branch solves the following linear congruence equation:
2439
2434
//
2440
- // * a, s are co-prime
2435
+ // $$ p + so ≡ 0 mod a $$
2441
2436
//
2442
- // This gives us the formula below:
2437
+ // $p$ here is the pointer value, $s$ – stride of `T`, $o$ – offset in `T`s, and $a$ – the
2438
+ // requested alignment.
2443
2439
//
2444
- // o = (a - (p mod a)) * (s⁻¹ mod a) * s
2440
+ // g = gcd(a, s)
2441
+ // o = (a - (p mod a))/g * ((s/g)⁻¹ mod a)
2445
2442
//
2446
2443
// The first term is “the relative alignment of p to a”, the second term is “how does
2447
- // incrementing p by one s change the relative alignment of p”, the third term is
2448
- // translating change in units of s to a byte count .
2444
+ // incrementing p by s bytes change the relative alignment of p”. Division by `g` is
2445
+ // necessary to make this equation well-formed if $a$ and $s$ are not co-prime.
2449
2446
//
2450
2447
// Furthermore, the result produced by this solution is not “minimal”, so it is necessary
2451
- // to take the result $o mod lcm(s, a)$. Since $s$ and $a$ are co-prime (i.e. $gcd(s, a) =
2452
- // 1$) and $lcm(s, a) = s * a / gcd(s, a)$, we can replace $lcm(s, a)$ with just a $s * a$.
2453
- //
2454
- // (Author note: we decided later on to express the offset in "elements" rather than bytes,
2455
- // which drops the multiplication by `s` on both sides of the modulo.)
2456
- return intrinsics:: unchecked_rem ( a. wrapping_sub ( pmoda) . wrapping_mul ( mod_inv ( smoda, a) ) , a) ;
2457
- }
2458
-
2459
- if p as usize & ( gcd - 1 ) == 0 {
2460
- // This can be aligned, but `a` and `stride` are not co-prime, so a somewhat adapted
2461
- // formula is used.
2448
+ // to take the result $o mod lcm(s, a)$. With $o$ counted in elements, $lcm(s, a)$ becomes $a / g$.
2462
2449
let j = a. wrapping_sub ( pmoda) >> gcdpow;
2463
2450
let k = smoda >> gcdpow;
2464
2451
return intrinsics:: unchecked_rem ( j. wrapping_mul ( mod_inv ( k, a) ) , a >> gcdpow) ;
0 commit comments