@@ -2370,13 +2370,13 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
23702370 ///
23712371 /// Note that this table does not contain values for which the inverse does not exist (i.e. for
23722372 /// `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.)
2373- const INV_TABLE_MOD_16 : [ usize ; 8 ] = [ 1 , 11 , 13 , 7 , 9 , 3 , 5 , 15 ] ;
2373+ const INV_TABLE_MOD_16 : [ u8 ; 8 ] = [ 1 , 11 , 13 , 7 , 9 , 3 , 5 , 15 ] ;
23742374 /// Modulo for which the `INV_TABLE_MOD_16` is intended.
23752375 const INV_TABLE_MOD : usize = 16 ;
23762376 /// INV_TABLE_MOD²
23772377 const INV_TABLE_MOD_SQUARED : usize = INV_TABLE_MOD * INV_TABLE_MOD ;
23782378
2379- let table_inverse = INV_TABLE_MOD_16 [ ( x & ( INV_TABLE_MOD - 1 ) ) >> 1 ] ;
2379+ let table_inverse = INV_TABLE_MOD_16 [ ( x & ( INV_TABLE_MOD - 1 ) ) >> 1 ] as usize ;
23802380 if m <= INV_TABLE_MOD {
23812381 table_inverse & ( m - 1 )
23822382 } else {
@@ -2429,36 +2429,23 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
24292429 let gcdpow = intrinsics:: cttz_nonzero ( stride) . min ( intrinsics:: cttz_nonzero ( a) ) ;
24302430 let gcd = 1usize << gcdpow;
24312431
2432- if gcd == 1 {
2433- // This branch solves for the variable $o$ in following linear congruence equation:
2434- //
2435- // ⎰ p + o ≡ 0 (mod a) # $p + o$ must be aligned to specified alignment $a$
2436- // ⎱ o ≡ 0 (mod s) # offset $o$ must be a multiple of stride $s$
2437- //
2438- // where
2432+ if p as usize & ( gcd - 1 ) == 0 {
2433+ // This branch solves the following linear congruence equation for $o$:
24392434 //
2440- // * a, s are co-prime
2435+ // $$ p + so ≡ 0 mod a $$
24412436 //
2442- // This gives us the formula below:
2437+ // $p$ here is the pointer value, $s$ – stride of `T`, $o$ offset in `T`s, and $a$ – the
2438+ // requested alignment.
24432439 //
2444- // o = (a - (p mod a)) * (s⁻¹ mod a) * s
2440+ // g = gcd(a, s)
2441+ // o = (a - (p mod a))/g * ((s/g)⁻¹ mod a)
24452442 //
24462443 // The first term is “the relative alignment of p to a”, the second term is “how does
2447- // incrementing p by one s change the relative alignment of p”, the third term is
2448- // translating change in units of s to a byte count .
2444+ // incrementing p by s bytes changes the relative alignment of p”. Division by `g` is
2445+ // necessary to make this equation well-formed if $a$ and $s$ are not co-prime .
24492446 //
24502447 // Furthermore, the result produced by this solution is not “minimal”, so it is necessary
2451- // to take the result $o mod lcm(s, a)$. Since $s$ and $a$ are co-prime (i.e. $gcd(s, a) =
2452- // 1$) and $lcm(s, a) = s * a / gcd(s, a)$, we can replace $lcm(s, a)$ with just a $s * a$.
2453- //
2454- // (Author note: we decided later on to express the offset in "elements" rather than bytes,
2455- // which drops the multiplication by `s` on both sides of the modulo.)
2456- return intrinsics:: unchecked_rem ( a. wrapping_sub ( pmoda) . wrapping_mul ( mod_inv ( smoda, a) ) , a) ;
2457- }
2458-
2459- if p as usize & ( gcd - 1 ) == 0 {
2460- // This can be aligned, but `a` and `stride` are not co-prime, so a somewhat adapted
2461- // formula is used.
2448+ // to take the result $o mod lcm(s, a)$. We can replace $lcm(s, a)$ with just $a / g$.
24622449 let j = a. wrapping_sub ( pmoda) >> gcdpow;
24632450 let k = smoda >> gcdpow;
24642451 return intrinsics:: unchecked_rem ( j. wrapping_mul ( mod_inv ( k, a) ) , a >> gcdpow) ;
0 commit comments