Skip to content

Commit 82389f4

Browse files
committed
aaaa
1 parent 31577f5 commit 82389f4

File tree

1 file changed

+195
-152
lines changed

1 file changed

+195
-152
lines changed

library/core/src/num/int_sqrt.rs

+195-152
Original file line numberDiff line numberDiff line change
@@ -50,142 +50,227 @@ pub const fn u8(n: u8) -> u8 {
5050
U8_ISQRT_WITH_REMAINDER[n as usize].0
5151
}
5252

53-
/// Returns the [integer square root][1] and remainder of any [`u8`](prim@u8)
54-
/// input.
55-
///
56-
/// For example, `u8_with_remainder(17) == (4, 1)` because the integer square
57-
/// root of 17 is 4 and because 17 is 1 higher than 4 squared.
58-
///
59-
/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
60-
/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
61-
/// Encyclopedia."
62-
#[must_use = "this returns the result of the operation, \
63-
without modifying the original"]
64-
// `#[inline(always)]` because this is just a memory access.
65-
#[inline(always)]
66-
const fn u8_with_remainder(n: u8) -> (u8, u8) {
67-
U8_ISQRT_WITH_REMAINDER[n as usize]
53+
/*macro_rules! unsigned_fn {
54+
($unsigned_type:ident, $stages:block) => {
55+
pub const fn $unsigned_type(mut n: u16) -> u16 {
56+
if n == 0 {
57+
return 0;
58+
}
59+
const EVEN_BITMASK: u32 = u32::MAX & !1;
60+
let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
61+
n <<= precondition_shift;
62+
63+
let s = $stages;
64+
65+
let result_shift = precondition_shift >> 1;
66+
s >> result_shift
67+
}
68+
};
69+
}*/
70+
71+
macro_rules! first_stage {
72+
($original_bits:literal, $n:ident) => {{
73+
const N_SHIFT: u32 = $original_bits - 8;
74+
let n = $n >> N_SHIFT;
75+
76+
U8_ISQRT_WITH_REMAINDER[n as usize]
77+
}};
6878
}
6979

70-
/// Returns the [integer square root][1] of any [`usize`](prim@usize) input.
71-
///
72-
/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
73-
/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
74-
/// Encyclopedia."
75-
#[must_use = "this returns the result of the operation, \
76-
without modifying the original"]
77-
// `#[inline(always)]` because the programmer-accessible functions will use
78-
// this internally and the contents of this should be inlined there.
79-
#[inline(always)]
80-
pub const fn usize(n: usize) -> usize {
81-
#[cfg(target_pointer_width = "16")]
82-
{
83-
u16(n as u16) as usize
80+
macro_rules! middle_stage {
81+
($original_bits:literal, $ty:ty, $n:ident, $s:ident, $r:ident) => {{
82+
const N_SHIFT: u32 = $original_bits - <$ty>::BITS;
83+
let n = ($n >> N_SHIFT) as $ty;
84+
85+
const HALF_BITS: u32 = <$ty>::BITS >> 1;
86+
const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
87+
const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1;
88+
const LOWEST_QUARTER_1_BITS: $ty = (1 << QUARTER_BITS) - 1;
89+
90+
let lo = n & LOWER_HALF_1_BITS;
91+
let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS);
92+
let denominator = ($s as $ty) << 1;
93+
let q = numerator / denominator;
94+
let u = numerator % denominator;
95+
let mut s = ($s << QUARTER_BITS) as $ty + q;
96+
let (mut r, overflow) =
97+
((u << QUARTER_BITS) | (lo & LOWEST_QUARTER_1_BITS)).overflowing_sub(q * q);
98+
if overflow {
99+
r = r.wrapping_add(2 * s - 1);
100+
s -= 1;
101+
}
102+
(s, r)
103+
}};
104+
}
105+
106+
macro_rules! last_stage {
107+
($ty:ty, $n:ident, $s:ident, $r:ident) => {{
108+
const HALF_BITS: u32 = <$ty>::BITS >> 1;
109+
const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
110+
const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1;
111+
112+
let lo = $n & LOWER_HALF_1_BITS;
113+
let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS);
114+
let denominator = ($s as $ty) << 1;
115+
let q = numerator / denominator;
116+
let mut s = ($s << QUARTER_BITS) as $ty + q;
117+
let (s_squared, overflow) = s.overflowing_mul(s);
118+
if overflow || s_squared > $n {
119+
s -= 1;
120+
}
121+
s
122+
}};
123+
}
124+
125+
/*unsigned_fn!(u16, {
126+
let (s, r) = first_stage!(16, n);
127+
last_stage!(u16, n, s, r)
128+
});
129+
130+
unsigned_fn!(u32, {
131+
let (s, r) = first_stage!(32, n);
132+
let (s, r) = middle_stage!(32, u16, n, s, r);
133+
last_stage!(u32, n, s, r)
134+
})*/
135+
136+
pub const fn u16(mut n: u16) -> u16 {
137+
if n == 0 {
138+
return 0;
139+
}
140+
const EVEN_BITMASK: u32 = u32::MAX & !1;
141+
let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
142+
n <<= precondition_shift;
143+
144+
let (s, r) = first_stage!(16, n);
145+
let s = last_stage!(u16, n, s, r);
146+
147+
let result_shift = precondition_shift >> 1;
148+
s >> result_shift
149+
}
150+
151+
pub const fn u32(mut n: u32) -> u32 {
152+
if n == 0 {
153+
return 0;
84154
}
155+
const EVEN_BITMASK: u32 = u32::MAX & !1;
156+
let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
157+
n <<= precondition_shift;
158+
159+
let (s, r) = first_stage!(32, n);
160+
let (s, r) = middle_stage!(32, u16, n, s, r);
161+
let s = last_stage!(u32, n, s, r);
162+
163+
let result_shift = precondition_shift >> 1;
164+
s >> result_shift
165+
}
85166

86-
#[cfg(target_pointer_width = "32")]
87-
{
88-
u32(n as u32) as usize
167+
pub const fn u64(mut n: u64) -> u64 {
168+
if n == 0 {
169+
return 0;
89170
}
171+
const EVEN_BITMASK: u32 = u32::MAX & !1;
172+
let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
173+
n <<= precondition_shift;
174+
175+
let (s, r) = first_stage!(64, n);
176+
let (s, r) = middle_stage!(64, u16, n, s, r);
177+
let (s, r) = middle_stage!(64, u32, n, s, r);
178+
let s = last_stage!(u64, n, s, r);
179+
180+
let result_shift = precondition_shift >> 1;
181+
s >> result_shift
182+
}
90183

91-
#[cfg(target_pointer_width = "64")]
92-
{
93-
u64(n as u64) as usize
184+
pub const fn u128(mut n: u128) -> u128 {
185+
if n == 0 {
186+
return 0;
94187
}
188+
const EVEN_BITMASK: u32 = u32::MAX & !1;
189+
let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
190+
n <<= precondition_shift;
191+
192+
let (s, r) = first_stage!(128, n);
193+
let (s, r) = middle_stage!(128, u16, n, s, r);
194+
let (s, r) = middle_stage!(128, u32, n, s, r);
195+
let (s, r) = middle_stage!(128, u64, n, s, r);
196+
let s = last_stage!(u128, n, s, r);
197+
198+
let result_shift = precondition_shift >> 1;
199+
s >> result_shift
95200
}
96201

97-
/// Generates a `u*_with_remainder` function that returns the [integer square
98-
/// root][1] and remainder of any input of a specific unsigned integer type.
202+
/// Returns the [integer square root][1] of any [`usize`](prim@usize) input.
99203
///
100204
/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
101205
/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
102206
/// Encyclopedia."
103-
macro_rules! unsigned_with_remainder_fn {
104-
($FullBitsT:ty, $full_bits_with_remainder_fn:ident, $HalfBitsT:ty, $half_bits_with_remainder_fn:ident) => {
105-
/// Returns the [integer square root][1] and remainder of any
106-
#[doc = concat!("[`", stringify!($FullBitsT), "`](prim@", stringify!($FullBitsT), ")")]
107-
/// input.
108-
///
109-
/// For example,
110-
#[doc = concat!("`", stringify!($full_bits_with_remainder_fn), "(17) == (4, 1)`")]
111-
/// because the integer square root of 17 is 4 and because 17 is 1
112-
/// higher than 4 squared.
113-
///
114-
/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
115-
/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
116-
/// Encyclopedia."
117-
#[must_use = "this returns the result of the operation, \
118-
without modifying the original"]
119-
const fn $full_bits_with_remainder_fn(mut n: $FullBitsT) -> ($FullBitsT, $FullBitsT) {
120-
// Performs a Karatsuba square root.
121-
// https://web.archive.org/web/20230511212802/https://inria.hal.science/inria-00072854v1/file/RR-3805.pdf
122-
123-
const HALF_BITS: u32 = <$FullBitsT>::BITS >> 1;
124-
const QUARTER_BITS: u32 = <$FullBitsT>::BITS >> 2;
207+
#[must_use = "this returns the result of the operation, \
208+
without modifying the original"]
209+
// `#[inline(always)]` because the programmer-accessible functions will use
210+
// this internally and the contents of this should be inlined there.
211+
#[inline(always)]
212+
pub const fn usize(mut n: usize) -> usize {
213+
if n == 0 {
214+
return 0;
215+
}
216+
const EVEN_BITMASK: u32 = u32::MAX & !1;
217+
let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
218+
n <<= precondition_shift;
219+
220+
let s = {
221+
#[cfg(target_pointer_width = "16")]
222+
{
223+
let (s, r) = first_stage!(16, n);
224+
last_stage!(usize, n, s, r)
225+
}
125226

126-
const LOWER_HALF_1_BITS: $FullBitsT = (1 << HALF_BITS) - 1;
127-
const LOWEST_QUARTER_1_BITS: $FullBitsT = (1 << QUARTER_BITS) - 1;
227+
#[cfg(target_pointer_width = "32")]
228+
{
229+
let (s, r) = first_stage!(32, n);
230+
let (s, r) = middle_stage!(32, u16, n, s, r);
231+
last_stage!(usize, n, s, r)
232+
}
128233

129-
let leading_zeros = n.leading_zeros();
130-
if leading_zeros >= HALF_BITS {
131-
let (s, r) = $half_bits_with_remainder_fn(n as $HalfBitsT);
132-
(s as $FullBitsT, r as $FullBitsT)
133-
} else {
134-
// If we've arrived here, there is at least one 1 bit in the
135-
// upper half of the bits. What we want to do is to shift left
136-
// an even number of bits so that the most-significant 1 bit is
137-
// as far left as it can get.
138-
//
139-
// Either the most-significant bit or its neighbor must be a one, so we shift left to make that happen.
140-
const EVEN_BITMASK: u32 = u32::MAX & !1;
141-
let precondition_shift = leading_zeros & EVEN_BITMASK;
142-
n <<= precondition_shift;
234+
#[cfg(target_pointer_width = "64")]
235+
{
236+
let (s, r) = first_stage!(64, n);
237+
let (s, r) = middle_stage!(64, u16, n, s, r);
238+
let (s, r) = middle_stage!(64, u32, n, s, r);
239+
last_stage!(usize, n, s, r)
240+
}
241+
};
143242

144-
let hi = (n >> HALF_BITS) as $HalfBitsT;
145-
let lo = n & LOWER_HALF_1_BITS;
243+
let result_shift = precondition_shift >> 1;
244+
s >> result_shift
245+
}
146246

147-
let (s_prime, r_prime) = $half_bits_with_remainder_fn(hi);
247+
pub const unsafe fn i8(n: i8) -> i8 {
248+
u8(n as u8) as i8
249+
}
148250

149-
let numerator = ((r_prime as $FullBitsT) << QUARTER_BITS) | (lo >> QUARTER_BITS);
150-
let denominator = (s_prime as $FullBitsT) << 1;
251+
pub const unsafe fn i16(n: i16) -> i16 {
252+
u16(n as u16) as i16
253+
}
151254

152-
// Integer type divided by nonzero version of that type is not a `const fn` yet.
153-
// let denominator =
154-
// unsafe { crate::num::NonZero::<$FullBitsT>::new_unchecked(denominator) };
155-
//
156-
// let q = numerator / denominator;
157-
// let u = numerator % denominator;
158-
let (q, u) = unsafe {
159-
(
160-
crate::intrinsics::unchecked_div(numerator, denominator),
161-
crate::intrinsics::unchecked_rem(numerator, denominator),
162-
)
163-
};
255+
pub const unsafe fn i32(n: i32) -> i32 {
256+
u32(n as u32) as i32
257+
}
164258

165-
let mut s = (s_prime << QUARTER_BITS) as $FullBitsT + q;
166-
let (mut r, overflow) =
167-
((u << QUARTER_BITS) | (lo & LOWEST_QUARTER_1_BITS)).overflowing_sub(q * q);
168-
if overflow {
169-
r = r.wrapping_add((s << 1) - 1);
170-
s -= 1;
171-
}
172-
let result_shift = precondition_shift >> 1;
173-
(s >> result_shift, r >> result_shift)
174-
}
175-
}
176-
};
259+
pub const unsafe fn i64(n: i64) -> i64 {
260+
u64(n as u64) as i64
177261
}
178262

179-
unsigned_with_remainder_fn!(u16, u16_with_remainder, u8, u8_with_remainder);
180-
unsigned_with_remainder_fn!(u32, u32_with_remainder, u16, u16_with_remainder);
181-
unsigned_with_remainder_fn!(u64, u64_with_remainder, u32, u32_with_remainder);
263+
pub const unsafe fn i128(n: i128) -> i128 {
264+
u128(n as u128) as i128
265+
}
182266

183-
/// Generates a `u*` function that returns the [integer square root][1] of any
267+
/*
268+
/// Generates a `u*` function that returns the [integer square root][1] of any
184269
/// input of a specific unsigned integer type.
185270
///
186271
/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
187272
/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
188-
/// Encyclopedia."
273+
/// Encyclopedia."e
189274
macro_rules! unsigned_fn {
190275
($FullBitsT:ty, $full_bits_fn:ident, $HalfBitsT:ty, $half_bits_fn:ident, $half_bits_with_remainder_fn:ident) => {
191276
/// Returns the [integer square root][1] of any
@@ -263,49 +348,7 @@ macro_rules! unsigned_fn {
263348
}
264349
}
265350
};
266-
}
267-
268-
unsigned_fn!(u16, u16, u8, u8, u8_with_remainder);
269-
unsigned_fn!(u32, u32, u16, u16, u16_with_remainder);
270-
unsigned_fn!(u64, u64, u32, u32, u32_with_remainder);
271-
unsigned_fn!(u128, u128, u64, u64, u64_with_remainder);
272-
273-
/// Generates an `i*` function that returns the [integer square root][1] of any
274-
/// **nonnegative** input of a specific signed integer type.
275-
///
276-
/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
277-
/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
278-
/// Encyclopedia."
279-
macro_rules! signed_fn {
280-
($SignedT:ty, $signed_fn:ident, $UnsignedT:ty, $unsigned_fn:ident) => {
281-
/// Returns the [integer square root][1] of any **nonnegative**
282-
#[doc = concat!("[`", stringify!($SignedT), "`](prim@", stringify!($SignedT), ")")]
283-
/// input.
284-
///
285-
/// # Safety
286-
///
287-
/// This results in undefined behavior when the input is negative.
288-
///
289-
/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
290-
/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
291-
/// Encyclopedia."
292-
#[must_use = "this returns the result of the operation, \
293-
without modifying the original"]
294-
// `#[inline(always)]` because the programmer-accessible functions will
295-
// use this internally and the contents of this should be inlined
296-
// there.
297-
#[inline(always)]
298-
pub const unsafe fn $signed_fn(n: $SignedT) -> $SignedT {
299-
$unsigned_fn(n as $UnsignedT) as $SignedT
300-
}
301-
};
302-
}
303-
304-
signed_fn!(i8, i8, u8, u8);
305-
signed_fn!(i16, i16, u16, u16);
306-
signed_fn!(i32, i32, u32, u32);
307-
signed_fn!(i64, i64, u64, u64);
308-
signed_fn!(i128, i128, u128, u128);
351+
}*/
309352

310353
/// Instantiate this panic logic once, rather than for all the isqrt methods
311354
/// on every single primitive type.

0 commit comments

Comments
 (0)