@@ -50,142 +50,227 @@ pub const fn u8(n: u8) -> u8 {
50
50
U8_ISQRT_WITH_REMAINDER [ n as usize ] . 0
51
51
}
52
52
53
- /// Returns the [integer square root][1] and remainder of any [`u8`](prim@u8)
54
- /// input.
55
- ///
56
- /// For example, `u8_with_remainder(17) == (4, 1)` because the integer square
57
- /// root of 17 is 4 and because 17 is 1 higher than 4 squared.
58
- ///
59
- /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
60
- /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
61
- /// Encyclopedia."
62
- #[ must_use = "this returns the result of the operation, \
63
- without modifying the original"]
64
- // `#[inline(always)]` because this is just a memory access.
65
- #[ inline( always) ]
66
- const fn u8_with_remainder ( n : u8 ) -> ( u8 , u8 ) {
67
- U8_ISQRT_WITH_REMAINDER [ n as usize ]
53
+ /*macro_rules! unsigned_fn {
54
+ ($unsigned_type:ident, $stages:block) => {
55
+ pub const fn $unsigned_type(mut n: u16) -> u16 {
56
+ if n == 0 {
57
+ return 0;
58
+ }
59
+ const EVEN_BITMASK: u32 = u32::MAX & !1;
60
+ let precondition_shift = n.leading_zeros() & EVEN_BITMASK;
61
+ n <<= precondition_shift;
62
+
63
+ let s = $stages;
64
+
65
+ let result_shift = precondition_shift >> 1;
66
+ s >> result_shift
67
+ }
68
+ };
69
+ }*/
70
+
71
/// Seeds the staged square root with a table lookup on the top byte of `$n`.
///
/// `$original_bits` is the bit width of `$n`. The most-significant 8 bits of
/// `$n` index `U8_ISQRT_WITH_REMAINDER`, and the selected table entry (the
/// `(root, remainder)` pair for that byte) is the value of the expansion.
macro_rules! first_stage {
    ($original_bits:literal, $n:ident) => {{
        // Everything below the top byte is discarded at this stage.
        const TOP_BYTE_SHIFT: u32 = $original_bits - 8;
        let top_byte = ($n >> TOP_BYTE_SHIFT) as usize;

        U8_ISQRT_WITH_REMAINDER[top_byte]
    }};
}
69
79
70
- /// Returns the [integer square root][1] of any [`usize`](prim@usize) input.
71
- ///
72
- /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
73
- /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
74
- /// Encyclopedia."
75
- #[ must_use = "this returns the result of the operation, \
76
- without modifying the original"]
77
- // `#[inline(always)]` because the programmer-accessible functions will use
78
- // this internally and the contents of this should be inlined there.
79
- #[ inline( always) ]
80
- pub const fn usize ( n : usize ) -> usize {
81
- #[ cfg( target_pointer_width = "16" ) ]
82
- {
83
- u16 ( n as u16 ) as usize
80
/// Performs one intermediate step of the Karatsuba square root.
///
/// The step looks at the most-significant `<$ty>::BITS` bits of `$n` (whose
/// full width is `$original_bits`). `$s` and `$r` are the root and remainder
/// produced by the previous stage for the upper half of that window. The
/// expansion evaluates to `(root, remainder)` for the whole window, both of
/// type `$ty`.
macro_rules! middle_stage {
    ($original_bits:literal, $ty:ty, $n:ident, $s:ident, $r:ident) => {{
        const HALF_BITS: u32 = <$ty>::BITS >> 1;
        const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
        const LOW_HALF_MASK: $ty = (1 << HALF_BITS) - 1;
        const LOW_QUARTER_MASK: $ty = (1 << QUARTER_BITS) - 1;
        const WINDOW_SHIFT: u32 = $original_bits - <$ty>::BITS;

        // Only the top `<$ty>::BITS` bits of the input take part in this stage.
        let window = ($n >> WINDOW_SHIFT) as $ty;
        let low_half = window & LOW_HALF_MASK;

        // Divide the previous remainder, extended with the next quarter of the
        // window, by twice the previous root.
        let dividend = (($r as $ty) << QUARTER_BITS) | (low_half >> QUARTER_BITS);
        let divisor = ($s as $ty) << 1;
        let quotient = dividend / divisor;
        let leftover = dividend % divisor;

        let root = ($s << QUARTER_BITS) as $ty + quotient;
        let minuend = (leftover << QUARTER_BITS) | (low_half & LOW_QUARTER_MASK);

        // A borrow in the subtraction means the candidate root is one too
        // large: step it down and add `2 * root - 1` back to the remainder.
        match minuend.overflowing_sub(quotient * quotient) {
            (wrapped, true) => (root - 1, wrapped.wrapping_add(2 * root - 1)),
            (exact, false) => (root, exact),
        }
    }};
}
105
+
106
/// Performs the final step of the Karatsuba square root.
///
/// `$n` is the full input of type `$ty`; `$s` and `$r` are the root and
/// remainder produced by the previous stage for its upper half. The expansion
/// evaluates to the root only, so no remainder is computed here.
macro_rules! last_stage {
    ($ty:ty, $n:ident, $s:ident, $r:ident) => {{
        const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
        const HALF_BITS: u32 = <$ty>::BITS >> 1;
        const LOW_HALF_MASK: $ty = (1 << HALF_BITS) - 1;

        let low_half = $n & LOW_HALF_MASK;

        // Divide the previous remainder, extended with the next quarter of the
        // input, by twice the previous root.
        let dividend = (($r as $ty) << QUARTER_BITS) | (low_half >> QUARTER_BITS);
        let divisor = ($s as $ty) << 1;
        let quotient = dividend / divisor;
        let candidate = ($s << QUARTER_BITS) as $ty + quotient;

        // The candidate is kept only when its square fits in `$ty` and does
        // not exceed the input; otherwise it is one too large.
        let (square, wrapped) = candidate.overflowing_mul(candidate);
        if !wrapped && square <= $n { candidate } else { candidate - 1 }
    }};
}
124
+
125
+ /*unsigned_fn!(u16, {
126
+ let (s, r) = first_stage!(16, n);
127
+ last_stage!(u16, n, s, r)
128
+ });
129
+
130
+ unsigned_fn!(u32, {
131
+ let (s, r) = first_stage!(32, n);
132
+ let (s, r) = middle_stage!(32, u16, n, s, r);
133
+ last_stage!(u32, n, s, r)
134
+ })*/
135
+
136
+ pub const fn u16 ( mut n : u16 ) -> u16 {
137
+ if n == 0 {
138
+ return 0 ;
139
+ }
140
+ const EVEN_BITMASK : u32 = u32:: MAX & !1 ;
141
+ let precondition_shift = n. leading_zeros ( ) & EVEN_BITMASK ;
142
+ n <<= precondition_shift;
143
+
144
+ let ( s, r) = first_stage ! ( 16 , n) ;
145
+ let s = last_stage ! ( u16 , n, s, r) ;
146
+
147
+ let result_shift = precondition_shift >> 1 ;
148
+ s >> result_shift
149
+ }
150
+
151
+ pub const fn u32 ( mut n : u32 ) -> u32 {
152
+ if n == 0 {
153
+ return 0 ;
84
154
}
155
+ const EVEN_BITMASK : u32 = u32:: MAX & !1 ;
156
+ let precondition_shift = n. leading_zeros ( ) & EVEN_BITMASK ;
157
+ n <<= precondition_shift;
158
+
159
+ let ( s, r) = first_stage ! ( 32 , n) ;
160
+ let ( s, r) = middle_stage ! ( 32 , u16 , n, s, r) ;
161
+ let s = last_stage ! ( u32 , n, s, r) ;
162
+
163
+ let result_shift = precondition_shift >> 1 ;
164
+ s >> result_shift
165
+ }
85
166
86
- # [ cfg ( target_pointer_width = "32" ) ]
87
- {
88
- u32 ( n as u32 ) as usize
167
+ pub const fn u64 ( mut n : u64 ) -> u64 {
168
+ if n == 0 {
169
+ return 0 ;
89
170
}
171
+ const EVEN_BITMASK : u32 = u32:: MAX & !1 ;
172
+ let precondition_shift = n. leading_zeros ( ) & EVEN_BITMASK ;
173
+ n <<= precondition_shift;
174
+
175
+ let ( s, r) = first_stage ! ( 64 , n) ;
176
+ let ( s, r) = middle_stage ! ( 64 , u16 , n, s, r) ;
177
+ let ( s, r) = middle_stage ! ( 64 , u32 , n, s, r) ;
178
+ let s = last_stage ! ( u64 , n, s, r) ;
179
+
180
+ let result_shift = precondition_shift >> 1 ;
181
+ s >> result_shift
182
+ }
90
183
91
- # [ cfg ( target_pointer_width = "64" ) ]
92
- {
93
- u64 ( n as u64 ) as usize
184
+ pub const fn u128 ( mut n : u128 ) -> u128 {
185
+ if n == 0 {
186
+ return 0 ;
94
187
}
188
+ const EVEN_BITMASK : u32 = u32:: MAX & !1 ;
189
+ let precondition_shift = n. leading_zeros ( ) & EVEN_BITMASK ;
190
+ n <<= precondition_shift;
191
+
192
+ let ( s, r) = first_stage ! ( 128 , n) ;
193
+ let ( s, r) = middle_stage ! ( 128 , u16 , n, s, r) ;
194
+ let ( s, r) = middle_stage ! ( 128 , u32 , n, s, r) ;
195
+ let ( s, r) = middle_stage ! ( 128 , u64 , n, s, r) ;
196
+ let s = last_stage ! ( u128 , n, s, r) ;
197
+
198
+ let result_shift = precondition_shift >> 1 ;
199
+ s >> result_shift
95
200
}
96
201
97
- /// Generates a `u*_with_remainder` function that returns the [integer square
98
- /// root][1] and remainder of any input of a specific unsigned integer type.
202
+ /// Returns the [integer square root][1] of any [`usize`](prim@usize) input.
99
203
///
100
204
/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
101
205
/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
102
206
/// Encyclopedia."
103
- macro_rules! unsigned_with_remainder_fn {
104
- ( $FullBitsT: ty, $full_bits_with_remainder_fn: ident, $HalfBitsT: ty, $half_bits_with_remainder_fn: ident) => {
105
- /// Returns the [integer square root][1] and remainder of any
106
- #[ doc = concat!( "[`" , stringify!( $FullBitsT) , "`](prim@" , stringify!( $FullBitsT) , ")" ) ]
107
- /// input.
108
- ///
109
- /// For example,
110
- #[ doc = concat!( "`" , stringify!( $full_bits_with_remainder_fn) , "(17) == (4, 1)`" ) ]
111
- /// because the integer square root of 17 is 4 and because 17 is 1
112
- /// higher than 4 squared.
113
- ///
114
- /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
115
- /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
116
- /// Encyclopedia."
117
- #[ must_use = "this returns the result of the operation, \
118
- without modifying the original"]
119
- const fn $full_bits_with_remainder_fn( mut n: $FullBitsT) -> ( $FullBitsT, $FullBitsT) {
120
- // Performs a Karatsuba square root.
121
- // https://web.archive.org/web/20230511212802/https://inria.hal.science/inria-00072854v1/file/RR-3805.pdf
122
-
123
- const HALF_BITS : u32 = <$FullBitsT>:: BITS >> 1 ;
124
- const QUARTER_BITS : u32 = <$FullBitsT>:: BITS >> 2 ;
207
+ #[ must_use = "this returns the result of the operation, \
208
+ without modifying the original"]
209
+ // `#[inline(always)]` because the programmer-accessible functions will use
210
+ // this internally and the contents of this should be inlined there.
211
+ #[ inline( always) ]
212
+ pub const fn usize ( mut n : usize ) -> usize {
213
+ if n == 0 {
214
+ return 0 ;
215
+ }
216
+ const EVEN_BITMASK : u32 = u32:: MAX & !1 ;
217
+ let precondition_shift = n. leading_zeros ( ) & EVEN_BITMASK ;
218
+ n <<= precondition_shift;
219
+
220
+ let s = {
221
+ #[ cfg( target_pointer_width = "16" ) ]
222
+ {
223
+ let ( s, r) = first_stage ! ( 16 , n) ;
224
+ last_stage ! ( usize , n, s, r)
225
+ }
125
226
126
- const LOWER_HALF_1_BITS : $FullBitsT = ( 1 << HALF_BITS ) - 1 ;
127
- const LOWEST_QUARTER_1_BITS : $FullBitsT = ( 1 << QUARTER_BITS ) - 1 ;
227
+ #[ cfg( target_pointer_width = "32" ) ]
228
+ {
229
+ let ( s, r) = first_stage ! ( 32 , n) ;
230
+ let ( s, r) = middle_stage ! ( 32 , u16 , n, s, r) ;
231
+ last_stage ! ( usize , n, s, r)
232
+ }
128
233
129
- let leading_zeros = n. leading_zeros( ) ;
130
- if leading_zeros >= HALF_BITS {
131
- let ( s, r) = $half_bits_with_remainder_fn( n as $HalfBitsT) ;
132
- ( s as $FullBitsT, r as $FullBitsT)
133
- } else {
134
- // If we've arrived here, there is at least one 1 bit in the
135
- // upper half of the bits. What we want to do is to shift left
136
- // an even number of bits so that the most-significant 1 bit is
137
- // as far left as it can get.
138
- //
139
- // Either the most-significant bit or its neighbor must be a one, so we shift left to make that happen.
140
- const EVEN_BITMASK : u32 = u32 :: MAX & !1 ;
141
- let precondition_shift = leading_zeros & EVEN_BITMASK ;
142
- n <<= precondition_shift;
234
+ #[ cfg( target_pointer_width = "64" ) ]
235
+ {
236
+ let ( s, r) = first_stage ! ( 64 , n) ;
237
+ let ( s, r) = middle_stage ! ( 64 , u16 , n, s, r) ;
238
+ let ( s, r) = middle_stage ! ( 64 , u32 , n, s, r) ;
239
+ last_stage ! ( usize , n, s, r)
240
+ }
241
+ } ;
143
242
144
- let hi = ( n >> HALF_BITS ) as $HalfBitsT;
145
- let lo = n & LOWER_HALF_1_BITS ;
243
+ let result_shift = precondition_shift >> 1 ;
244
+ s >> result_shift
245
+ }
146
246
147
- let ( s_prime, r_prime) = $half_bits_with_remainder_fn( hi) ;
247
+ pub const unsafe fn i8 ( n : i8 ) -> i8 {
248
+ u8 ( n as u8 ) as i8
249
+ }
148
250
149
- let numerator = ( ( r_prime as $FullBitsT) << QUARTER_BITS ) | ( lo >> QUARTER_BITS ) ;
150
- let denominator = ( s_prime as $FullBitsT) << 1 ;
251
+ pub const unsafe fn i16 ( n : i16 ) -> i16 {
252
+ u16 ( n as u16 ) as i16
253
+ }
151
254
152
- // Integer type divided by nonzero version of that type is not a `const fn` yet.
153
- // let denominator =
154
- // unsafe { crate::num::NonZero::<$FullBitsT>::new_unchecked(denominator) };
155
- //
156
- // let q = numerator / denominator;
157
- // let u = numerator % denominator;
158
- let ( q, u) = unsafe {
159
- (
160
- crate :: intrinsics:: unchecked_div( numerator, denominator) ,
161
- crate :: intrinsics:: unchecked_rem( numerator, denominator) ,
162
- )
163
- } ;
255
+ pub const unsafe fn i32 ( n : i32 ) -> i32 {
256
+ u32 ( n as u32 ) as i32
257
+ }
164
258
165
- let mut s = ( s_prime << QUARTER_BITS ) as $FullBitsT + q;
166
- let ( mut r, overflow) =
167
- ( ( u << QUARTER_BITS ) | ( lo & LOWEST_QUARTER_1_BITS ) ) . overflowing_sub( q * q) ;
168
- if overflow {
169
- r = r. wrapping_add( ( s << 1 ) - 1 ) ;
170
- s -= 1 ;
171
- }
172
- let result_shift = precondition_shift >> 1 ;
173
- ( s >> result_shift, r >> result_shift)
174
- }
175
- }
176
- } ;
259
+ pub const unsafe fn i64 ( n : i64 ) -> i64 {
260
+ u64 ( n as u64 ) as i64
177
261
}
178
262
179
- unsigned_with_remainder_fn ! ( u16 , u16_with_remainder , u8 , u8_with_remainder ) ;
180
- unsigned_with_remainder_fn ! ( u32 , u32_with_remainder , u16 , u16_with_remainder ) ;
181
- unsigned_with_remainder_fn ! ( u64 , u64_with_remainder , u32 , u32_with_remainder ) ;
263
+ pub const unsafe fn i128 ( n : i128 ) -> i128 {
264
+ u128 ( n as u128 ) as i128
265
+ }
182
266
183
- /// Generates a `u*` function that returns the [integer square root][1] of any
267
+ /*
268
+ /// Generates a `u*` function that returns the [integer square root][1] of any
184
269
/// input of a specific unsigned integer type.
185
270
///
186
271
/// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
187
272
/// "Wikipedia contributors. Integer square root. Wikipedia, The Free
188
- /// Encyclopedia."
273
+ /// Encyclopedia."
189
274
macro_rules! unsigned_fn {
190
275
($FullBitsT:ty, $full_bits_fn:ident, $HalfBitsT:ty, $half_bits_fn:ident, $half_bits_with_remainder_fn:ident) => {
191
276
/// Returns the [integer square root][1] of any
@@ -263,49 +348,7 @@ macro_rules! unsigned_fn {
263
348
}
264
349
}
265
350
};
266
- }
267
-
268
- unsigned_fn ! ( u16 , u16 , u8 , u8 , u8_with_remainder) ;
269
- unsigned_fn ! ( u32 , u32 , u16 , u16 , u16_with_remainder) ;
270
- unsigned_fn ! ( u64 , u64 , u32 , u32 , u32_with_remainder) ;
271
- unsigned_fn ! ( u128 , u128 , u64 , u64 , u64_with_remainder) ;
272
-
273
- /// Generates an `i*` function that returns the [integer square root][1] of any
274
- /// **nonnegative** input of a specific signed integer type.
275
- ///
276
- /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
277
- /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
278
- /// Encyclopedia."
279
- macro_rules! signed_fn {
280
- ( $SignedT: ty, $signed_fn: ident, $UnsignedT: ty, $unsigned_fn: ident) => {
281
- /// Returns the [integer square root][1] of any **nonnegative**
282
- #[ doc = concat!( "[`" , stringify!( $SignedT) , "`](prim@" , stringify!( $SignedT) , ")" ) ]
283
- /// input.
284
- ///
285
- /// # Safety
286
- ///
287
- /// This results in undefined behavior when the input is negative.
288
- ///
289
- /// [1]: <https://en.wikipedia.org/wiki/Integer_square_root>
290
- /// "Wikipedia contributors. Integer square root. Wikipedia, The Free
291
- /// Encyclopedia."
292
- #[ must_use = "this returns the result of the operation, \
293
- without modifying the original"]
294
- // `#[inline(always)]` because the programmer-accessible functions will
295
- // use this internally and the contents of this should be inlined
296
- // there.
297
- #[ inline( always) ]
298
- pub const unsafe fn $signed_fn( n: $SignedT) -> $SignedT {
299
- $unsigned_fn( n as $UnsignedT) as $SignedT
300
- }
301
- } ;
302
- }
303
-
304
- signed_fn ! ( i8 , i8 , u8 , u8 ) ;
305
- signed_fn ! ( i16 , i16 , u16 , u16 ) ;
306
- signed_fn ! ( i32 , i32 , u32 , u32 ) ;
307
- signed_fn ! ( i64 , i64 , u64 , u64 ) ;
308
- signed_fn ! ( i128 , i128 , u128 , u128 ) ;
351
+ }*/
309
352
310
353
/// Instantiate this panic logic once, rather than for all the isqrt methods
311
354
/// on every single primitive type.
0 commit comments