Skip to content

Commit f4bbacc

Browse files
committed
First modifications to original pull request.
* Simplified `const U8_ISQRT_WITH_REMAINDER` generation code.
* Inlined `intermediate` support functions, speeding up underlying `isqrt` functions.
* Replaced benchmarks with `ilog10` equivalents.
* When telling the optimizer the output range of `checked_isqrt` and `isqrt`, explain why that range is correct.
* Other minor changes.
1 parent fea585d commit f4bbacc

File tree

5 files changed

+319
-365
lines changed

5 files changed

+319
-365
lines changed
+47-196
Original file line numberDiff line numberDiff line change
@@ -1,207 +1,58 @@
1-
//! This benchmarks the `Integer::isqrt` methods.
2-
3-
macro_rules! benches {
4-
($($T:ident)+) => {
5-
$(
6-
mod $T {
7-
use test::{black_box, Bencher};
8-
9-
// Benchmark the square roots of:
10-
//
11-
// * the first 1,024 perfect squares
12-
// * halfway between each of the first 1,024 perfect squares
13-
// and the next perfect square
14-
// * the next perfect square after each of the first 1,024
15-
// perfect squares, minus one
16-
// * the last 1,024 perfect squares
17-
// * the last 1,024 perfect squares, minus one
18-
// * halfway between each of the last 1,024 perfect squares
19-
// and the previous perfect square
20-
#[bench]
21-
fn isqrt(bench: &mut Bencher) {
22-
let mut inputs = Vec::with_capacity(6 * 1_024);
23-
24-
// The inputs to benchmark are worked out by using the fact
25-
// that the nth nonzero perfect square is the sum of the
26-
// first n odd numbers:
27-
//
28-
// 1 = 1
29-
// 4 = 1 + 3
30-
// 9 = 1 + 3 + 5
31-
// 16 = 1 + 3 + 5 + 7
32-
//
33-
// Note also that the last odd number added in is two times
34-
// the square root of the previous perfect square, plus
35-
// one:
36-
//
37-
// 1 = 2*0 + 1
38-
// 3 = 2*1 + 1
39-
// 5 = 2*2 + 1
40-
// 7 = 2*3 + 1
41-
//
42-
// That means we can add the square root of this perfect
43-
// square once to get about halfway to the next perfect
44-
// square, then we can add the square root of this perfect
45-
// square again to get to the next perfect square minus
46-
// one, then we can add one to get to the next perfect
47-
// square.
48-
//
49-
// Here we include, for each of the first 1,024 perfect
50-
// squares:
51-
//
52-
// * the current perfect square
53-
// * about halfway to the next perfect square
54-
// * the next perfect square, minus one
55-
let mut n: $T = 0;
56-
for sqrt_n in 0..1_024.min((1_u128 << (($T::BITS - $T::MAX.leading_zeros())/2)) - 1) as $T {
57-
inputs.push(n);
58-
n += sqrt_n;
59-
inputs.push(n);
60-
n += sqrt_n;
61-
inputs.push(n);
62-
n += 1;
1+
use rand::Rng;
2+
use test::{black_box, Bencher};
3+
4+
macro_rules! int_sqrt_bench {
5+
($t:ty, $predictable:ident, $random:ident, $random_small:ident) => {
6+
#[bench]
7+
fn $predictable(bench: &mut Bencher) {
8+
bench.iter(|| {
9+
for n in 0..(<$t>::BITS / 8) {
10+
for i in 1..=(100 as $t) {
11+
let x = black_box(i << (n * 8));
12+
black_box(x.isqrt());
6313
}
64-
65-
// Similarly, we include, for each of the last 1,024
66-
// perfect squares:
67-
//
68-
// * the current perfect square
69-
// * the current perfect square, minus one
70-
// * about halfway to the previous perfect square
71-
let maximum_sqrt = $T::MAX.isqrt();
72-
let mut n = maximum_sqrt * maximum_sqrt;
73-
74-
for sqrt_n in (maximum_sqrt - 1_024.min((1_u128 << (($T::BITS - 1)/2)) - 1) as $T..maximum_sqrt).rev() {
75-
inputs.push(n);
76-
n -= 1;
77-
inputs.push(n);
78-
n -= sqrt_n;
79-
inputs.push(n);
80-
n -= sqrt_n;
81-
}
82-
83-
bench.iter(|| {
84-
for x in &inputs {
85-
black_box(black_box(x).isqrt());
86-
}
87-
});
8814
}
89-
}
90-
)*
91-
};
92-
}
93-
94-
macro_rules! push_n {
95-
($T:ident, $inputs:ident, $n:ident) => {
96-
if $n != 0 {
97-
$inputs.push(
98-
core::num::$T::new($n)
99-
.expect("Cannot create a new `NonZero` value from a nonzero value"),
100-
);
15+
});
10116
}
102-
};
103-
}
104-
105-
macro_rules! nonzero_benches {
106-
($mod:ident $T:ident $RegularT:ident) => {
107-
mod $mod {
108-
use test::{black_box, Bencher};
109-
110-
// Benchmark the square roots of:
111-
//
112-
// * the first 1,024 perfect squares
113-
// * halfway between each of the first 1,024 perfect squares
114-
// and the next perfect square
115-
// * the next perfect square after each of the first 1,024
116-
// perfect squares, minus one
117-
// * the last 1,024 perfect squares
118-
// * the last 1,024 perfect squares, minus one
119-
// * halfway between each of the last 1,024 perfect squares
120-
// and the previous perfect square
121-
#[bench]
122-
fn isqrt(bench: &mut Bencher) {
123-
let mut inputs: Vec<core::num::$T> = Vec::with_capacity(6 * 1_024);
12417

125-
// The inputs to benchmark are worked out by using the fact
126-
// that the nth nonzero perfect square is the sum of the
127-
// first n odd numbers:
128-
//
129-
// 1 = 1
130-
// 4 = 1 + 3
131-
// 9 = 1 + 3 + 5
132-
// 16 = 1 + 3 + 5 + 7
133-
//
134-
// Note also that the last odd number added in is two times
135-
// the square root of the previous perfect square, plus
136-
// one:
137-
//
138-
// 1 = 2*0 + 1
139-
// 3 = 2*1 + 1
140-
// 5 = 2*2 + 1
141-
// 7 = 2*3 + 1
142-
//
143-
// That means we can add the square root of this perfect
144-
// square once to get about halfway to the next perfect
145-
// square, then we can add the square root of this perfect
146-
// square again to get to the next perfect square minus
147-
// one, then we can add one to get to the next perfect
148-
// square.
149-
//
150-
// Here we include, for each of the first 1,024 perfect
151-
// squares:
152-
//
153-
// * the current perfect square
154-
// * about halfway to the next perfect square
155-
// * the next perfect square, minus one
156-
let mut n: $RegularT = 0;
157-
for sqrt_n in 0..1_024
158-
.min((1_u128 << (($RegularT::BITS - $RegularT::MAX.leading_zeros()) / 2)) - 1)
159-
as $RegularT
160-
{
161-
push_n!($T, inputs, n);
162-
n += sqrt_n;
163-
push_n!($T, inputs, n);
164-
n += sqrt_n;
165-
push_n!($T, inputs, n);
166-
n += 1;
18+
#[bench]
19+
fn $random(bench: &mut Bencher) {
20+
let mut rng = crate::bench_rng();
21+
/* Exponentially distributed random numbers from the whole range of the type. */
22+
let numbers: Vec<$t> = (0..256)
23+
.map(|_| {
24+
let x = rng.gen::<$t>() >> rng.gen_range(0..<$t>::BITS);
25+
if x != 0 { x } else { 1 }
26+
})
27+
.collect();
28+
bench.iter(|| {
29+
for x in &numbers {
30+
black_box(black_box(x).isqrt());
16731
}
32+
});
33+
}
16834

169-
// Similarly, we include, for each of the last 1,024
170-
// perfect squares:
171-
//
172-
// * the current perfect square
173-
// * the current perfect square, minus one
174-
// * about halfway to the previous perfect square
175-
let maximum_sqrt = $RegularT::MAX.isqrt();
176-
let mut n = maximum_sqrt * maximum_sqrt;
177-
178-
for sqrt_n in (maximum_sqrt
179-
- 1_024.min((1_u128 << (($RegularT::BITS - 1) / 2)) - 1) as $RegularT
180-
..maximum_sqrt)
181-
.rev()
182-
{
183-
push_n!($T, inputs, n);
184-
n -= 1;
185-
push_n!($T, inputs, n);
186-
n -= sqrt_n;
187-
push_n!($T, inputs, n);
188-
n -= sqrt_n;
35+
#[bench]
36+
fn $random_small(bench: &mut Bencher) {
37+
let mut rng = crate::bench_rng();
38+
/* Exponentially distributed random numbers from the range 0..256. */
39+
let numbers: Vec<$t> = (0..256)
40+
.map(|_| {
41+
let x = (rng.gen::<u8>() >> rng.gen_range(0..u8::BITS)) as $t;
42+
if x != 0 { x } else { 1 }
43+
})
44+
.collect();
45+
bench.iter(|| {
46+
for x in &numbers {
47+
black_box(black_box(x).isqrt());
18948
}
190-
191-
bench.iter(|| {
192-
for n in &inputs {
193-
black_box(black_box(n).isqrt());
194-
}
195-
});
196-
}
49+
});
19750
}
19851
};
19952
}
20053

201-
benches!(i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize);
202-
nonzero_benches!(non_zero_u8 NonZeroU8 u8);
203-
nonzero_benches!(non_zero_u16 NonZeroU16 u16);
204-
nonzero_benches!(non_zero_u32 NonZeroU32 u32);
205-
nonzero_benches!(non_zero_u64 NonZeroU64 u64);
206-
nonzero_benches!(non_zero_u128 NonZeroU128 u128);
207-
nonzero_benches!(non_zero_usize NonZeroUsize usize);
54+
int_sqrt_bench! {u8, u8_sqrt_predictable, u8_sqrt_random, u8_sqrt_random_small}
55+
int_sqrt_bench! {u16, u16_sqrt_predictable, u16_sqrt_random, u16_sqrt_random_small}
56+
int_sqrt_bench! {u32, u32_sqrt_predictable, u32_sqrt_random, u32_sqrt_random_small}
57+
int_sqrt_bench! {u64, u64_sqrt_predictable, u64_sqrt_random, u64_sqrt_random_small}
58+
int_sqrt_bench! {u128, u128_sqrt_predictable, u128_sqrt_random, u128_sqrt_random_small}

library/core/src/num/int_macros.rs

+28-19
Original file line numberDiff line numberDiff line change
@@ -1580,14 +1580,33 @@ macro_rules! int_impl {
15801580
if self < 0 {
15811581
None
15821582
} else {
1583-
let result = crate::num::int_sqrt::$ActualT(self as $ActualT) as $SelfT;
1583+
// SAFETY: Input is nonnegative in this `else` branch.
1584+
let result = unsafe {
1585+
crate::num::int_sqrt::$ActualT(self as $ActualT) as $SelfT
1586+
};
15841587

1585-
// SAFETY: Inform the optimizer that square roots of
1586-
// nonnegative integers are nonnegative and what the maximum
1587-
// result is.
1588+
// SAFETY: Inform the optimizer what the range of outputs is.
1589+
//
1590+
// Integer square root is a monotonically nondecreasing
1591+
// function, which means that increasing the input will never
1592+
// cause the output to decrease.
1593+
//
1594+
// The minimum input in this `else` branch is 0. The maximum
1595+
// input is `<$ActualT>::MAX`.
1596+
//
1597+
// When n is 0, sqrt(n) is 0. If n increases above 0, sqrt(n)
1598+
// can't decrease below 0, so sqrt(n) can't decrease below 0 no
1599+
// matter what n is.
1600+
//
1601+
// When n is below `<$ActualT>::MAX`, sqrt(n) can't decrease at
1602+
// all when you increase n to `<$ActualT>::MAX`, so sqrt(n)
1603+
// can't be above sqrt(`<$ActualT>::MAX`) no matter what n is.
15881604
unsafe {
15891605
crate::hint::assert_unchecked(result >= 0);
1590-
const MAX_RESULT: $SelfT = crate::num::int_sqrt::$ActualT($ActualT::MAX) as $SelfT;
1606+
// SAFETY: `<$ActualT>::MAX` is nonnegative.
1607+
const MAX_RESULT: $SelfT = unsafe {
1608+
crate::num::int_sqrt::$ActualT(<$ActualT>::MAX) as $SelfT
1609+
};
15911610
crate::hint::assert_unchecked(result <= MAX_RESULT);
15921611
}
15931612

@@ -2776,22 +2795,12 @@ macro_rules! int_impl {
27762795
#[must_use = "this returns the result of the operation, \
27772796
without modifying the original"]
27782797
#[inline]
2798+
#[track_caller]
27792799
pub const fn isqrt(self) -> Self {
2780-
if self < 0 {
2781-
crate::num::int_sqrt::panic_for_negative_argument();
2800+
if let Some(sqrt) = self.checked_isqrt() {
2801+
sqrt
27822802
} else {
2783-
let result = crate::num::int_sqrt::$ActualT(self as $ActualT) as $SelfT;
2784-
2785-
// SAFETY: Inform the optimizer that square roots of
2786-
// nonnegative integers are nonnegative and what the maximum
2787-
// result is.
2788-
unsafe {
2789-
crate::hint::assert_unchecked(result >= 0);
2790-
const MAX_RESULT: $SelfT = crate::num::int_sqrt::$ActualT($ActualT::MAX) as $SelfT;
2791-
crate::hint::assert_unchecked(result <= MAX_RESULT);
2792-
}
2793-
2794-
result
2803+
crate::num::int_sqrt::panic_for_negative_argument()
27952804
}
27962805
}
27972806

0 commit comments

Comments
 (0)