Skip to content

Commit c7c94fd

Browse files
clean up a bit, add package metadata
1 parent cd56a76 commit c7c94fd

File tree

7 files changed

+337
-346
lines changed

7 files changed

+337
-346
lines changed

Cargo.toml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
[package]
2-
name = "fast_math"
2+
name = "fath"
3+
authors = [ "burgerindividual", "duplexsystem" ]
34
version = "0.1.0"
45
edition = "2021"
5-
6-
[dependencies]
7-
#num = "0.4.0"
8-
#fixed = "2.0.0-alpha.11"
6+
license = "GPL-3.0"
7+
repository = "https://github.com/burgerindividual/fath"
8+
description = "Fa(st ma)th library built for speed."
99

1010
[dev-dependencies]
1111
rand = "0.8.5"
@@ -16,4 +16,3 @@ opt-level = 3
1616
[profile.release]
1717
opt-level = 3
1818
panic = "abort"
19-

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
# fast_math
1+
# fath
22
SIMD-Capable fast approximate math library written in Rust

src/lib.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,4 @@ pub mod shared;
1111
pub mod simd;
1212

1313
#[cfg(test)]
14-
pub mod test;
15-
16-
pub mod comp_test;
14+
pub mod test;

src/main.rs

Lines changed: 0 additions & 9 deletions
This file was deleted.

src/test/checks.rs

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
use crate::shared::float::*;
2+
use crate::shared::int::*;
3+
use core::f32::consts::FRAC_PI_2;
4+
use core::ops::Range;
5+
use core::simd::*;
6+
use rand::rngs::ThreadRng;
7+
use rand::{thread_rng, Rng, RngCore};
8+
// #[cfg(target_arch = "x86")]
9+
// #[allow(unused_imports)]
10+
// use core::arch::x86::*;
11+
// #[cfg(target_arch = "x86_64")]
12+
// #[allow(unused_imports)]
13+
// use core::arch::x86_64::*;
14+
15+
const ITERS: usize = 1 << 20;
16+
17+
/// Compares the scalar `sin_fast_approx` against `f32::sin` on random inputs.
///
/// `ITERS` samples are drawn from `[-π/2, π/2)`. For each sample, the variants
/// selected by const parameters 0 through 3 must each stay within their own
/// absolute error bound; the bounds tighten as the parameter grows.
#[inline(never)]
#[test]
pub fn scalar_error() {
    // Half-open input interval the approximation is exercised on.
    const RANGE: Range<f32> = -FRAC_PI_2..FRAC_PI_2;
    // Largest tolerated absolute error for `sin_fast_approx::<0>` .. `::<3>`.
    const MAX_ERROR_0: f32 = 2.9e-2_f32;
    const MAX_ERROR_1: f32 = 6.0e-4_f32;
    const MAX_ERROR_2: f32 = 6.9e-6_f32;
    const MAX_ERROR_3: f32 = 2.7e-7_f32;

    let mut rng = thread_rng();
    let samples = (0..ITERS).map(|_| rng.gen_range(RANGE));

    for x in samples {
        // Reference value from the standard library.
        let exact = x.sin();

        // NOTE(review): `sin_fast_approx` is an unsafe method whose contract is not
        // visible in this file; presumably inputs inside `RANGE` satisfy it — confirm.
        let approx_0 = unsafe { x.sin_fast_approx::<0>() };
        let approx_1 = unsafe { x.sin_fast_approx::<1>() };
        let approx_2 = unsafe { x.sin_fast_approx::<2>() };
        let approx_3 = unsafe { x.sin_fast_approx::<3>() };

        let error_0 = (exact - approx_0).abs();
        let error_1 = (exact - approx_1).abs();
        let error_2 = (exact - approx_2).abs();
        let error_3 = (exact - approx_3).abs();

        assert!(
            error_0 <= MAX_ERROR_0,
            "Error greater than set maximum: true: {exact}, approx: {approx_0}, x: {x}"
        );
        assert!(
            error_1 <= MAX_ERROR_1,
            "Error greater than set maximum: true: {exact}, approx: {approx_1}, x: {x}"
        );
        assert!(
            error_2 <= MAX_ERROR_2,
            "Error greater than set maximum: true: {exact}, approx: {approx_2}, x: {x}"
        );
        assert!(
            error_3 <= MAX_ERROR_3,
            "Error greater than set maximum: true: {exact}, approx: {approx_3}, x: {x}"
        );
    }
}
54+
55+
#[inline(never)]
56+
#[test]
57+
pub fn simd_error() {
58+
const RANGE: Range<f32> = -FRAC_PI_2..FRAC_PI_2;
59+
const MAX_ERROR_0: f32 = 2.9e-2_f32;
60+
const MAX_ERROR_1: f32 = 6.0e-4_f32;
61+
const MAX_ERROR_2: f32 = 6.9e-6_f32;
62+
const MAX_ERROR_3: f32 = 2.7e-7_f32;
63+
64+
let rng = &mut thread_rng();
65+
66+
test::<2>(rng);
67+
test::<4>(rng);
68+
test::<8>(rng);
69+
test::<16>(rng);
70+
71+
#[inline(always)]
72+
fn test<const LANES: usize>(rng: &mut ThreadRng)
73+
where
74+
LaneCount<LANES>: SupportedLaneCount,
75+
{
76+
for _i in 0..ITERS {
77+
let mut vec_uninit: core::mem::MaybeUninit<Simd<f32, LANES>> =
78+
core::mem::MaybeUninit::uninit();
79+
let vec_ptr = vec_uninit.as_mut_ptr();
80+
81+
for i in 0..LANES {
82+
unsafe {
83+
(*vec_ptr)[i] = rng.gen_range(RANGE);
84+
}
85+
}
86+
87+
let x = unsafe { vec_uninit.assume_init() };
88+
89+
let approx_0 = unsafe { x.sin_fast_approx::<0>() };
90+
let approx_1 = unsafe { x.sin_fast_approx::<1>() };
91+
let approx_2 = unsafe { x.sin_fast_approx::<2>() };
92+
let approx_3 = unsafe { x.sin_fast_approx::<3>() };
93+
94+
let mut vec_uninit: core::mem::MaybeUninit<Simd<f32, LANES>> =
95+
core::mem::MaybeUninit::uninit();
96+
let vec_ptr = vec_uninit.as_mut_ptr();
97+
98+
for i in 0..LANES {
99+
unsafe {
100+
(*vec_ptr)[i] = x[i].sin();
101+
}
102+
}
103+
104+
let exact = unsafe { vec_uninit.assume_init() };
105+
106+
assert!(
107+
(exact - approx_0)
108+
.abs()
109+
.simd_le(Simd::splat(MAX_ERROR_0))
110+
.all(),
111+
"Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}",
112+
exact,
113+
approx_0,
114+
x
115+
);
116+
assert!(
117+
(exact - approx_1)
118+
.abs()
119+
.simd_le(Simd::splat(MAX_ERROR_1))
120+
.all(),
121+
"Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}",
122+
exact,
123+
approx_1,
124+
x
125+
);
126+
assert!(
127+
(exact - approx_2)
128+
.abs()
129+
.simd_le(Simd::splat(MAX_ERROR_2))
130+
.all(),
131+
"Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}",
132+
exact,
133+
approx_2,
134+
x
135+
);
136+
assert!(
137+
(exact - approx_3)
138+
.abs()
139+
.simd_le(Simd::splat(MAX_ERROR_3))
140+
.all(),
141+
"Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}",
142+
exact,
143+
approx_3,
144+
x
145+
);
146+
}
147+
}
148+
}
149+
150+
#[inline(never)]
151+
#[test]
152+
pub fn simd_ilog_error() {
153+
let rng = &mut thread_rng();
154+
155+
test::<2>(rng);
156+
test::<4>(rng);
157+
test::<8>(rng);
158+
test::<16>(rng);
159+
160+
#[inline(always)]
161+
fn test<const LANES: usize>(rng: &mut ThreadRng)
162+
where
163+
LaneCount<LANES>: SupportedLaneCount,
164+
{
165+
for _i in 0..ITERS {
166+
let mut vec_uninit: core::mem::MaybeUninit<Simd<u32, LANES>> =
167+
core::mem::MaybeUninit::uninit();
168+
let vec_ptr = vec_uninit.as_mut_ptr();
169+
170+
for i in 0..LANES {
171+
unsafe {
172+
(*vec_ptr)[i] = rng.next_u32();
173+
}
174+
}
175+
176+
let x = unsafe { vec_uninit.assume_init() };
177+
178+
let fast = unsafe { x.ilog_const_base_unchecked::<3>() };
179+
180+
let mut vec_uninit: core::mem::MaybeUninit<Simd<u32, LANES>> =
181+
core::mem::MaybeUninit::uninit();
182+
let vec_ptr = vec_uninit.as_mut_ptr();
183+
184+
for i in 0..LANES {
185+
unsafe {
186+
(*vec_ptr)[i] = x[i].ilog(3);
187+
}
188+
}
189+
190+
let exact = unsafe { vec_uninit.assume_init() };
191+
192+
assert!(
193+
exact.simd_eq(fast).all(),
194+
"Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}",
195+
exact,
196+
fast,
197+
x
198+
);
199+
}
200+
}
201+
}
202+
203+
// /// Options:
204+
// /// --cfg print_values
205+
// /// --cfg print_error
206+
// /// --cfg print_cycles
207+
// #[allow(dead_code)]
208+
// pub fn main() {
209+
// const STEPS: usize = 1000; //1 << 24;
210+
// const WARMUP_ITRS: usize = 1 << 24;
211+
// const START: f32 = 0.0;
212+
// const END: f32 = FRAC_PI_2;
213+
//
214+
// const ITRS: usize = STEPS / LANES;
215+
// const SLICE: f32 = (END - START) / (STEPS as f32);
216+
// const INCR: Simd<f32, LANES> = Simd::from_array([SLICE * LANES as f32; LANES]);
217+
//
218+
// println!("Count: {STEPS}");
219+
//
220+
// #[allow(unused_mut)]
221+
// let mut vec = Simd::<f32, LANES>::splat(SLICE).mul_add(
222+
// Simd::from_slice(&(0..LANES).collect::<Box<[usize]>>()).cast::<f32>(),
223+
// Simd::splat(START),
224+
// );
225+
//
226+
// if cfg!(print_cycles) {
227+
// if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
228+
// for _i in 0..WARMUP_ITRS {
229+
// unsafe {
230+
// black_box(wrap_auto_vectorize!(
231+
// sin_fast_approx::<PRECISION, COS>,
232+
// LANES,
233+
// black_box(vec)
234+
// ));
235+
// }
236+
// }
237+
// } else {
238+
// panic!("CPU cycle timings are not supported on this platform");
239+
// }
240+
// }
241+
//
242+
// #[allow(unused_variables)]
243+
// let mut total_error = 0.0_f64;
244+
// let mut max_error = 0.0_f64;
245+
// #[allow(unused_variables)]
246+
// let mut built_string: String;
247+
// #[cfg(print_values)]
248+
// {
249+
// built_string = String::with_capacity(STEPS * 16);
250+
// }
251+
// #[allow(unused_variables, unused_mut)]
252+
// let mut cycles_1: u64;
253+
// #[cfg(all(print_cycles, any(target_arch = "x86", target_arch = "x86_64")))]
254+
// unsafe {
255+
// let mut _unused = 0_u32;
256+
// cycles_1 = __rdtscp(&mut _unused);
257+
// }
258+
//
259+
// for _i in 0..ITRS {
260+
// let result = unsafe {
261+
// black_box(wrap_auto_vectorize!(
262+
// sin_fast_approx::<PRECISION, COS>,
263+
// LANES,
264+
// black_box(vec)
265+
// ))
266+
// };
267+
//
268+
// if cfg!(print_error) {
269+
// let mut array: [f32; LANES] = [0.0; LANES];
270+
//
271+
// for i in 0..LANES {
272+
// array[i] = if COS { vec[i].cos() } else { vec[i].sin() };
273+
// }
274+
//
275+
// let true_result = Simd::from_array(array);
276+
//
277+
// // the range of sin and cos are between -1 and 1
278+
// let distance = (result.cast::<f64>() - true_result.cast::<f64>()).abs();
279+
// let distance_epsilons = distance / Simd::splat(f32::EPSILON as f64);
280+
// total_error += distance_epsilons.reduce_sum();
281+
// max_error = max_error.max(distance_epsilons.reduce_max());
282+
//
283+
// #[cfg(print_values)]
284+
// {
285+
// for i in 0..LANES {
286+
// built_string.push_str(&format!(
287+
// "{:?} {:?} {:?} {:.3}\n",
288+
// vec[i], result[i], true_result[i], distance_epsilons[i]
289+
// ));
290+
// }
291+
// }
292+
// } else if cfg!(print_values) {
293+
// #[cfg(print_values)]
294+
// {
295+
// for i in 0..LANES {
296+
// built_string.push_str(&format!("{:?} {:?}\n", vec[i], result[i]));
297+
// }
298+
// }
299+
// }
300+
//
301+
// #[cfg(any(print_values, print_error))]
302+
// {
303+
// vec += INCR;
304+
// }
305+
// }
306+
// #[cfg(all(print_cycles, any(target_arch = "x86", target_arch = "x86_64")))]
307+
// unsafe {
308+
// let mut _unused = 0_u32;
309+
// let cycles_2 = __rdtscp(&mut _unused);
310+
//
311+
// let cycles_total = cycles_2 - cycles_1;
312+
// let per_iter_cycles = cycles_total as f64 / (ITRS as f64);
313+
// let per_op_cycles = cycles_total as f64 / (STEPS as f64);
314+
// println!("Avg Cycles Per Iter: {per_iter_cycles}\nAvg Cycles Per Op: {per_op_cycles}");
315+
// }
316+
//
317+
// #[cfg(print_error)]
318+
// {
319+
// let per_op_error = total_error / (STEPS as f64);
320+
// println!("Avg Error Per Op (epsilons): {per_op_error}\nMax Error (epsilons): {max_error}")
321+
// }
322+
//
323+
// #[cfg(print_values)]
324+
// {
325+
// println!("Values:\n{built_string}");
326+
// }
327+
// }

src/comp_test/mod.rs renamed to src/test/compile.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::shared::int::*;
22
use core::simd::*;
33

44
/// Applies `ilog_const_base_unchecked::<2>` to an eight-lane `u32` vector and returns the result.
///
/// NOTE(review): the function is `#[inline(never)]`, has no visible callers (hence
/// `#[allow(dead_code)]`) and lives in `src/test/compile.rs`; presumably it exists only so
/// the generated code for this call can be inspected as a standalone symbol — confirm.
///
/// NOTE(review): the safety contract of `ilog_const_base_unchecked` is not visible here and
/// `x` is passed through unvalidated; presumably callers must uphold that contract — confirm.
#[inline(never)]
5+
#[allow(dead_code)]
56
pub fn test(x: u32x8) -> u32x8 {
67
unsafe { x.ilog_const_base_unchecked::<2>() }
78
}

0 commit comments

Comments
 (0)