diff --git a/benches/rblas_overhead.rs b/benches/rblas_overhead.rs index 159c547..0dd1298 100644 --- a/benches/rblas_overhead.rs +++ b/benches/rblas_overhead.rs @@ -16,244 +16,63 @@ fn backend() -> Backend { Backend::::default().unwrap() } -#[bench] -fn bench_1000_dot_100_rblas(b: &mut Bencher) { +fn bench_dot_rblas(b: &mut Bencher, n: usize) { let mut rng = thread_rng(); - let slice_a = rng.gen_iter::().take(100).collect::>(); - let slice_b = rng.gen_iter::().take(100).collect::>(); + let slice_a = rng.gen_iter::().take(n).collect::>(); + let slice_b = rng.gen_iter::().take(n).collect::>(); b.iter(|| { - for _ in 0..1000 { - let res = rblas::Dot::dot(&slice_a, &slice_b); - test::black_box(res); - } + let res = rblas::Dot::dot(&slice_a, &slice_b); + test::black_box(res); }); } -#[bench] -fn bench_1000_dot_100_collenchyma(b: &mut Bencher) { +fn bench_dot_collenchyma(b: &mut Bencher, n: usize) { let mut rng = thread_rng(); - let slice_a = rng.gen_iter::().take(100).collect::>(); - let slice_b = rng.gen_iter::().take(100).collect::>(); + let slice_a = rng.gen_iter::().take(n).collect::>(); + let slice_b = rng.gen_iter::().take(n).collect::>(); let backend = backend(); - let shared_a = &mut SharedTensor::::new(backend.device(), &100).unwrap(); - let shared_b = &mut SharedTensor::::new(backend.device(), &100).unwrap(); - let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); - shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); - shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); + let shared_a = &mut SharedTensor::::new(&[n]); + let shared_b = &mut SharedTensor::::new(&[n]); + let shared_res = &mut SharedTensor::::new(&[1]); + shared_a.write_only(backend.device()).unwrap().as_mut_native().unwrap() + .as_mut_slice().clone_from_slice(&slice_a); + shared_b.write_only(backend.device()).unwrap().as_mut_native().unwrap() + .as_mut_slice().clone_from_slice(&slice_b); let _ = backend.dot(shared_a, shared_b, shared_res); - bench_1000_dot_100_collenchyma_profile(b, &backend, shared_a, shared_b, shared_res); -} -#[inline(never)] -fn bench_1000_dot_100_collenchyma_profile( - b: &mut Bencher, - backend: &Backend, - shared_a: &mut SharedTensor, - shared_b: &mut SharedTensor, - shared_res: &mut SharedTensor -) { - b.iter(|| { - for _ in 0..1000 { - let _ = backend.dot(shared_a, shared_b, shared_res); - } - }); + b.iter(|| backend.dot(shared_a, shared_b, shared_res).unwrap()); } -#[bench] -fn bench_1000_dot_100_collenchyma_plain(b: &mut Bencher) { - let mut rng = thread_rng(); - let slice_a = rng.gen_iter::().take(100).collect::>(); - let slice_b = rng.gen_iter::().take(100).collect::>(); - let backend = backend(); - let shared_a = &mut SharedTensor::::new(backend.device(), &100).unwrap(); - let shared_b = &mut SharedTensor::::new(backend.device(), &100).unwrap(); - let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); - shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); - shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); - let _ = backend.dot(shared_a, shared_b, shared_res); - bench_1000_dot_100_collenchyma_plain_profile(b, &backend, shared_a, shared_b, shared_res); -} - -#[inline(never)] -fn bench_1000_dot_100_collenchyma_plain_profile( - b: &mut Bencher, - backend: &Backend, - shared_a: &mut SharedTensor, - shared_b: &mut SharedTensor, - shared_res: &mut SharedTensor -) { - b.iter(|| { - for _ in 0..1000 { - let _ = backend.dot_plain(shared_a, shared_b, shared_res); - } - }); -} #[bench] -fn bench_100_dot_1000_rblas(b: &mut Bencher) { - let mut rng = thread_rng(); - let slice_a = rng.gen_iter::().take(1000).collect::>(); - let slice_b = rng.gen_iter::().take(1000).collect::>(); - - b.iter(|| { - for _ in 0..100 { - let res = rblas::Dot::dot(&slice_a, &slice_b); - test::black_box(res); - } - }); -} +fn bench_dot_100_rblas(b: &mut Bencher) { bench_dot_rblas(b, 100); } #[bench] -fn bench_100_dot_1000_collenchyma(b: &mut Bencher) { - let mut rng = thread_rng(); - let slice_a = rng.gen_iter::().take(1000).collect::>(); - let slice_b = rng.gen_iter::().take(1000).collect::>(); - - let backend = backend(); - let shared_a = &mut SharedTensor::::new(backend.device(), &1000).unwrap(); - let shared_b = &mut SharedTensor::::new(backend.device(), &1000).unwrap(); - let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); - shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); - shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); - let _ = backend.dot(shared_a, shared_b, shared_res); - bench_100_dot_1000_collenchyma_profile(b, &backend, shared_a, shared_b, shared_res); -} - -#[inline(never)] -fn bench_100_dot_1000_collenchyma_profile( - b: &mut Bencher, - backend: &Backend, - shared_a: &mut SharedTensor, - shared_b: &mut SharedTensor, - shared_res: &mut SharedTensor -) { - b.iter(|| { - for _ in 0..100 { - let _ = backend.dot(shared_a, shared_b, shared_res); - } - }); -} +fn bench_dot_100_collenchyma(b: &mut Bencher) { bench_dot_collenchyma(b, 100); } #[bench] -fn bench_50_dot_2000_collenchyma(b: &mut Bencher) { - let mut rng = thread_rng(); - let slice_a = rng.gen_iter::().take(2000).collect::>(); - let slice_b = rng.gen_iter::().take(2000).collect::>(); - - let backend = backend(); - let shared_a = &mut SharedTensor::::new(backend.device(), &2000).unwrap(); - let shared_b = &mut SharedTensor::::new(backend.device(), &2000).unwrap(); - let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); - shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); - shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); - let _ = backend.dot(shared_a, shared_b, shared_res); - bench_50_dot_2000_collenchyma_profile(b, &backend, shared_a, shared_b, shared_res); -} - -#[inline(never)] -fn bench_50_dot_2000_collenchyma_profile( - b: &mut Bencher, - backend: &Backend, - shared_a: &mut SharedTensor, - shared_b: &mut SharedTensor, - shared_res: &mut SharedTensor -) { - b.iter(|| { - for _ in 0..50 { - let _ = backend.dot(shared_a, shared_b, shared_res); - } - }); -} +fn bench_dot_1000_rblas(b: &mut Bencher) { bench_dot_rblas(b, 1000); } #[bench] -fn bench_10_dot_10000_rblas(b: &mut Bencher) { - let mut rng = thread_rng(); - let slice_a = rng.gen_iter::().take(10000).collect::>(); - let slice_b = rng.gen_iter::().take(10000).collect::>(); - - b.iter(|| { - for _ in 0..10 { - let res = rblas::Dot::dot(&slice_a, &slice_b); - test::black_box(res); - } - }); -} +fn bench_dot_1000_collenchyma(b: &mut Bencher) { bench_dot_collenchyma(b, 1000); } #[bench] -fn bench_10_dot_10000_collenchyma(b: &mut Bencher) { - let mut rng = thread_rng(); - let slice_a = rng.gen_iter::().take(10000).collect::>(); - let slice_b = rng.gen_iter::().take(10000).collect::>(); - - let backend = backend(); - let shared_a = &mut SharedTensor::::new(backend.device(), &10000).unwrap(); - let shared_b = &mut SharedTensor::::new(backend.device(), &10000).unwrap(); - let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); - shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); - shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); - let _ = backend.dot(shared_a, shared_b, shared_res); - bench_10_dot_10000_collenchyma_profile(b, &backend, shared_a, shared_b, shared_res); -} - -#[inline(never)] -fn bench_10_dot_10000_collenchyma_profile( - b: &mut Bencher, - backend: &Backend, - shared_a: &mut SharedTensor, - shared_b: &mut SharedTensor, - shared_res: &mut SharedTensor -) { - b.iter(|| { - for _ in 0..10 { - let _ = backend.dot(shared_a, shared_b, shared_res); - } - }); -} +fn bench_dot_2000_rblas(b: &mut Bencher) { bench_dot_rblas(b, 2000); } #[bench] -fn bench_5_dot_20000_rblas(b: &mut Bencher) { - let mut rng = thread_rng(); - let slice_a = rng.gen_iter::().take(20000).collect::>(); - let slice_b = rng.gen_iter::().take(20000).collect::>(); +fn bench_dot_2000_collenchyma(b: &mut Bencher) { bench_dot_collenchyma(b, 2000); } - b.iter(|| { - for _ in 0..5 { - let res = rblas::Dot::dot(&slice_a, &slice_b); - test::black_box(res); - } - }); -} +#[bench] +fn bench_dot_10000_rblas(b: &mut Bencher) { bench_dot_rblas(b, 10000); } #[bench] -fn bench_5_dot_20000_collenchyma(b: &mut Bencher) { - let mut rng = thread_rng(); - let slice_a = rng.gen_iter::().take(20000).collect::>(); - let slice_b = rng.gen_iter::().take(20000).collect::>(); +fn bench_dot_10000_collenchyma(b: &mut Bencher) { bench_dot_collenchyma(b, 10000); } - let backend = backend(); - let shared_a = &mut SharedTensor::::new(backend.device(), &20000).unwrap(); - let shared_b = &mut SharedTensor::::new(backend.device(), &20000).unwrap(); - let shared_res = &mut SharedTensor::::new(backend.device(), &()).unwrap(); - shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a); - shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b); - let _ = backend.dot(shared_a, shared_b, shared_res); - bench_5_dot_20000_collenchyma_profile(b, &backend, shared_a, shared_b, shared_res); -} +#[bench] +fn bench_dot_20000_rblas(b: &mut Bencher) { bench_dot_rblas(b, 20000); } -#[inline(never)] -fn bench_5_dot_20000_collenchyma_profile( - b: &mut Bencher, - backend: &Backend, - shared_a: &mut SharedTensor, - shared_b: &mut SharedTensor, - shared_res: &mut SharedTensor -) { - b.iter(|| { - for _ in 0..5 { - let _ = backend.dot(shared_a, shared_b, shared_res); - } - }); -} +#[bench] +fn bench_dot_20000_collenchyma(b: &mut Bencher) { bench_dot_collenchyma(b, 20000); } diff --git a/src/frameworks/native.rs b/src/frameworks/native.rs index d803205..60994b6 100644 --- a/src/frameworks/native.rs +++ b/src/frameworks/native.rs @@ -254,7 +254,7 @@ mod test { #[test] fn it_converts_correctly_to_and_from_matrix() { let backend = get_native_backend(); - let mut a = SharedTensor::::new(&vec![3, 2]).unwrap(); + let mut a = SharedTensor::::new(&vec![3, 2]); write_to_memory(a.write_only(backend.device()).unwrap(), &[2f32, 5f32, 2f32, 5f32, diff --git a/tests/blas_specs.rs b/tests/blas_specs.rs index 9b12cf4..7ef0dc7 100644 --- a/tests/blas_specs.rs +++ b/tests/blas_specs.rs @@ -1,510 +1,244 @@ +#![cfg(feature = "native")] // required for data i/o + extern crate collenchyma_blas as co_blas; extern crate collenchyma as co; -#[cfg(test)] -mod blas_spec { - use co::backend::Backend; - use co::framework::IFramework; - use co_blas::plugin::*; - use co::memory::MemoryType; - use co::tensor::SharedTensor; - use co::plugin::numeric_helpers::{cast, Float}; - - pub fn write_to_memory(mem: &mut MemoryType, data: &[T]) { - match mem { - &mut MemoryType::Native(ref mut mem) => { - let mut mem_buffer = mem.as_mut_slice::(); - for (index, datum) in data.iter().enumerate() { - mem_buffer[index] = *datum; - } - }, - #[cfg(any(feature = "cuda", feature = "opencl"))] - _ => assert!(false) - } - } - - pub fn get_asum_memory(backend: &Backend) -> (SharedTensor, SharedTensor){ - let mut x = SharedTensor::::new(backend.device(), &vec![3]).unwrap(); - write_to_memory(x.get_mut(backend.device()).unwrap(), &[cast::(1).unwrap(), cast::(-2).unwrap(), cast::(3).unwrap()]); - - let result = SharedTensor::::new(backend.device(), &()).unwrap(); - (x, result) - } - - pub fn get_axpy_memory(backend: &Backend) -> (SharedTensor, SharedTensor, SharedTensor){ - let mut a = SharedTensor::::new(backend.device(), &()).unwrap(); - write_to_memory(a.get_mut(backend.device()).unwrap(), &[cast::(2).unwrap()]); - - let mut x = SharedTensor::::new(backend.device(), &vec![3]).unwrap(); - write_to_memory(x.get_mut(backend.device()).unwrap(), &[cast::(1).unwrap(), cast::(2).unwrap(), cast::(3).unwrap()]); - - let mut y = SharedTensor::::new(backend.device(), &vec![3]).unwrap(); - write_to_memory(y.get_mut(backend.device()).unwrap(), &[cast::(1).unwrap(), cast::(2).unwrap(), cast::(3).unwrap()]); - (a, x, y) - } - - pub fn get_copy_memory(backend: &Backend) -> (SharedTensor, SharedTensor){ - let mut x = SharedTensor::::new(backend.device(), &vec![3]).unwrap(); - write_to_memory(x.get_mut(backend.device()).unwrap(), &[cast::(1).unwrap(), cast::(2).unwrap(), cast::(3).unwrap()]); - - let y = SharedTensor::::new(backend.device(), &vec![3]).unwrap(); - (x, y) - } - - pub fn get_dot_memory(backend: &Backend) -> (SharedTensor, SharedTensor, SharedTensor){ - let mut x = SharedTensor::::new(backend.device(), &vec![3]).unwrap(); - write_to_memory(x.get_mut(backend.device()).unwrap(), &[cast::(1).unwrap(), cast::(2).unwrap(), cast::(3).unwrap()]); - - let mut y = SharedTensor::::new(backend.device(), &vec![3]).unwrap(); - write_to_memory(y.get_mut(backend.device()).unwrap(), &[cast::(1).unwrap(), cast::(2).unwrap(), cast::(3).unwrap()]); - - let result = SharedTensor::::new(backend.device(), &()).unwrap(); - (x, y, result) - } - - pub fn get_nrm2_memory(backend: &Backend) -> (SharedTensor, SharedTensor){ - let mut x = SharedTensor::::new(backend.device(), &vec![3]).unwrap(); - write_to_memory(x.get_mut(backend.device()).unwrap(), &[cast::(1).unwrap(), cast::(2).unwrap(), cast::(2).unwrap()]); - - let result = SharedTensor::::new(backend.device(), &()).unwrap(); - (x, result) - } - - pub fn get_scal_memory(backend: &Backend) -> (SharedTensor, SharedTensor){ - let mut a = SharedTensor::::new(backend.device(), &()).unwrap(); - write_to_memory(a.get_mut(backend.device()).unwrap(), &[cast::(2).unwrap()]); - - let mut y = SharedTensor::::new(backend.device(), &vec![3]).unwrap(); - write_to_memory(y.get_mut(backend.device()).unwrap(), &[cast::(1).unwrap(), cast::(2).unwrap(), cast::(3).unwrap()]); - - (a, y) - } - - pub fn get_swap_memory(backend: &Backend) -> (SharedTensor, SharedTensor){ - let mut x = SharedTensor::::new(backend.device(), &vec![3]).unwrap(); - write_to_memory(x.get_mut(backend.device()).unwrap(), &[cast::(1).unwrap(), cast::(2).unwrap(), cast::(3).unwrap()]); - - let mut y = SharedTensor::::new(backend.device(), &vec![3]).unwrap(); - write_to_memory(y.get_mut(backend.device()).unwrap(), &[cast::(3).unwrap(), cast::(2).unwrap(), cast::(1).unwrap()]); - - (x, y) - } - - pub fn get_gemm_memory(backend: &Backend) -> (SharedTensor, SharedTensor, SharedTensor){ - let mut a = SharedTensor::::new(backend.device(), &vec![3, 2]).unwrap(); - write_to_memory(a.get_mut(backend.device()).unwrap(), - &[cast::(2).unwrap(), cast::(5).unwrap(), - cast::(2).unwrap(), cast::(5).unwrap(), - cast::(2).unwrap(), cast::(5).unwrap()]); - - let mut b = SharedTensor::::new(backend.device(), &vec![2, 3]).unwrap(); - write_to_memory(b.get_mut(backend.device()).unwrap(), - &[cast::(4).unwrap(), cast::(1).unwrap(), cast::(1).unwrap(), - cast::(4).unwrap(), cast::(1).unwrap(), cast::(1).unwrap()]); +use std::fmt; +use co::backend::{Backend, BackendConfig, IBackend}; +use co::framework::{IFramework}; +use co::plugin::numeric_helpers::{cast, Float, NumCast}; +use co::tensor::SharedTensor; +use co_blas::plugin::*; +use co_blas::transpose::Transpose; + +#[cfg(feature = "cuda")] +use co::frameworks::Cuda; + +pub fn get_native_backend() -> Backend<::co::frameworks::Native> { + let framework = ::co::frameworks::Native::new(); + let hardwares = framework.hardwares().to_vec(); + let backend_config = BackendConfig::new(framework, &hardwares); + Backend::new(backend_config).unwrap() +} - let c = SharedTensor::::new(backend.device(), &vec![3, 3]).unwrap(); +#[cfg(feature = "cuda")] +pub fn get_cuda_backend() -> Backend { + let framework = Cuda::new(); + let hardwares = framework.hardwares().to_vec(); + let backend_config = BackendConfig::new(framework, &hardwares); + Backend::new(backend_config).unwrap() +} - (a, b, c) - } - pub fn get_gemm_transpose_memory(backend: &Backend) -> (SharedTensor, SharedTensor, SharedTensor){ - let (a, b, _) = get_gemm_memory(backend); - let c = SharedTensor::::new(backend.device(), &vec![2, 2]).unwrap(); +pub fn write_to_tensor(x: &mut SharedTensor, data: &[S]) + where T: ::std::marker::Copy + NumCast, + S: ::std::marker::Copy + NumCast { - (a, b, c) + let native = get_native_backend(); + let mem = x.write_only(native.device()).unwrap().as_mut_native().unwrap(); + let mut mem_buffer = mem.as_mut_slice::(); + for (i, x) in data.iter().enumerate() { + mem_buffer[i] = cast::(*x).unwrap(); } +} - pub fn get_scale_one_zero_memory(backend: &Backend) -> (SharedTensor, SharedTensor){ - let mut alpha = SharedTensor::::new(backend.device(), &vec![1]).unwrap(); - write_to_memory(alpha.get_mut(backend.device()).unwrap(), &[cast::(1).unwrap()]); - - let mut beta = SharedTensor::::new(backend.device(), &vec![1]).unwrap(); - write_to_memory(beta.get_mut(backend.device()).unwrap(), &[cast::(0).unwrap()]); +pub fn tensor_assert_eq(x: &SharedTensor, data: &[f64]) + where T: fmt::Debug + PartialEq + NumCast { + let native = get_native_backend(); + let mem = x.read(native.device()).unwrap().as_native().unwrap(); + let mem_slice = mem.as_slice::(); - (alpha, beta) + assert_eq!(mem_slice.len(), data.len()); + for (x1, x2) in mem_slice.iter().zip(data.iter()) { + let x2_t = cast::(*x2).unwrap(); + assert_eq!(*x1, x2_t); // TODO: compare approximately } +} - #[cfg(feature = "native")] - mod native { - use co::backend::{Backend, BackendConfig}; - use co::framework::IFramework; - use co::frameworks::Native; - use co_blas::plugin::*; - use co_blas::transpose::Transpose; - use super::*; - - fn get_native_backend() -> Backend { - let framework = Native::new(); - let hardwares = framework.hardwares().to_vec(); - let backend_config = BackendConfig::new(framework, &hardwares); - Backend::new(backend_config).unwrap() - } - - #[test] - fn it_computes_correct_asum_on_native_for_f32() { - let backend = get_native_backend(); - let (mut x, mut result) = get_asum_memory::(&backend); - - if let Ok(_) = backend.asum(&mut x, &mut result) { - if let Some(mem) = result.get(backend.device()).unwrap().as_native() { assert_eq!(&[6f32], mem.as_slice::()) } - } - backend.asum(&mut x, &mut result).unwrap(); - } - - #[test] - fn it_computes_correct_asum_on_native_for_f64() { - let backend = get_native_backend(); - let (mut x, mut result) = get_asum_memory::(&backend); - - if let Ok(_) = backend.asum(&mut x, &mut result) { - if let Some(mem) = result.get(backend.device()).unwrap().as_native() { assert_eq!(&[6f64], mem.as_slice::()) } - } - backend.asum(&mut x, &mut result).unwrap(); - } - - #[test] - fn it_computes_correct_axpy_on_native_for_f32() { - let backend = get_native_backend(); - let (mut a, mut x, mut y) = get_axpy_memory::(&backend); - - if let Ok(_) = backend.axpy(&mut a, &mut x, &mut y) { - if let Some(mem) = y.get(backend.device()).unwrap().as_native() { assert_eq!(&[3f32, 6f32, 9f32], mem.as_slice::()) } - } - backend.axpy(&mut a, &mut x, &mut y).unwrap(); - } - - #[test] - fn it_computes_correct_axpy_on_native_for_f64() { - let backend = get_native_backend(); - let (mut a, mut x, mut y) = get_axpy_memory::(&backend); - - if let Ok(_) = backend.axpy(&mut a, &mut x, &mut y) { - if let Some(mem) = y.get(backend.device()).unwrap().as_native() { assert_eq!(&[3f64, 6f64, 9f64], mem.as_slice::()) } - } - backend.axpy(&mut a, &mut x, &mut y).unwrap(); - } - - #[test] - fn it_computes_correct_copy_on_native_for_f32() { - let backend = get_native_backend(); - let (mut x, mut y) = get_copy_memory::(&backend); - - if let Ok(_) = backend.copy(&mut x, &mut y) { - if let Some(mem) = y.get(backend.device()).unwrap().as_native() { assert_eq!(&[1f32, 2f32, 3f32], mem.as_slice::()) } - } - backend.copy(&mut x, &mut y).unwrap(); - } - - #[test] - fn it_computes_correct_copy_on_native_for_f64() { - let backend = get_native_backend(); - let (mut x, mut y) = get_copy_memory::(&backend); - - if let Ok(_) = backend.copy(&mut x, &mut y) { - if let Some(mem) = y.get(backend.device()).unwrap().as_native() { assert_eq!(&[1f64, 2f64, 3f64], mem.as_slice::()) } - } - backend.copy(&mut x, &mut y).unwrap(); - } - - #[test] - fn it_computes_correct_dot_on_native_for_f32() { - let backend = get_native_backend(); - let (mut x, mut y, mut result) = get_dot_memory::(&backend); - - if let Ok(_) = backend.dot(&mut x, &mut y, &mut result) { - if let Some(mem) = result.get(backend.device()).unwrap().as_native() { assert_eq!(14f32, mem.as_slice::()[0]) } - } - backend.dot(&mut x, &mut y, &mut result).unwrap(); - } - - #[test] - fn it_computes_correct_dot_on_native_for_f64() { - let backend = get_native_backend(); - let (mut x, mut y, mut result) = get_dot_memory::(&backend); - - if let Ok(_) = backend.dot(&mut x, &mut y, &mut result) { - if let Some(mem) = result.get(backend.device()).unwrap().as_native() { assert_eq!(14f64, mem.as_slice::()[0]) } - } - backend.dot(&mut x, &mut y, &mut result).unwrap(); - } - - // NRM2 - - #[test] - fn it_computes_correct_nrm2_on_native_for_f32() { - let backend = get_native_backend(); - let (mut x, mut result) = get_nrm2_memory::(&backend); - - if let Ok(_) = backend.nrm2(&mut x, &mut result) { - if let Some(mem) = result.get(backend.device()).unwrap().as_native() { assert_eq!(3f32, mem.as_slice::()[0]) } - } - backend.nrm2(&mut x, &mut result).unwrap(); - } - - #[test] - fn it_computes_correct_nrm2_on_native_for_f64() { - let backend = get_native_backend(); - let (mut x, mut result) = get_nrm2_memory::(&backend); - - if let Ok(_) = backend.nrm2(&mut x, &mut result) { - if let Some(mem) = result.get(backend.device()).unwrap().as_native() { assert_eq!(3f64, mem.as_slice::()[0]) } - } - backend.nrm2(&mut x, &mut result).unwrap(); - } - - /// SCAL - - #[test] - fn it_computes_correct_scal_on_native_for_f32() { - let backend = get_native_backend(); - let (mut x, mut y) = get_scal_memory::(&backend); - - if let Ok(_) = backend.scal(&mut x, &mut y) { - if let Some(mem) = y.get(backend.device()).unwrap().as_native() { assert_eq!(&[2f32, 4f32, 6f32], mem.as_slice::()) } - } - backend.scal(&mut x, &mut y).unwrap(); - } - - #[test] - fn it_computes_correct_scal_on_native_for_f64() { - let backend = get_native_backend(); - let (mut x, mut y) = get_scal_memory::(&backend); - - if let Ok(_) = backend.scal(&mut x, &mut y) { - if let Some(mem) = y.get(backend.device()).unwrap().as_native() { assert_eq!(&[2f64, 4f64, 6f64], mem.as_slice::()) } - } - backend.scal(&mut x, &mut y).unwrap(); - } - - /// SWAP - - #[test] - fn it_computes_correct_swap_on_native_for_f32() { - let backend = get_native_backend(); - let (mut x, mut y) = get_swap_memory::(&backend); - - if let Ok(_) = backend.swap(&mut x, &mut y) { - if let Some(mem) = x.get(backend.device()).unwrap().as_native() { assert_eq!(&[3f32, 2f32, 1f32], mem.as_slice::()) } - if let Some(mem) = y.get(backend.device()).unwrap().as_native() { assert_eq!(&[1f32, 2f32, 3f32], mem.as_slice::()) } - } - backend.swap(&mut x, &mut y).unwrap(); - } +pub fn test_asum(backend: Backend) + where T: Float + fmt::Debug, + F: IFramework, + Backend: Asum + IBackend { + let mut x = SharedTensor::::new(&[3]); + let mut result = SharedTensor::::new(&[1]); + + write_to_tensor(&mut x, &[1, -2, 3]); + backend.asum(&x, &mut result).unwrap(); + backend.synchronize().unwrap(); + tensor_assert_eq(&result, &[6.0]); +} - #[test] - fn it_computes_correct_swap_on_native_for_f64() { - let backend = get_native_backend(); - let (mut x, mut y) = get_swap_memory::(&backend); +pub fn test_axpy(backend: Backend) + where T: Float + fmt::Debug, + F: IFramework, + Backend: Axpy + IBackend { + let mut a = SharedTensor::::new(&[1]); + let mut x = SharedTensor::::new(&[3]); + let mut y = SharedTensor::::new(&[3]); + write_to_tensor(&mut a, &[2]); + write_to_tensor(&mut x, &[1, 2, 3]); + write_to_tensor(&mut y, &[1, 2, 3]); + + backend.axpy(&a, &x, &mut y).unwrap(); + backend.synchronize().unwrap(); + tensor_assert_eq(&y, &[3.0, 6.0, 9.0]); +} - if let Ok(_) = backend.swap(&mut x, &mut y) { - if let Some(mem) = x.get(backend.device()).unwrap().as_native() { assert_eq!(&[3f64, 2f64, 1f64], mem.as_slice::()) } - if let Some(mem) = y.get(backend.device()).unwrap().as_native() { assert_eq!(&[1f64, 2f64, 3f64], mem.as_slice::()) } - } - backend.swap(&mut x, &mut y).unwrap(); - } +pub fn test_copy(backend: Backend) + where T: Float + fmt::Debug, + F: IFramework, + Backend: Copy + IBackend { + let mut x = SharedTensor::::new(&[3]); + let mut y = SharedTensor::::new(&[3]); + write_to_tensor(&mut x, &[1, 2, 3]); + + backend.copy(&x, &mut y).unwrap(); + backend.synchronize().unwrap(); + tensor_assert_eq(&y, &[1.0, 2.0, 3.0]); +} - /// GEMM +pub fn test_dot(backend: Backend) + where T: Float + fmt::Debug, + F: IFramework, + Backend: Dot + IBackend { + let mut x = SharedTensor::::new(&[3]); + let mut y = SharedTensor::::new(&[3]); + let mut result = SharedTensor::::new(&[1]); + write_to_tensor(&mut x, &[1, 2, 3]); + write_to_tensor(&mut y, &[1, 2, 3]); + + backend.dot(&x, &y, &mut result).unwrap(); + backend.synchronize().unwrap(); + tensor_assert_eq(&result, &[14.0]); +} - #[test] - fn it_computes_correct_gemm_on_native_for_f32() { - let backend = get_native_backend(); - let (mut a, mut b, mut c) = get_gemm_memory::(&backend); - let (mut alpha, mut beta) = get_scale_one_zero_memory::(&backend); +pub fn test_nrm2(backend: Backend) + where T: Float + fmt::Debug, + F: IFramework, + Backend: Nrm2 + IBackend { + let mut x = SharedTensor::::new(&[3]); + let mut result = SharedTensor::::new(&[1]); + write_to_tensor(&mut x, &[1, 2, 2]); + + backend.nrm2(&x, &mut result).unwrap(); + backend.synchronize().unwrap(); + tensor_assert_eq(&result, &[3.0]); +} - if let Some(mem) = a.get(backend.device()).unwrap().as_native() { assert_eq!(&[2f32, 5f32, 2f32, 5f32, 2f32, 5f32], mem.as_slice::()) } - if let Some(mem) = b.get(backend.device()).unwrap().as_native() { assert_eq!(&[4f32, 1f32, 1f32, 4f32, 1f32, 1f32], mem.as_slice::()) } - if let Ok(_) = backend.gemm(&mut alpha, Transpose::NoTrans, &mut a, Transpose::NoTrans, &mut b, &mut beta, &mut c) { - if let Some(mem) = c.get(backend.device()).unwrap().as_native() { assert_eq!(&[28f32, 7f32, 7f32, 28f32, 7f32, 7f32, 28f32, 7f32, 7f32], mem.as_slice::()) } - } - backend.gemm(&mut alpha, Transpose::NoTrans, &mut a, Transpose::NoTrans, &mut b, &mut beta, &mut c).unwrap(); - } +pub fn test_scal(backend: Backend) + where T: Float + fmt::Debug, + F: IFramework, + Backend: Scal + IBackend { + let mut a = SharedTensor::::new(&[1]); + let mut y = SharedTensor::::new(&[3]); + write_to_tensor(&mut a, &[2]); + write_to_tensor(&mut y, &[1, 2, 3]); + + backend.scal(&a, &mut y).unwrap(); + backend.synchronize().unwrap(); + tensor_assert_eq(&y, &[2.0, 4.0, 6.0]); +} - #[test] - fn it_computes_correct_gemm_on_native_for_f64() { - let backend = get_native_backend(); - let (mut a, mut b, mut c) = get_gemm_memory::(&backend); - let (mut alpha, mut beta) = get_scale_one_zero_memory::(&backend); +pub fn test_swap(backend: Backend) + where T: Float + fmt::Debug, + F: IFramework, + Backend: Swap + IBackend { + let mut x = SharedTensor::::new(&[3]); + let mut y = SharedTensor::::new(&[3]); + write_to_tensor(&mut x, &[1, 2, 3]); + write_to_tensor(&mut y, &[3, 2, 1]); + + backend.swap(&mut x, &mut y).unwrap(); + backend.synchronize().unwrap(); + tensor_assert_eq(&x, &[3.0, 2.0, 1.0]); + tensor_assert_eq(&y, &[1.0, 2.0, 3.0]); +} - if let Ok(_) = backend.gemm(&mut alpha, Transpose::NoTrans, &mut a, Transpose::NoTrans, &mut b, &mut beta, &mut c) { - if let Some(mem) = c.get(backend.device()).unwrap().as_native() { assert_eq!(&[28f64, 7f64, 7f64, 28f64, 7f64, 7f64, 28f64, 7f64, 7f64], mem.as_slice::()) } - } - backend.gemm(&mut alpha, Transpose::NoTrans, &mut a, Transpose::NoTrans, &mut b, &mut beta, &mut c).unwrap(); - } +pub fn test_gemm(backend: Backend) + where T: Float + fmt::Debug, + F: IFramework, + Backend: Gemm + IBackend { + let mut alpha = SharedTensor::::new(&[1]); + let mut beta = SharedTensor::::new(&[1]); + let mut a = SharedTensor::::new(&[3, 2]); + let mut b = SharedTensor::::new(&[2, 3]); + write_to_tensor(&mut alpha, &[1]); + write_to_tensor(&mut beta, &[0]); + write_to_tensor(&mut a, &[2, 5, 2, 5, 2, 5]); + write_to_tensor(&mut b, &[4, 1, 1, 4, 1, 1]); + + let mut c = SharedTensor::::new(&[3, 3]); + backend.gemm(&alpha, + Transpose::NoTrans, &a, + Transpose::NoTrans, &b, + &beta, + &mut c).unwrap(); + backend.synchronize().unwrap(); + tensor_assert_eq(&c, &[ + 28.0, 7.0, 7.0, + 28.0, 7.0, 7.0, + 28.0, 7.0, 7.0]); + + let mut d = SharedTensor::::new(&[2, 2]); + backend.gemm(&alpha, + Transpose::Trans, &a, + Transpose::Trans, &b, + &beta, + &mut d).unwrap(); + backend.synchronize().unwrap(); + tensor_assert_eq(&d, &[12.0, 12.0, 30.0, 30.0]); +} - #[test] - fn it_computes_correct_gemm_transpose_on_native_for_f32() { - let backend = get_native_backend(); - let (mut a, mut b, mut c) = get_gemm_transpose_memory::(&backend); - let (mut alpha, mut beta) = get_scale_one_zero_memory::(&backend); +macro_rules! test_blas { + ($mod_name:ident, $backend_getter:ident, $t:ident) => { + mod $mod_name { + use super::*; - if let Some(mem) = a.get(backend.device()).unwrap().as_native() { assert_eq!(&[2f32, 5f32, 2f32, 5f32, 2f32, 5f32], mem.as_slice::()) } - if let Some(mem) = b.get(backend.device()).unwrap().as_native() { assert_eq!(&[4f32, 1f32, 1f32, 4f32, 1f32, 1f32], mem.as_slice::()) } - if let Ok(_) = backend.gemm(&mut alpha, Transpose::Trans, &mut a, Transpose::Trans, &mut b, &mut beta, &mut c) { - if let Some(mem) = c.get(backend.device()).unwrap().as_native() { assert_eq!(&[12f32, 12f32, 30f32, 30f32], mem.as_slice::()) } + #[test] + fn it_computes_correct_asum() { + test_asum::<$t, _>($backend_getter()); } - backend.gemm(&mut alpha, Transpose::Trans, &mut a, Transpose::Trans, &mut b, &mut beta, &mut c).unwrap(); - } - } - - #[cfg(feature = "cuda")] - mod cuda { - use co::backend::{IBackend, Backend, BackendConfig}; - use co::framework::IFramework; - use co::frameworks::Native; - use co::frameworks::Cuda; - use co_blas::plugin::*; - use co_blas::transpose::Transpose; - use super::*; - - fn get_native_backend() -> Backend { - let framework = Native::new(); - let hardwares = framework.hardwares().to_vec(); - let backend_config = BackendConfig::new(framework, &hardwares); - Backend::new(backend_config).unwrap() - } - - fn get_cuda_backend() -> Backend { - let framework = Cuda::new(); - let hardwares = framework.hardwares().to_vec(); - let backend_config = BackendConfig::new(framework, &hardwares); - Backend::new(backend_config).unwrap() - } - - #[test] - fn it_computes_correct_asum_on_cuda_for_f32() { - let native = get_native_backend(); - let backend = get_cuda_backend(); - let (mut x, mut result) = get_asum_memory::(&native); - if let Ok(_) = backend.asum(&mut x, &mut result) { - backend.synchronize().unwrap(); - result.sync(native.device()).unwrap(); - if let Some(mem) = result.get(native.device()).unwrap().as_native() { assert_eq!(&[6f32], mem.as_slice::()) } + #[test] + fn it_computes_correct_axpy() { + test_axpy::<$t, _>($backend_getter()); } - backend.asum(&mut x, &mut result).unwrap(); - } - - #[test] - fn it_computes_correct_axpy_on_cuda_for_f32() { - let native = get_native_backend(); - let backend = get_cuda_backend(); - let (mut a, mut x, mut y) = get_axpy_memory::(&native); - if let Ok(_) = backend.axpy(&mut a, &mut x, &mut y) { - backend.synchronize().unwrap(); - y.sync(native.device()).unwrap(); - if let Some(mem) = y.get(native.device()).unwrap().as_native() { assert_eq!(&[3f32, 6f32, 9f32], mem.as_slice::()) } + #[test] + fn it_computes_correct_copy() { + test_copy::<$t, _>($backend_getter()); } - backend.axpy(&mut a, &mut x, &mut y).unwrap(); - } - - #[test] - fn it_computes_correct_copy_on_cuda_for_f32() { - let native = get_native_backend(); - let backend = get_cuda_backend(); - let (mut x, mut y) = get_copy_memory::(&native); - if let Ok(_) = backend.copy(&mut x, &mut y) { - backend.synchronize().unwrap(); - y.sync(native.device()).unwrap(); - if let Some(mem) = y.get(native.device()).unwrap().as_native() { assert_eq!(&[1f32, 2f32, 3f32], mem.as_slice::()) } + #[test] + fn it_computes_correct_dot() { + test_dot::<$t, _>($backend_getter()); } - backend.copy(&mut x, &mut y).unwrap(); - } - #[test] - // #[ignore] - fn it_computes_correct_dot_on_cuda_for_f32() { - let native = get_native_backend(); - let backend = get_cuda_backend(); - let (mut x, mut y, mut result) = get_dot_memory::(&native); - backend.synchronize().unwrap(); - - if let Ok(_) = backend.dot(&mut x, &mut y, &mut result) { - println!("DOT"); - backend.synchronize().unwrap(); - result.sync(native.device()).unwrap(); - if let Some(mem) = result.get(native.device()).unwrap().as_native() { println!("{:?}", mem.as_slice::()[0]); assert_eq!(14f32, mem.as_slice::()[0]) } + #[test] + fn it_computes_correct_nrm2() { + test_nrm2::<$t, _>($backend_getter()); } - backend.dot(&mut x, &mut y, &mut result).unwrap(); - } - #[test] - // #[ignore] - fn it_computes_correct_nrm2_on_cuda_for_f32() { - let native = get_native_backend(); - let backend = get_cuda_backend(); - let (mut x, mut result) = get_nrm2_memory::(&native); - - if let Ok(_) = backend.nrm2(&mut x, &mut result) { - backend.synchronize().unwrap(); - result.sync(native.device()).unwrap(); - if let Some(mem) = result.get(native.device()).unwrap().as_native() { assert_eq!(3f32, mem.as_slice::()[0]) } + #[test] + fn it_computes_correct_scal() { + test_scal::<$t, _>($backend_getter()); } - backend.nrm2(&mut x, &mut result).unwrap(); - } - - #[test] - fn it_computes_correct_scal_on_cuda_for_f32() { - let native = get_native_backend(); - let backend = get_cuda_backend(); - let (mut a, mut x) = get_scal_memory::(&native); - if let Ok(_) = backend.scal(&mut a, &mut x) { - backend.synchronize().unwrap(); - x.sync(native.device()).unwrap(); - if let Some(mem) = x.get(native.device()).unwrap().as_native() { assert_eq!(&[2f32, 4f32, 6f32], mem.as_slice::()) } + #[test] + fn it_computes_correct_swap() { + test_swap::<$t, _>($backend_getter()); } - backend.scal(&mut a, &mut x).unwrap(); - } - #[test] - fn it_computes_correct_swap_on_cuda_for_f32() { - let native = get_native_backend(); - let backend = get_cuda_backend(); - let (mut x, mut y) = get_swap_memory::(&native); - - if let Ok(_) = backend.swap(&mut x, &mut y) { - backend.synchronize().unwrap(); - x.sync(native.device()).unwrap(); - y.sync(native.device()).unwrap(); - if let Some(mem) = x.get(native.device()).unwrap().as_native() { assert_eq!(&[3f32, 2f32, 1f32], mem.as_slice::()) } - if let Some(mem) = y.get(native.device()).unwrap().as_native() { assert_eq!(&[1f32, 2f32, 3f32], mem.as_slice::()) } + #[test] + fn it_computes_correct_gemm() { + test_gemm::<$t, _>($backend_getter()); } - backend.swap(&mut x, &mut y).unwrap(); } + }; +} - #[test] - fn it_computes_correct_gemm_on_cuda_for_f32() { - let native = get_native_backend(); - let backend = get_cuda_backend(); - let (mut a, mut b, mut c) = get_gemm_memory::(&native); - let (mut alpha, mut beta) = get_scale_one_zero_memory::(&native); - - if let Some(mem) = a.get(native.device()).unwrap().as_native() { assert_eq!(&[2f32, 5f32, 2f32, 5f32, 2f32, 5f32], mem.as_slice::()) } - if let Some(mem) = b.get(native.device()).unwrap().as_native() { assert_eq!(&[4f32, 1f32, 1f32, 4f32, 1f32, 1f32], mem.as_slice::()) } - if let Ok(_) = backend.gemm(&mut alpha, Transpose::NoTrans, &mut a, Transpose::NoTrans, &mut b, &mut beta, &mut c) { - backend.synchronize().unwrap(); - c.sync(native.device()).unwrap(); - if let Some(mem) = c.get(native.device()).unwrap().as_native() { assert_eq!(&[28f32, 7f32, 7f32, 28f32, 7f32, 7f32, 28f32, 7f32, 7f32], mem.as_slice::()) } - } - backend.gemm(&mut alpha, Transpose::NoTrans, &mut a, Transpose::NoTrans, &mut b, &mut beta, &mut c).unwrap(); - } +test_blas!(native_f32, get_native_backend, f32); +test_blas!(native_f64, get_native_backend, f64); - #[test] - fn it_computes_correct_transpose_gemm_on_cuda_for_f32() { - let native = get_native_backend(); - let backend = get_cuda_backend(); - let (mut a, mut b, mut c) = get_gemm_transpose_memory::(&native); - let (mut alpha, mut beta) = get_scale_one_zero_memory::(&native); - - if let Some(mem) = a.get(native.device()).unwrap().as_native() { assert_eq!(&[2f32, 5f32, 2f32, 5f32, 2f32, 5f32], mem.as_slice::()) } - if let Some(mem) = b.get(native.device()).unwrap().as_native() { assert_eq!(&[4f32, 1f32, 1f32, 4f32, 1f32, 1f32], mem.as_slice::()) } - if let Ok(_) = backend.gemm(&mut alpha, Transpose::Trans, &mut a, Transpose::Trans, &mut b, &mut beta, &mut c) { - backend.synchronize().unwrap(); - c.sync(native.device()).unwrap(); - if let Some(mem) = c.get(native.device()).unwrap().as_native() { assert_eq!(&[12f32, 12f32, 30f32, 30f32], &mem.as_slice::()) } - } - backend.gemm(&mut alpha, Transpose::Trans, &mut a, Transpose::Trans, &mut b, &mut beta, &mut c).unwrap(); - } - } -} +#[cfg(feature = "cuda")] +test_blas!(cuda_f32, get_cuda_backend, f32);