From 4162baba322e936d3f8828470cb0c97129af7ada Mon Sep 17 00:00:00 2001 From: bluss Date: Wed, 16 Mar 2016 20:48:14 +0100 Subject: [PATCH 1/7] Rename some benchmarks, name the iterator-based ones iter_* --- benches/bench1.rs | 64 +++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/benches/bench1.rs b/benches/bench1.rs index 6c6423d90..2ee5173a4 100644 --- a/benches/bench1.rs +++ b/benches/bench1.rs @@ -27,7 +27,7 @@ fn map(bench: &mut test::Bencher) } #[bench] -fn sum_1d_regular(bench: &mut test::Bencher) +fn iter_sum_1d_regular(bench: &mut test::Bencher) { let a = OwnedArray::::zeros(64 * 64); let a = black_box(a); @@ -41,7 +41,7 @@ fn sum_1d_regular(bench: &mut test::Bencher) } #[bench] -fn sum_1d_raw(bench: &mut test::Bencher) +fn iter_sum_1d_raw(bench: &mut test::Bencher) { // this is autovectorized to death (= great performance) let a = OwnedArray::::zeros(64 * 64); @@ -56,7 +56,7 @@ fn sum_1d_raw(bench: &mut test::Bencher) } #[bench] -fn sum_2d_regular(bench: &mut test::Bencher) +fn iter_sum_2d_regular(bench: &mut test::Bencher) { let a = OwnedArray::::zeros((64, 64)); let a = black_box(a); @@ -70,7 +70,7 @@ fn sum_2d_regular(bench: &mut test::Bencher) } #[bench] -fn sum_2d_by_row(bench: &mut test::Bencher) +fn iter_sum_2d_by_row(bench: &mut test::Bencher) { let a = OwnedArray::::zeros((64, 64)); let a = black_box(a); @@ -86,7 +86,7 @@ fn sum_2d_by_row(bench: &mut test::Bencher) } #[bench] -fn sum_2d_raw(bench: &mut test::Bencher) +fn iter_sum_2d_raw(bench: &mut test::Bencher) { // this is autovectorized to death (= great performance) let a = OwnedArray::::zeros((64, 64)); @@ -101,7 +101,7 @@ fn sum_2d_raw(bench: &mut test::Bencher) } #[bench] -fn sum_2d_cutout(bench: &mut test::Bencher) +fn iter_sum_2d_cutout(bench: &mut test::Bencher) { let a = OwnedArray::::zeros((66, 66)); let av = a.slice(s![1..-1, 1..-1]); @@ -116,7 +116,7 @@ fn sum_2d_cutout(bench: &mut test::Bencher) } #[bench] -fn 
sum_2d_cutout_fold(bench: &mut test::Bencher) +fn iter_sum_2d_cutout_fold(bench: &mut test::Bencher) { let a = OwnedArray::::zeros((66, 66)); let av = a.slice(s![1..-1, 1..-1]); @@ -127,28 +127,7 @@ fn sum_2d_cutout_fold(bench: &mut test::Bencher) } #[bench] -fn scalar_sum_2d_regular(bench: &mut test::Bencher) -{ - let a = OwnedArray::::zeros((64, 64)); - let a = black_box(a); - bench.iter(|| { - a.scalar_sum() - }); -} - -#[bench] -fn scalar_sum_2d_cutout(bench: &mut test::Bencher) -{ - let a = OwnedArray::::zeros((66, 66)); - let av = a.slice(s![1..-1, 1..-1]); - let a = black_box(av); - bench.iter(|| { - a.scalar_sum() - }); -} - -#[bench] -fn sum_2d_cutout_by_row(bench: &mut test::Bencher) +fn iter_sum_2d_cutout_by_row(bench: &mut test::Bencher) { let a = OwnedArray::::zeros((66, 66)); let av = a.slice(s![1..-1, 1..-1]); @@ -165,7 +144,7 @@ fn sum_2d_cutout_by_row(bench: &mut test::Bencher) } #[bench] -fn sum_2d_cutout_outer_iter(bench: &mut test::Bencher) +fn iter_sum_2d_cutout_outer_iter(bench: &mut test::Bencher) { let a = OwnedArray::::zeros((66, 66)); let av = a.slice(s![1..-1, 1..-1]); @@ -182,7 +161,7 @@ fn sum_2d_cutout_outer_iter(bench: &mut test::Bencher) } #[bench] -fn sum_2d_transpose_regular(bench: &mut test::Bencher) +fn iter_sum_2d_transpose_regular(bench: &mut test::Bencher) { let mut a = OwnedArray::::zeros((64, 64)); a.swap_axes(0, 1); @@ -197,7 +176,7 @@ fn sum_2d_transpose_regular(bench: &mut test::Bencher) } #[bench] -fn sum_2d_transpose_by_row(bench: &mut test::Bencher) +fn iter_sum_2d_transpose_by_row(bench: &mut test::Bencher) { let mut a = OwnedArray::::zeros((64, 64)); a.swap_axes(0, 1); @@ -213,6 +192,27 @@ fn sum_2d_transpose_by_row(bench: &mut test::Bencher) }); } +#[bench] +fn scalar_sum_2d_regular(bench: &mut test::Bencher) +{ + let a = OwnedArray::::zeros((64, 64)); + let a = black_box(a); + bench.iter(|| { + a.scalar_sum() + }); +} + +#[bench] +fn scalar_sum_2d_cutout(bench: &mut test::Bencher) +{ + let a = 
OwnedArray::::zeros((66, 66)); + let av = a.slice(s![1..-1, 1..-1]); + let a = black_box(av); + bench.iter(|| { + a.scalar_sum() + }); +} + #[bench] fn scalar_sum_2d_float(bench: &mut test::Bencher) { From 2d9b15b4c7ec3a822b2a1f3e4641e6f52a152219 Mon Sep 17 00:00:00 2001 From: bluss Date: Wed, 16 Mar 2016 20:48:14 +0100 Subject: [PATCH 2/7] Add .sum(Axis) benchmarks --- benches/bench1.rs | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/benches/bench1.rs b/benches/bench1.rs index 2ee5173a4..1a8536726 100644 --- a/benches/bench1.rs +++ b/benches/bench1.rs @@ -12,6 +12,7 @@ use rblas::matrix::Matrix; use ndarray::{ OwnedArray, Axis, + Ix, }; use ndarray::{arr0, arr1, arr2}; @@ -642,9 +643,33 @@ fn dot_extended(bench: &mut test::Bencher) { }) } +const MEAN_SUM_N: usize = 127; + +fn range_mat(m: Ix, n: Ix) -> OwnedArray { + assert!(m * n != 0); + OwnedArray::linspace(0., (m * n - 1) as f32, m * n).into_shape((m, n)).unwrap() +} + #[bench] -fn means(bench: &mut test::Bencher) { - let a = OwnedArray::from_iter(0..100_000i64); - let a = a.into_shape((100, 1000)).unwrap(); +fn mean_axis0(bench: &mut test::Bencher) { + let a = range_mat(MEAN_SUM_N, MEAN_SUM_N); bench.iter(|| a.mean(Axis(0))); } + +#[bench] +fn mean_axis1(bench: &mut test::Bencher) { + let a = range_mat(MEAN_SUM_N, MEAN_SUM_N); + bench.iter(|| a.mean(Axis(1))); +} + +#[bench] +fn sum_axis0(bench: &mut test::Bencher) { + let a = range_mat(MEAN_SUM_N, MEAN_SUM_N); + bench.iter(|| a.sum(Axis(0))); +} + +#[bench] +fn sum_axis1(bench: &mut test::Bencher) { + let a = range_mat(MEAN_SUM_N, MEAN_SUM_N); + bench.iter(|| a.sum(Axis(1))); +} From fccd6f5277c4be33dfd97b05ae43b35bba3f10fa Mon Sep 17 00:00:00 2001 From: bluss Date: Wed, 16 Mar 2016 20:48:14 +0100 Subject: [PATCH 3/7] Add mapv, mapv_into, apply, applyv, visit --- src/impl_methods.rs | 107 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 104 insertions(+), 3 deletions(-) diff --git 
a/src/impl_methods.rs b/src/impl_methods.rs index f7d981861..0a801b9d0 100644 --- a/src/impl_methods.rs +++ b/src/impl_methods.rs @@ -1095,10 +1095,12 @@ impl ArrayBase where S: Data, D: Dimension init } - /// Apply `f` elementwise and return a new array with - /// the results. + /// Call `f` by reference on each element and create a new array + /// with the new values. /// - /// Return an array with the same shape as *self*. + /// Elements are visited in arbitrary order. + /// + /// Return an array with the same shape as `self`. /// /// ``` /// use ndarray::arr2; @@ -1128,4 +1130,103 @@ impl ArrayBase where S: Data, D: Dimension } } } + + /// Call `f` by **v**alue on each element and create a new array + /// with the new values. + /// + /// Elements are visited in arbitrary order. + /// + /// Return an array with the same shape as `self`. + /// + /// ``` + /// use ndarray::arr2; + /// + /// let a = arr2(&[[ 0., 1.], + /// [-1., 2.]]); + /// assert!( + /// a.mapv(f32::abs) == arr2(&[[0., 1.], + /// [1., 2.]]) + /// ); + /// ``` + pub fn mapv(&self, mut f: F) -> OwnedArray + where F: FnMut(A) -> B, + A: Clone, + { + self.map(move |x| f(x.clone())) + } + + /// Call `f` by **v**alue on each element, update the array with the new values + /// and return it. + /// + /// Elements are visited in arbitrary order. + pub fn mapv_into(mut self, f: F) -> Self + where S: DataMut, + F: FnMut(A) -> A, + A: Clone, + { + self.applyv(f); + self + } + + /// Modify the array in place by calling `f` by mutable reference on each element. + /// + /// Elements are visited in arbitrary order. + pub fn apply(&mut self, f: F) + where S: DataMut, + F: FnMut(&mut A), + { + self.unordered_foreach_mut(f); + } + + /// Modify the array in place by calling `f` by **v**alue on each element. + /// The array is updated with the new values. + /// + /// Elements are visited in arbitrary order. 
+ /// + /// ``` + /// use ndarray::arr2; + /// + /// let mut a = arr2(&[[ 0., 1.], + /// [-1., 2.]]); + /// a.applyv(f32::exp); + /// assert!( + /// a.allclose(&arr2(&[[1.00000, 2.71828], + /// [0.36788, 7.38906]]), 1e-5) + /// ); + /// ``` + pub fn applyv(&mut self, mut f: F) + where S: DataMut, + F: FnMut(A) -> A, + A: Clone, + { + self.unordered_foreach_mut(move |x| *x = f(x.clone())); + } + + /// Visit each element in the array by calling `f` by reference + /// on each element. + /// + /// Elements are visited in arbitrary order. + pub fn visit<'a, F>(&'a self, mut f: F) + where F: FnMut(&'a A), + A: 'a, + { + if let Some(slc) = self.as_slice_memory_order() { + // FIXME: Use for loop when slice iterator is perf is restored + for i in 0..slc.len() { + f(&slc[i]); + } + } else { + for row in self.inner_iter() { + if let Some(slc) = row.into_slice() { + for i in 0..slc.len() { + f(&slc[i]); + } + } else { + for elt in row { + f(elt); + } + } + } + } + } } From 1cab7d2c4577cc56902eb5e91f38e812e987d6bf Mon Sep 17 00:00:00 2001 From: bluss Date: Wed, 16 Mar 2016 20:48:14 +0100 Subject: [PATCH 4/7] Edit main docs --- README.rst | 12 ++++++++---- src/lib.rs | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index ab538f16a..1d2ac7fd2 100644 --- a/README.rst +++ b/README.rst @@ -53,16 +53,20 @@ Status and Lookout - Performance status: - + Arithmetic involving arrays of contiguous inner dimension optimizes very well. - + ``.fold()`` and ``.zip_mut_with()`` are the most efficient ways to + + Performance of an operation depends on the memory layout of the array + or array view. Especially if it's a binary operation, which + needs matching memory layout to be efficient (with some exceptions). + + Arithmetic optimizes very well if the arrays are have contiguous inner dimension. 
+ The callback based methods like ``.mapv()``, ``.applyv()`` and + ``.zip_mut_with()`` are the most efficient ways to perform single traversal and lock step traversal respectively. - + ``.iter()`` and ``.iter_mut()`` are efficient for contiguous arrays. + + ``.iter()`` is efficient for c-contiguous arrays. + Can use BLAS in some operations (``dot`` and ``mat_mul``). Crate Feature Flags ------------------- -The following crate feature flags are available. The are configured in +The following crate feature flags are available. They are configured in your `Cargo.toml`. - ``assign_ops`` diff --git a/src/lib.rs b/src/lib.rs index 718a5dcb1..d4b7e9d28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,15 +43,19 @@ //! *will be deprecated* when Rust supports `+=` and similar in Rust 1.8. //! + We try to introduce more static checking gradually. //! - Performance status: -//! + Arithmetic involving arrays of contiguous inner dimension optimizes very well. -//! + `.fold()` and `.zip_mut_with()` are the most efficient ways to +//! + Performance of an operation depends on the memory layout of the array +//! or array view. Especially if it's a binary operation, which +//! needs matching memory layout to be efficient (with some exceptions). +//! + Arithmetic optimizes very well if the arrays have contiguous inner dimension. +//! + The callback based methods like ``.mapv()``, ``.applyv()`` and +//! ``.zip_mut_with()`` are the most efficient ways to //! perform single traversal and lock step traversal respectively. -//! + `.iter()` and `.iter_mut()` are efficient for contiguous arrays. +//! + ``.iter()`` is efficient for c-contiguous arrays. //! + Can use BLAS in some operations (`dot` and `mat_mul`). //! //! ## Crate Feature Flags //! -//! The following crate feature flags are available. The are configured in your +//! The following crate feature flags are available. They are configured in your //! `Cargo.toml`. //! //! 
- `assign_ops` From 258fba68c489f7e7273ffad14c1b750b19a1d4cb Mon Sep 17 00:00:00 2001 From: bluss Date: Wed, 16 Mar 2016 20:48:14 +0100 Subject: [PATCH 5/7] Fix deprecation message for raw_data --- src/impl_methods.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/impl_methods.rs b/src/impl_methods.rs index 0a801b9d0..fad92478a 100644 --- a/src/impl_methods.rs +++ b/src/impl_methods.rs @@ -913,6 +913,8 @@ impl ArrayBase where S: Data, D: Dimension self.view().reversed_axes() } + /// ***Deprecated: Use .as_slice_memory_order() instead.*** + /// /// Return a slice of the array’s backing data in memory order. /// /// **Note:** Data memory order may not correspond to the index order @@ -925,6 +927,8 @@ impl ArrayBase where S: Data, D: Dimension self.data.slice() } + /// ***Deprecated: Use .as_slice_memory_order_mut() instead.*** + /// /// Return a mutable slice of the array’s backing data in memory order. /// /// **Note:** Data memory order may not correspond to the index order From 08ad7a633aad22f294ad37f03788128286125d4d Mon Sep 17 00:00:00 2001 From: bluss Date: Wed, 16 Mar 2016 20:48:14 +0100 Subject: [PATCH 6/7] Mark .fold() deprecated --- benches/bench1.rs | 11 ----------- src/impl_methods.rs | 4 ++++ src/impl_numeric.rs | 2 +- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/benches/bench1.rs b/benches/bench1.rs index 1a8536726..dc0b04db7 100644 --- a/benches/bench1.rs +++ b/benches/bench1.rs @@ -116,17 +116,6 @@ fn iter_sum_2d_cutout(bench: &mut test::Bencher) }); } -#[bench] -fn iter_sum_2d_cutout_fold(bench: &mut test::Bencher) -{ - let a = OwnedArray::::zeros((66, 66)); - let av = a.slice(s![1..-1, 1..-1]); - let a = black_box(av); - bench.iter(|| { - a.fold(0, |acc, elt| acc + *elt) - }); -} - #[bench] fn iter_sum_2d_cutout_by_row(bench: &mut test::Bencher) { diff --git a/src/impl_methods.rs b/src/impl_methods.rs index fad92478a..37b5a44cb 100644 --- a/src/impl_methods.rs +++ b/src/impl_methods.rs @@ -1079,8 +1079,12 @@ impl 
ArrayBase where S: Data, D: Dimension } } + /// ***Deprecated: Will be removed because it dictates a specific order.*** + /// /// Traverse the array elements in order and apply a fold, /// returning the resulting value. + #[cfg_attr(has_deprecated, deprecated(note= + "Will be removed because it dictates a specific order"))] pub fn fold<'a, F, B>(&'a self, mut init: B, mut f: F) -> B where F: FnMut(B, &'a A) -> B, A: 'a { diff --git a/src/impl_numeric.rs b/src/impl_numeric.rs index 5b56be592..531762745 100644 --- a/src/impl_numeric.rs +++ b/src/impl_numeric.rs @@ -70,7 +70,7 @@ impl ArrayBase if let Some(slc) = row.as_slice() { sum = sum + numeric_util::unrolled_sum(slc); } else { - sum = sum + row.fold(A::zero(), |acc, elt| acc + elt.clone()); + sum = sum + row.iter().fold(A::zero(), |acc, elt| acc + elt.clone()); } } sum From 956f1f6135e3c8804db8f6e0a2d0b92d39a6ad86 Mon Sep 17 00:00:00 2001 From: bluss Date: Wed, 16 Mar 2016 20:48:14 +0100 Subject: [PATCH 7/7] Rm redundant call to ensure_unique --- src/impl_methods.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/impl_methods.rs b/src/impl_methods.rs index 37b5a44cb..39e8084a4 100644 --- a/src/impl_methods.rs +++ b/src/impl_methods.rs @@ -129,7 +129,6 @@ impl ArrayBase where S: Data, D: Dimension pub fn iter_mut(&mut self) -> ElementsMut where S: DataMut, { - self.ensure_unique(); self.view_mut().into_iter_() } @@ -229,8 +228,7 @@ impl ArrayBase where S: Data, D: Dimension where S: DataMut, I: NdIndex, { - self.ensure_unique(); - let ptr = self.ptr; + let ptr = self.as_mut_ptr(); index.index_checked(&self.dim, &self.strides) .map(move |offset| unsafe { &mut *ptr.offset(offset) }) }