Methods .mapv(), .mapv_into(), .apply(), .applyv(), .visit() #159

Merged: 7 commits merged on Mar 16, 2016

12 changes: 8 additions & 4 deletions README.rst
@@ -53,16 +53,20 @@ Status and Lookout

- Performance status:

+ Arithmetic involving arrays of contiguous inner dimension optimizes very well.
+ ``.fold()`` and ``.zip_mut_with()`` are the most efficient ways to
+ Performance of an operation depends on the memory layout of the array
or array view, especially if it's a binary operation, which
needs matching memory layout to be efficient (with some exceptions).
+ Arithmetic optimizes very well if the arrays have a contiguous inner dimension.
+ The callback-based methods like ``.mapv()``, ``.applyv()`` and
``.zip_mut_with()`` are the most efficient ways to
perform single traversal and lock step traversal respectively.
+ ``.iter()`` and ``.iter_mut()`` are efficient for contiguous arrays.
+ ``.iter()`` is efficient for c-contiguous arrays.
+ Can use BLAS in some operations (``dot`` and ``mat_mul``).
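
For illustration, here is a minimal sketch of the two traversal styles the list above refers to, written only against names that appear elsewhere on this page (``OwnedArray``, ``.mapv()``, ``.zip_mut_with()``, ``.scalar_sum()``); the shape and values are made up:

```rust
// Sketch only: assumes the API at the time of this PR (OwnedArray, .mapv(),
// .zip_mut_with(), .scalar_sum()); the shape and values are illustrative.
extern crate ndarray;
use ndarray::OwnedArray;

fn main() {
    let a = OwnedArray::<f32, _>::zeros((4, 4));

    // Single traversal: the closure is called by value on every element.
    let b = a.mapv(|x| x + 1.);

    // Lock-step traversal: mutate `c` from the matching element of `b`.
    let mut c = OwnedArray::<f32, _>::zeros((4, 4));
    c.zip_mut_with(&b, |x, y| *x += *y);

    assert!(c.scalar_sum() == 16.);
}
```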

Crate Feature Flags
-------------------

The following crate feature flags are available. The are configured in
The following crate feature flags are available. They are configured in
your `Cargo.toml`.

- ``assign_ops``

104 changes: 59 additions & 45 deletions benches/bench1.rs
@@ -12,6 +12,7 @@ use rblas::matrix::Matrix;
use ndarray::{
OwnedArray,
Axis,
Ix,
};
use ndarray::{arr0, arr1, arr2};

@@ -27,7 +28,7 @@ fn map(bench: &mut test::Bencher)
}

#[bench]
fn sum_1d_regular(bench: &mut test::Bencher)
fn iter_sum_1d_regular(bench: &mut test::Bencher)
{
let a = OwnedArray::<i32, _>::zeros(64 * 64);
let a = black_box(a);
@@ -41,7 +42,7 @@ fn sum_1d_regular(bench: &mut test::Bencher)
}

#[bench]
fn sum_1d_raw(bench: &mut test::Bencher)
fn iter_sum_1d_raw(bench: &mut test::Bencher)
{
// this is autovectorized to death (= great performance)
let a = OwnedArray::<i32, _>::zeros(64 * 64);
@@ -56,7 +57,7 @@ fn sum_1d_raw(bench: &mut test::Bencher)
}

#[bench]
fn sum_2d_regular(bench: &mut test::Bencher)
fn iter_sum_2d_regular(bench: &mut test::Bencher)
{
let a = OwnedArray::<i32, _>::zeros((64, 64));
let a = black_box(a);
@@ -70,7 +71,7 @@ fn sum_2d_regular(bench: &mut test::Bencher)
}

#[bench]
fn sum_2d_by_row(bench: &mut test::Bencher)
fn iter_sum_2d_by_row(bench: &mut test::Bencher)
{
let a = OwnedArray::<i32, _>::zeros((64, 64));
let a = black_box(a);
@@ -86,7 +87,7 @@ fn sum_2d_by_row(bench: &mut test::Bencher)
}

#[bench]
fn sum_2d_raw(bench: &mut test::Bencher)
fn iter_sum_2d_raw(bench: &mut test::Bencher)
{
// this is autovectorized to death (= great performance)
let a = OwnedArray::<i32, _>::zeros((64, 64));
@@ -101,7 +102,7 @@ fn sum_2d_raw(bench: &mut test::Bencher)
}

#[bench]
fn sum_2d_cutout(bench: &mut test::Bencher)
fn iter_sum_2d_cutout(bench: &mut test::Bencher)
{
let a = OwnedArray::<i32, _>::zeros((66, 66));
let av = a.slice(s![1..-1, 1..-1]);
@@ -116,39 +117,7 @@ fn sum_2d_cutout(bench: &mut test::Bencher)
}

#[bench]
fn sum_2d_cutout_fold(bench: &mut test::Bencher)
{
let a = OwnedArray::<i32, _>::zeros((66, 66));
let av = a.slice(s![1..-1, 1..-1]);
let a = black_box(av);
bench.iter(|| {
a.fold(0, |acc, elt| acc + *elt)
});
}

#[bench]
fn scalar_sum_2d_regular(bench: &mut test::Bencher)
{
let a = OwnedArray::<i32, _>::zeros((64, 64));
let a = black_box(a);
bench.iter(|| {
a.scalar_sum()
});
}

#[bench]
fn scalar_sum_2d_cutout(bench: &mut test::Bencher)
{
let a = OwnedArray::<i32, _>::zeros((66, 66));
let av = a.slice(s![1..-1, 1..-1]);
let a = black_box(av);
bench.iter(|| {
a.scalar_sum()
});
}

#[bench]
fn sum_2d_cutout_by_row(bench: &mut test::Bencher)
fn iter_sum_2d_cutout_by_row(bench: &mut test::Bencher)
{
let a = OwnedArray::<i32, _>::zeros((66, 66));
let av = a.slice(s![1..-1, 1..-1]);
@@ -165,7 +134,7 @@ fn sum_2d_cutout_by_row(bench: &mut test::Bencher)
}

#[bench]
fn sum_2d_cutout_outer_iter(bench: &mut test::Bencher)
fn iter_sum_2d_cutout_outer_iter(bench: &mut test::Bencher)
{
let a = OwnedArray::<i32, _>::zeros((66, 66));
let av = a.slice(s![1..-1, 1..-1]);
@@ -182,7 +151,7 @@ fn sum_2d_cutout_outer_iter(bench: &mut test::Bencher)
}

#[bench]
fn sum_2d_transpose_regular(bench: &mut test::Bencher)
fn iter_sum_2d_transpose_regular(bench: &mut test::Bencher)
{
let mut a = OwnedArray::<i32, _>::zeros((64, 64));
a.swap_axes(0, 1);
@@ -197,7 +166,7 @@ fn sum_2d_transpose_regular(bench: &mut test::Bencher)
}

#[bench]
fn sum_2d_transpose_by_row(bench: &mut test::Bencher)
fn iter_sum_2d_transpose_by_row(bench: &mut test::Bencher)
{
let mut a = OwnedArray::<i32, _>::zeros((64, 64));
a.swap_axes(0, 1);
@@ -213,6 +182,27 @@ fn sum_2d_transpose_by_row(bench: &mut test::Bencher)
});
}

#[bench]
fn scalar_sum_2d_regular(bench: &mut test::Bencher)
{
let a = OwnedArray::<i32, _>::zeros((64, 64));
let a = black_box(a);
bench.iter(|| {
a.scalar_sum()
});
}

#[bench]
fn scalar_sum_2d_cutout(bench: &mut test::Bencher)
{
let a = OwnedArray::<i32, _>::zeros((66, 66));
let av = a.slice(s![1..-1, 1..-1]);
let a = black_box(av);
bench.iter(|| {
a.scalar_sum()
});
}

#[bench]
fn scalar_sum_2d_float(bench: &mut test::Bencher)
{
@@ -642,9 +632,33 @@ fn dot_extended(bench: &mut test::Bencher) {
})
}

const MEAN_SUM_N: usize = 127;

fn range_mat(m: Ix, n: Ix) -> OwnedArray<f32, (Ix, Ix)> {
assert!(m * n != 0);
OwnedArray::linspace(0., (m * n - 1) as f32, m * n).into_shape((m, n)).unwrap()
}

#[bench]
fn means(bench: &mut test::Bencher) {
let a = OwnedArray::from_iter(0..100_000i64);
let a = a.into_shape((100, 1000)).unwrap();
fn mean_axis0(bench: &mut test::Bencher) {
let a = range_mat(MEAN_SUM_N, MEAN_SUM_N);
bench.iter(|| a.mean(Axis(0)));
}

#[bench]
fn mean_axis1(bench: &mut test::Bencher) {
let a = range_mat(MEAN_SUM_N, MEAN_SUM_N);
bench.iter(|| a.mean(Axis(1)));
}

#[bench]
fn sum_axis0(bench: &mut test::Bencher) {
let a = range_mat(MEAN_SUM_N, MEAN_SUM_N);
bench.iter(|| a.sum(Axis(0)));
}

#[bench]
fn sum_axis1(bench: &mut test::Bencher) {
let a = range_mat(MEAN_SUM_N, MEAN_SUM_N);
bench.iter(|| a.sum(Axis(1)));
}

119 changes: 113 additions & 6 deletions src/impl_methods.rs
@@ -129,7 +129,6 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
pub fn iter_mut(&mut self) -> ElementsMut<A, D>
where S: DataMut,
{
self.ensure_unique();
self.view_mut().into_iter_()
}

@@ -229,8 +228,7 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
where S: DataMut,
I: NdIndex<Dim=D>,
{
self.ensure_unique();
let ptr = self.ptr;
let ptr = self.as_mut_ptr();
index.index_checked(&self.dim, &self.strides)
.map(move |offset| unsafe { &mut *ptr.offset(offset) })
}
@@ -913,6 +911,8 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
self.view().reversed_axes()
}

/// ***Deprecated: Use .as_slice_memory_order() instead.***
///
/// Return a slice of the array’s backing data in memory order.
///
/// **Note:** Data memory order may not correspond to the index order
@@ -925,6 +925,8 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
self.data.slice()
}

/// ***Deprecated: Use .as_slice_memory_order_mut() instead.***
///
/// Return a mutable slice of the array’s backing data in memory order.
///
/// **Note:** Data memory order may not correspond to the index order
@@ -1075,8 +1077,12 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
}
}

/// ***Deprecated: Will be removed because it dictates a specific order.***
///
/// Traverse the array elements in order and apply a fold,
/// returning the resulting value.
#[cfg_attr(has_deprecated, deprecated(note=
"Will be removed because it dictates a specific order"))]
pub fn fold<'a, F, B>(&'a self, mut init: B, mut f: F) -> B
where F: FnMut(B, &'a A) -> B, A: 'a
{
@@ -1095,10 +1101,12 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
init
}
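
Since ``.fold()`` is deprecated here, a hedged sketch of one order-agnostic replacement, mirroring the ``row.iter().fold(..)`` rewrite in src/impl_numeric.rs further down; the input is illustrative:

```rust
// Illustrative only: accumulate over .iter() instead of the deprecated
// ArrayBase::fold(); mirrors the scalar_sum change in src/impl_numeric.rs.
extern crate ndarray;
use ndarray::OwnedArray;

fn main() {
    let a = OwnedArray::from_iter(0..16i64);
    let sum = a.iter().fold(0, |acc, &elt| acc + elt);
    assert!(sum == 120);
}
```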

/// Apply `f` elementwise and return a new array with
/// the results.
/// Call `f` by reference on each element and create a new array
/// with the new values.
///
/// Return an array with the same shape as *self*.
/// Elements are visited in arbitrary order.
///
/// Return an array with the same shape as `self`.
///
/// ```
/// use ndarray::arr2;
@@ -1128,4 +1136,103 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
}
}
}

/// Call `f` by **v**alue on each element and create a new array
/// with the new values.
///
/// Elements are visited in arbitrary order.
///
/// Return an array with the same shape as `self`.
///
/// ```
/// use ndarray::arr2;
///
/// let a = arr2(&[[ 0., 1.],
/// [-1., 2.]]);
/// assert!(
/// a.mapv(f32::abs) == arr2(&[[0., 1.],
/// [1., 2.]])
/// );
/// ```
pub fn mapv<B, F>(&self, mut f: F) -> OwnedArray<B, D>
where F: FnMut(A) -> B,
A: Clone,
{
self.map(move |x| f(x.clone()))
}

/// Call `f` by **v**alue on each element, update the array with the new values
/// and return it.
///
/// Elements are visited in arbitrary order.
pub fn mapv_into<F>(mut self, f: F) -> Self
where S: DataMut,
F: FnMut(A) -> A,
A: Clone,
{
self.applyv(f);
self
}
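
A hedged usage sketch for ``.mapv_into()``; only ``arr2`` and the new method come from this diff, the squaring closure and values are made up:

```rust
// Sketch: .mapv_into() consumes the array, maps each element by value,
// and returns the same storage with the new values.
extern crate ndarray;
use ndarray::arr2;

fn main() {
    let a = arr2(&[[1.0_f32, 2.], [3., 4.]]);
    let b = a.mapv_into(|x| x * x);
    assert!(b == arr2(&[[1., 4.], [9., 16.]]));
}
```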

/// Modify the array in place by calling `f` by mutable reference on each element.
///
/// Elements are visited in arbitrary order.
pub fn apply<F>(&mut self, f: F)
where S: DataMut,
F: FnMut(&mut A),
{
self.unordered_foreach_mut(f);
}
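
Likewise, a hedged sketch of ``.apply()``; the doubling closure and values are illustrative:

```rust
// Sketch: .apply() passes &mut references, so elements are updated in place
// without allocating a new array.
extern crate ndarray;
use ndarray::arr2;

fn main() {
    let mut a = arr2(&[[1.0_f32, 2.], [3., 4.]]);
    a.apply(|x| *x *= 2.);
    assert!(a == arr2(&[[2., 4.], [6., 8.]]));
}
```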

/// Modify the array in place by calling `f` by **v**alue on each element.
/// The array is updated with the new values.
///
/// Elements are visited in arbitrary order.
///
/// ```
/// use ndarray::arr2;
///
/// let mut a = arr2(&[[ 0., 1.],
/// [-1., 2.]]);
/// a.applyv(f32::exp);
/// assert!(
/// a.allclose(&arr2(&[[1.00000, 2.71828],
/// [0.36788, 7.38906]]), 1e-5)
/// );
/// ```
pub fn applyv<F>(&mut self, mut f: F)
where S: DataMut,
F: FnMut(A) -> A,
A: Clone,
{
self.unordered_foreach_mut(move |x| *x = f(x.clone()));
}

/// Visit each element in the array by calling `f` by reference
/// on each element.
///
/// Elements are visited in arbitrary order.
pub fn visit<'a, F>(&'a self, mut f: F)
where F: FnMut(&'a A),
A: 'a,
{
if let Some(slc) = self.as_slice_memory_order() {
// FIXME: Use for loop when slice iterator perf is restored
for i in 0..slc.len() {
f(&slc[i]);
}
} else {
for row in self.inner_iter() {
if let Some(slc) = row.into_slice() {
for i in 0..slc.len() {
f(&slc[i]);
}
} else {
for elt in row {
f(elt);
}
}
}
}
}
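
Finally, a hedged sketch of ``.visit()``, using captured state to fold out a maximum; the reduction itself is not part of the PR:

```rust
// Sketch: .visit() is a read-only traversal in arbitrary order; any
// accumulation goes through state captured by the closure.
extern crate ndarray;
use ndarray::arr2;

fn main() {
    let a = arr2(&[[1.0_f32, -2.], [3., 0.5]]);
    let mut max = std::f32::NEG_INFINITY;
    a.visit(|&x| if x > max { max = x });
    assert!(max == 3.);
}
```
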
}

2 changes: 1 addition & 1 deletion src/impl_numeric.rs
@@ -70,7 +70,7 @@ impl<A, S, D> ArrayBase<S, D>
if let Some(slc) = row.as_slice() {
sum = sum + numeric_util::unrolled_sum(slc);
} else {
sum = sum + row.fold(A::zero(), |acc, elt| acc + elt.clone());
sum = sum + row.iter().fold(A::zero(), |acc, elt| acc + elt.clone());
}
}
sum