Skip to content

Commit c5e307a

Browse files
committed
Merge pull request #159 from bluss/experiments
Methods .mapv(), .mapv_into(), .apply(), .applyv(), .visit()
2 parents d2c2bee + 956f1f6 commit c5e307a

File tree

5 files changed

+189
-60
lines changed

5 files changed

+189
-60
lines changed

README.rst

+8-4
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,20 @@ Status and Lookout
5353

5454
- Performance status:
5555

56-
+ Arithmetic involving arrays of contiguous inner dimension optimizes very well.
57-
+ ``.fold()`` and ``.zip_mut_with()`` are the most efficient ways to
56+
+ Performance of an operation depends on the memory layout of the array
57+
or array view. Especially if it's a binary operation, which
58+
needs matching memory layout to be efficient (with some exceptions).
59+
+ Arithmetic optimizes very well if the arrays have a contiguous inner dimension.
60+
+ The callback based methods like ``.mapv()``, ``.applyv()`` and
61+
``.zip_mut_with()`` are the most efficient ways to
5862
perform single traversal and lock step traversal respectively.
59-
+ ``.iter()`` and ``.iter_mut()`` are efficient for contiguous arrays.
63+
+ ``.iter()`` is efficient for c-contiguous arrays.
6064
+ Can use BLAS in some operations (``dot`` and ``mat_mul``).
6165

6266
Crate Feature Flags
6367
-------------------
6468

65-
The following crate feature flags are available. The are configured in
69+
The following crate feature flags are available. They are configured in
6670
your `Cargo.toml`.
6771

6872
- ``assign_ops``

benches/bench1.rs

+59-45
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use rblas::matrix::Matrix;
1212
use ndarray::{
1313
OwnedArray,
1414
Axis,
15+
Ix,
1516
};
1617
use ndarray::{arr0, arr1, arr2};
1718

@@ -27,7 +28,7 @@ fn map(bench: &mut test::Bencher)
2728
}
2829

2930
#[bench]
30-
fn sum_1d_regular(bench: &mut test::Bencher)
31+
fn iter_sum_1d_regular(bench: &mut test::Bencher)
3132
{
3233
let a = OwnedArray::<i32, _>::zeros(64 * 64);
3334
let a = black_box(a);
@@ -41,7 +42,7 @@ fn sum_1d_regular(bench: &mut test::Bencher)
4142
}
4243

4344
#[bench]
44-
fn sum_1d_raw(bench: &mut test::Bencher)
45+
fn iter_sum_1d_raw(bench: &mut test::Bencher)
4546
{
4647
// this is autovectorized to death (= great performance)
4748
let a = OwnedArray::<i32, _>::zeros(64 * 64);
@@ -56,7 +57,7 @@ fn sum_1d_raw(bench: &mut test::Bencher)
5657
}
5758

5859
#[bench]
59-
fn sum_2d_regular(bench: &mut test::Bencher)
60+
fn iter_sum_2d_regular(bench: &mut test::Bencher)
6061
{
6162
let a = OwnedArray::<i32, _>::zeros((64, 64));
6263
let a = black_box(a);
@@ -70,7 +71,7 @@ fn sum_2d_regular(bench: &mut test::Bencher)
7071
}
7172

7273
#[bench]
73-
fn sum_2d_by_row(bench: &mut test::Bencher)
74+
fn iter_sum_2d_by_row(bench: &mut test::Bencher)
7475
{
7576
let a = OwnedArray::<i32, _>::zeros((64, 64));
7677
let a = black_box(a);
@@ -86,7 +87,7 @@ fn sum_2d_by_row(bench: &mut test::Bencher)
8687
}
8788

8889
#[bench]
89-
fn sum_2d_raw(bench: &mut test::Bencher)
90+
fn iter_sum_2d_raw(bench: &mut test::Bencher)
9091
{
9192
// this is autovectorized to death (= great performance)
9293
let a = OwnedArray::<i32, _>::zeros((64, 64));
@@ -101,7 +102,7 @@ fn sum_2d_raw(bench: &mut test::Bencher)
101102
}
102103

103104
#[bench]
104-
fn sum_2d_cutout(bench: &mut test::Bencher)
105+
fn iter_sum_2d_cutout(bench: &mut test::Bencher)
105106
{
106107
let a = OwnedArray::<i32, _>::zeros((66, 66));
107108
let av = a.slice(s![1..-1, 1..-1]);
@@ -116,39 +117,7 @@ fn sum_2d_cutout(bench: &mut test::Bencher)
116117
}
117118

118119
#[bench]
119-
fn sum_2d_cutout_fold(bench: &mut test::Bencher)
120-
{
121-
let a = OwnedArray::<i32, _>::zeros((66, 66));
122-
let av = a.slice(s![1..-1, 1..-1]);
123-
let a = black_box(av);
124-
bench.iter(|| {
125-
a.fold(0, |acc, elt| acc + *elt)
126-
});
127-
}
128-
129-
#[bench]
130-
fn scalar_sum_2d_regular(bench: &mut test::Bencher)
131-
{
132-
let a = OwnedArray::<i32, _>::zeros((64, 64));
133-
let a = black_box(a);
134-
bench.iter(|| {
135-
a.scalar_sum()
136-
});
137-
}
138-
139-
#[bench]
140-
fn scalar_sum_2d_cutout(bench: &mut test::Bencher)
141-
{
142-
let a = OwnedArray::<i32, _>::zeros((66, 66));
143-
let av = a.slice(s![1..-1, 1..-1]);
144-
let a = black_box(av);
145-
bench.iter(|| {
146-
a.scalar_sum()
147-
});
148-
}
149-
150-
#[bench]
151-
fn sum_2d_cutout_by_row(bench: &mut test::Bencher)
120+
fn iter_sum_2d_cutout_by_row(bench: &mut test::Bencher)
152121
{
153122
let a = OwnedArray::<i32, _>::zeros((66, 66));
154123
let av = a.slice(s![1..-1, 1..-1]);
@@ -165,7 +134,7 @@ fn sum_2d_cutout_by_row(bench: &mut test::Bencher)
165134
}
166135

167136
#[bench]
168-
fn sum_2d_cutout_outer_iter(bench: &mut test::Bencher)
137+
fn iter_sum_2d_cutout_outer_iter(bench: &mut test::Bencher)
169138
{
170139
let a = OwnedArray::<i32, _>::zeros((66, 66));
171140
let av = a.slice(s![1..-1, 1..-1]);
@@ -182,7 +151,7 @@ fn sum_2d_cutout_outer_iter(bench: &mut test::Bencher)
182151
}
183152

184153
#[bench]
185-
fn sum_2d_transpose_regular(bench: &mut test::Bencher)
154+
fn iter_sum_2d_transpose_regular(bench: &mut test::Bencher)
186155
{
187156
let mut a = OwnedArray::<i32, _>::zeros((64, 64));
188157
a.swap_axes(0, 1);
@@ -197,7 +166,7 @@ fn sum_2d_transpose_regular(bench: &mut test::Bencher)
197166
}
198167

199168
#[bench]
200-
fn sum_2d_transpose_by_row(bench: &mut test::Bencher)
169+
fn iter_sum_2d_transpose_by_row(bench: &mut test::Bencher)
201170
{
202171
let mut a = OwnedArray::<i32, _>::zeros((64, 64));
203172
a.swap_axes(0, 1);
@@ -213,6 +182,27 @@ fn sum_2d_transpose_by_row(bench: &mut test::Bencher)
213182
});
214183
}
215184

185+
#[bench]
186+
fn scalar_sum_2d_regular(bench: &mut test::Bencher)
187+
{
188+
let a = OwnedArray::<i32, _>::zeros((64, 64));
189+
let a = black_box(a);
190+
bench.iter(|| {
191+
a.scalar_sum()
192+
});
193+
}
194+
195+
#[bench]
196+
fn scalar_sum_2d_cutout(bench: &mut test::Bencher)
197+
{
198+
let a = OwnedArray::<i32, _>::zeros((66, 66));
199+
let av = a.slice(s![1..-1, 1..-1]);
200+
let a = black_box(av);
201+
bench.iter(|| {
202+
a.scalar_sum()
203+
});
204+
}
205+
216206
#[bench]
217207
fn scalar_sum_2d_float(bench: &mut test::Bencher)
218208
{
@@ -642,9 +632,33 @@ fn dot_extended(bench: &mut test::Bencher) {
642632
})
643633
}
644634

635+
const MEAN_SUM_N: usize = 127;
636+
637+
fn range_mat(m: Ix, n: Ix) -> OwnedArray<f32, (Ix, Ix)> {
638+
assert!(m * n != 0);
639+
OwnedArray::linspace(0., (m * n - 1) as f32, m * n).into_shape((m, n)).unwrap()
640+
}
641+
645642
#[bench]
646-
fn means(bench: &mut test::Bencher) {
647-
let a = OwnedArray::from_iter(0..100_000i64);
648-
let a = a.into_shape((100, 1000)).unwrap();
643+
fn mean_axis0(bench: &mut test::Bencher) {
644+
let a = range_mat(MEAN_SUM_N, MEAN_SUM_N);
649645
bench.iter(|| a.mean(Axis(0)));
650646
}
647+
648+
#[bench]
649+
fn mean_axis1(bench: &mut test::Bencher) {
650+
let a = range_mat(MEAN_SUM_N, MEAN_SUM_N);
651+
bench.iter(|| a.mean(Axis(1)));
652+
}
653+
654+
#[bench]
655+
fn sum_axis0(bench: &mut test::Bencher) {
656+
let a = range_mat(MEAN_SUM_N, MEAN_SUM_N);
657+
bench.iter(|| a.sum(Axis(0)));
658+
}
659+
660+
#[bench]
661+
fn sum_axis1(bench: &mut test::Bencher) {
662+
let a = range_mat(MEAN_SUM_N, MEAN_SUM_N);
663+
bench.iter(|| a.sum(Axis(1)));
664+
}

src/impl_methods.rs

+113-6
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
129129
pub fn iter_mut(&mut self) -> ElementsMut<A, D>
130130
where S: DataMut,
131131
{
132-
self.ensure_unique();
133132
self.view_mut().into_iter_()
134133
}
135134

@@ -229,8 +228,7 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
229228
where S: DataMut,
230229
I: NdIndex<Dim=D>,
231230
{
232-
self.ensure_unique();
233-
let ptr = self.ptr;
231+
let ptr = self.as_mut_ptr();
234232
index.index_checked(&self.dim, &self.strides)
235233
.map(move |offset| unsafe { &mut *ptr.offset(offset) })
236234
}
@@ -913,6 +911,8 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
913911
self.view().reversed_axes()
914912
}
915913

914+
/// ***Deprecated: Use .as_slice_memory_order() instead.***
915+
///
916916
/// Return a slice of the array’s backing data in memory order.
917917
///
918918
/// **Note:** Data memory order may not correspond to the index order
@@ -925,6 +925,8 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
925925
self.data.slice()
926926
}
927927

928+
/// ***Deprecated: Use .as_slice_memory_order_mut() instead.***
929+
///
928930
/// Return a mutable slice of the array’s backing data in memory order.
929931
///
930932
/// **Note:** Data memory order may not correspond to the index order
@@ -1075,8 +1077,12 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
10751077
}
10761078
}
10771079

1080+
/// ***Deprecated: Will be removed because it dictates a specific order.***
1081+
///
10781082
/// Traverse the array elements in order and apply a fold,
10791083
/// returning the resulting value.
1084+
#[cfg_attr(has_deprecated, deprecated(note=
1085+
"Will be removed because it dictates a specific order"))]
10801086
pub fn fold<'a, F, B>(&'a self, mut init: B, mut f: F) -> B
10811087
where F: FnMut(B, &'a A) -> B, A: 'a
10821088
{
@@ -1095,10 +1101,12 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
10951101
init
10961102
}
10971103

1098-
/// Apply `f` elementwise and return a new array with
1099-
/// the results.
1104+
/// Call `f` by reference on each element and create a new array
1105+
/// with the new values.
11001106
///
1101-
/// Return an array with the same shape as *self*.
1107+
/// Elements are visited in arbitrary order.
1108+
///
1109+
/// Return an array with the same shape as `self`.
11021110
///
11031111
/// ```
11041112
/// use ndarray::arr2;
@@ -1128,4 +1136,103 @@ impl<A, S, D> ArrayBase<S, D> where S: Data<Elem=A>, D: Dimension
11281136
}
11291137
}
11301138
}
1139+
1140+
/// Call `f` by **v**alue on each element and create a new array
1141+
/// with the new values.
1142+
///
1143+
/// Elements are visited in arbitrary order.
1144+
///
1145+
/// Return an array with the same shape as `self`.
1146+
///
1147+
/// ```
1148+
/// use ndarray::arr2;
1149+
///
1150+
/// let a = arr2(&[[ 0., 1.],
1151+
/// [-1., 2.]]);
1152+
/// assert!(
1153+
/// a.mapv(f32::abs) == arr2(&[[0., 1.],
1154+
/// [1., 2.]])
1155+
/// );
1156+
/// ```
1157+
pub fn mapv<B, F>(&self, mut f: F) -> OwnedArray<B, D>
1158+
where F: FnMut(A) -> B,
1159+
A: Clone,
1160+
{
1161+
self.map(move |x| f(x.clone()))
1162+
}
1163+
1164+
/// Call `f` by **v**alue on each element, update the array with the new values
1165+
/// and return it.
1166+
///
1167+
/// Elements are visited in arbitrary order.
1168+
pub fn mapv_into<F>(mut self, f: F) -> Self
1169+
where S: DataMut,
1170+
F: FnMut(A) -> A,
1171+
A: Clone,
1172+
{
1173+
self.applyv(f);
1174+
self
1175+
}
1176+
1177+
/// Modify the array in place by calling `f` by mutable reference on each element.
1178+
///
1179+
/// Elements are visited in arbitrary order.
1180+
pub fn apply<F>(&mut self, f: F)
1181+
where S: DataMut,
1182+
F: FnMut(&mut A),
1183+
{
1184+
self.unordered_foreach_mut(f);
1185+
}
1186+
1187+
/// Modify the array in place by calling `f` by **v**alue on each element.
1188+
/// The array is updated with the new values.
1189+
///
1190+
/// Elements are visited in arbitrary order.
1191+
///
1192+
/// ```
1193+
/// use ndarray::arr2;
1194+
///
1195+
/// let mut a = arr2(&[[ 0., 1.],
1196+
/// [-1., 2.]]);
1197+
/// a.applyv(f32::exp);
1198+
/// assert!(
1199+
/// a.allclose(&arr2(&[[1.00000, 2.71828],
1200+
/// [0.36788, 7.38906]]), 1e-5)
1201+
/// );
1202+
/// ```
1203+
pub fn applyv<F>(&mut self, mut f: F)
1204+
where S: DataMut,
1205+
F: FnMut(A) -> A,
1206+
A: Clone,
1207+
{
1208+
self.unordered_foreach_mut(move |x| *x = f(x.clone()));
1209+
}
1210+
1211+
/// Visit each element in the array by calling `f` by reference
1212+
/// on each element.
1213+
///
1214+
/// Elements are visited in arbitrary order.
1215+
pub fn visit<'a, F>(&'a self, mut f: F)
1216+
where F: FnMut(&'a A),
1217+
A: 'a,
1218+
{
1219+
if let Some(slc) = self.as_slice_memory_order() {
1220+
// FIXME: Use a for loop once slice iterator performance is restored
1221+
for i in 0..slc.len() {
1222+
f(&slc[i]);
1223+
}
1224+
} else {
1225+
for row in self.inner_iter() {
1226+
if let Some(slc) = row.into_slice() {
1227+
for i in 0..slc.len() {
1228+
f(&slc[i]);
1229+
}
1230+
} else {
1231+
for elt in row {
1232+
f(elt);
1233+
}
1234+
}
1235+
}
1236+
}
1237+
}
11311238
}

src/impl_numeric.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ impl<A, S, D> ArrayBase<S, D>
7070
if let Some(slc) = row.as_slice() {
7171
sum = sum + numeric_util::unrolled_sum(slc);
7272
} else {
73-
sum = sum + row.fold(A::zero(), |acc, elt| acc + elt.clone());
73+
sum = sum + row.iter().fold(A::zero(), |acc, elt| acc + elt.clone());
7474
}
7575
}
7676
sum

0 commit comments

Comments
 (0)