Skip to content

Commit 332b0a6

Browse files
committed
histogram/bins.rs: Multiple Improvements
- Rewrites and reformats docs. Hopefully they are now more aligned with the style of the std library. - Replaced `left-inclusive-right-exclusive` by `left-closed-right-open`, which is more common in maths.
1 parent 9720891 commit 332b0a6

File tree

1 file changed

+112
-60
lines changed

1 file changed

+112
-60
lines changed

src/histogram/bins.rs

Lines changed: 112 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,51 @@
11
use ndarray::prelude::*;
22
use std::ops::{Index, Range};
33

4-
/// `Edges` is a sorted collection of `A` elements used
5-
/// to represent the boundaries of intervals ([`Bins`]) on
6-
/// a 1-dimensional axis.
4+
/// A sorted collection of elements of type `A` used to represent the boundaries of intervals, i.e.
5+
/// [`Bins`] on a 1-dimensional axis.
76
///
8-
/// [`Bins`]: struct.Bins.html
9-
/// # Example:
7+
/// **Note** that all intervals are left-closed and right-open. See examples below.
8+
///
9+
/// # Examples
1010
///
1111
/// ```
12-
/// use ndarray_stats::histogram::{Edges, Bins};
12+
/// use ndarray_stats::histogram::{Bins, Edges};
1313
/// use noisy_float::types::n64;
1414
///
1515
/// let unit_edges = Edges::from(vec![n64(0.), n64(1.)]);
1616
/// let unit_interval = Bins::new(unit_edges);
17-
/// // left inclusive
17+
/// // left-closed
1818
/// assert_eq!(
1919
/// unit_interval.range_of(&n64(0.)).unwrap(),
2020
/// n64(0.)..n64(1.),
2121
/// );
22-
/// // right exclusive
22+
/// // right-open
2323
/// assert_eq!(
2424
/// unit_interval.range_of(&n64(1.)),
2525
/// None
2626
/// );
2727
/// ```
28+
///
29+
/// [`Bins`]: struct.Bins.html
2830
#[derive(Clone, Debug, Eq, PartialEq)]
2931
pub struct Edges<A: Ord> {
3032
edges: Vec<A>,
3133
}
3234

3335
impl<A: Ord> From<Vec<A>> for Edges<A> {
34-
/// Get an `Edges` instance from a `Vec<A>`:
35-
/// the vector will be sorted in increasing order
36-
/// using an unstable sorting algorithm and duplicates
37-
/// will be removed.
36+
/// Converts a `Vec<A>` into an `Edges<A>`, consuming the edges.
37+
/// The vector will be sorted in increasing order using an unstable sorting algorithm, with
38+
/// duplicates removed.
3839
///
39-
/// # Example:
40+
/// # Current implementation
41+
///
42+
/// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
43+
/// which is based on [pattern-defeating quicksort][pdqsort].
44+
///
45+
/// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not
46+
/// allocate), and O(n log n) worst-case.
47+
///
48+
/// # Examples
4049
///
4150
/// ```
4251
/// use ndarray::array;
@@ -49,6 +58,9 @@ impl<A: Ord> From<Vec<A>> for Edges<A> {
4958
/// 15
5059
/// );
5160
/// ```
61+
///
62+
/// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
63+
/// [pdqsort]: https://github.com/orlp/pdqsort
5264
fn from(mut edges: Vec<A>) -> Self {
5365
// sort the array in-place
5466
edges.sort_unstable();
@@ -59,11 +71,19 @@ impl<A: Ord> From<Vec<A>> for Edges<A> {
5971
}
6072

6173
impl<A: Ord + Clone> From<Array1<A>> for Edges<A> {
62-
/// Get an `Edges` instance from a `Array1<A>`:
63-
/// the array elements will be sorted in increasing order
64-
/// using an unstable sorting algorithm and duplicates will be removed.
74+
/// Converts an `Array1<A>` into an `Edges<A>`, consuming the 1-dimensional array.
75+
/// The array will be sorted in increasing order using an unstable sorting algorithm, with
76+
/// duplicates removed.
6577
///
66-
/// # Example:
78+
/// # Current implementation
79+
///
80+
/// The current sorting algorithm is the same as [`std::slice::sort_unstable()`][sort],
81+
/// which is based on [pattern-defeating quicksort][pdqsort].
82+
///
83+
/// This sort is unstable (i.e., may reorder equal elements), in-place (i.e., does not
84+
/// allocate), and O(n log n) worst-case.
85+
///
86+
/// # Examples
6787
///
6888
/// ```
6989
/// use ndarray_stats::histogram::Edges;
@@ -75,6 +95,9 @@ impl<A: Ord + Clone> From<Array1<A>> for Edges<A> {
7595
/// 10
7696
/// );
7797
/// ```
98+
///
99+
/// [sort]: https://doc.rust-lang.org/stable/std/primitive.slice.html#method.sort_unstable
100+
/// [pdqsort]: https://github.com/orlp/pdqsort
78101
fn from(edges: Array1<A>) -> Self {
79102
let edges = edges.to_vec();
80103
Self::from(edges)
@@ -84,11 +107,13 @@ impl<A: Ord + Clone> From<Array1<A>> for Edges<A> {
84107
impl<A: Ord> Index<usize> for Edges<A> {
85108
type Output = A;
86109

87-
/// Get the `i`-th edge.
110+
/// Returns a reference to the `i`-th edge in `self`.
88111
///
89-
/// **Panics** if the index `i` is out of bounds.
112+
/// # Panics
90113
///
91-
/// # Example:
114+
/// Panics if the index `i` is out of bounds.
115+
///
116+
/// # Examples
92117
///
93118
/// ```
94119
/// use ndarray_stats::histogram::Edges;
@@ -105,9 +130,9 @@ impl<A: Ord> Index<usize> for Edges<A> {
105130
}
106131

107132
impl<A: Ord> Edges<A> {
108-
/// Number of edges in `self`.
133+
/// Returns the number of edges in `self`.
109134
///
110-
/// # Example:
135+
/// # Examples
111136
///
112137
/// ```
113138
/// use ndarray_stats::histogram::Edges;
@@ -125,25 +150,25 @@ impl<A: Ord> Edges<A> {
125150

126151
/// Returns `true` if `self` contains no edges.
127152
///
128-
/// # Example:
153+
/// # Examples
129154
///
130155
/// ```
131156
/// use ndarray_stats::histogram::Edges;
132157
/// use noisy_float::types::{N64, n64};
133158
///
134159
/// let edges = Edges::<N64>::from(vec![]);
135160
/// assert_eq!(edges.is_empty(), true);
161+
///
136162
/// let edges = Edges::from(vec![n64(0.), n64(2.), n64(5.)]);
137163
/// assert_eq!(edges.is_empty(), false);
138164
/// ```
139165
pub fn is_empty(&self) -> bool {
140166
self.edges.is_empty()
141167
}
142168

143-
/// Borrow an immutable reference to the edges as a 1-dimensional
144-
/// array view.
169+
/// Returns an immutable 1-dimensional array view of edges.
145170
///
146-
/// # Example:
171+
/// # Examples
147172
///
148173
/// ```
149174
/// use ndarray::array;
@@ -159,21 +184,26 @@ impl<A: Ord> Edges<A> {
159184
ArrayView1::from(&self.edges)
160185
}
161186

162-
/// Given `value`, it returns an option:
163-
/// - `Some((left, right))`, where `right=left+1`, if there are two consecutive edges in
164-
/// `self` such that `self[left] <= value < self[right]`;
187+
/// Returns indices of two consecutive `edges` in `self`, if the interval they represent
188+
/// contains the given `value`, or returns `None` otherwise.
189+
///
190+
/// That is to say, it returns
191+
/// - `Some((left, right))`, where `left` and `right` are the indices of two consecutive edges
192+
/// in `self` and `right == left + 1`, if `self[left] <= value < self[right]`;
165193
/// - `None`, otherwise.
166194
///
167-
/// # Example:
195+
/// # Examples
168196
///
169197
/// ```
170198
/// use ndarray_stats::histogram::Edges;
171199
///
172200
/// let edges = Edges::from(vec![0, 2, 3]);
201+
/// // `1` is in the interval [0, 2), whose indices are (0, 1)
173202
/// assert_eq!(
174203
/// edges.indices_of(&1),
175204
/// Some((0, 1))
176205
/// );
206+
/// // `5` is not in any of the intervals
177207
/// assert_eq!(
178208
/// edges.indices_of(&5),
179209
/// None
@@ -193,17 +223,17 @@ impl<A: Ord> Edges<A> {
193223
}
194224
}
195225

226+
/// Returns an iterator over the `edges` in `self`.
196227
pub fn iter(&self) -> impl Iterator<Item = &A> {
197228
self.edges.iter()
198229
}
199230
}
200231

201-
/// `Bins` is a sorted collection of non-overlapping
202-
/// 1-dimensional intervals.
232+
/// A sorted collection of non-overlapping 1-dimensional intervals.
203233
///
204-
/// All intervals are left-inclusive and right-exclusive.
234+
/// **Note** that all intervals are left-closed and right-open.
205235
///
206-
/// # Example:
236+
/// # Examples
207237
///
208238
/// ```
209239
/// use ndarray_stats::histogram::{Edges, Bins};
@@ -228,16 +258,17 @@ pub struct Bins<A: Ord> {
228258
}
229259

230260
impl<A: Ord> Bins<A> {
231-
/// Given a collection of [`Edges`], it returns the corresponding `Bins` instance.
261+
/// Returns a `Bins` instance where each bin corresponds to two consecutive members of the given
262+
/// [`Edges`], consuming the edges.
232263
///
233264
/// [`Edges`]: struct.Edges.html
234265
pub fn new(edges: Edges<A>) -> Self {
235266
Bins { edges }
236267
}
237268

238-
/// Returns the number of bins.
269+
/// Returns the number of bins in `self`.
239270
///
240-
/// # Example:
271+
/// # Examples
241272
///
242273
/// ```
243274
/// use ndarray_stats::histogram::{Edges, Bins};
@@ -257,70 +288,89 @@ impl<A: Ord> Bins<A> {
257288
}
258289
}
259290

260-
/// Returns `true` if the number of bins is zero, or in other words, if the
261-
/// number of edges is 0 or 1.
291+
/// Returns `true` if the number of bins is zero, i.e. if the number of edges is 0 or 1.
262292
///
263-
/// # Example:
293+
/// # Examples
264294
///
265295
/// ```
266296
/// use ndarray_stats::histogram::{Edges, Bins};
267297
/// use noisy_float::types::{N64, n64};
268298
///
299+
/// // At least 2 edges are needed to represent 1 interval
300+
/// let edges = Edges::from(vec![n64(0.), n64(1.), n64(3.)]);
301+
/// let bins = Bins::new(edges);
302+
/// assert_eq!(bins.is_empty(), false);
303+
///
304+
/// // No valid interval == Empty
269305
/// let edges = Edges::<N64>::from(vec![]);
270306
/// let bins = Bins::new(edges);
271307
/// assert_eq!(bins.is_empty(), true);
272308
/// let edges = Edges::from(vec![n64(0.)]);
273309
/// let bins = Bins::new(edges);
274310
/// assert_eq!(bins.is_empty(), true);
275-
/// let edges = Edges::from(vec![n64(0.), n64(1.), n64(3.)]);
276-
/// let bins = Bins::new(edges);
277-
/// assert_eq!(bins.is_empty(), false);
278311
/// ```
279312
pub fn is_empty(&self) -> bool {
280313
self.len() == 0
281314
}
282315

283-
/// Given `value`, it returns:
284-
/// - `Some(i)`, if the `i`-th bin in `self` contains `value`;
285-
/// - `None`, if `value` does not belong to any of the bins in `self`.
316+
/// Returns the index of the bin in `self` that contains the given `value`,
317+
/// or returns `None` if `value` does not belong to any bin in `self`.
286318
///
287-
/// # Example:
319+
/// # Examples
320+
///
321+
/// Basic usage:
288322
///
289323
/// ```
290324
/// use ndarray_stats::histogram::{Edges, Bins};
291325
///
292326
/// let edges = Edges::from(vec![0, 2, 4, 6]);
293327
/// let bins = Bins::new(edges);
294328
/// let value = 1;
329+
/// // The first bin [0, 2) contains `1`
295330
/// assert_eq!(
296331
/// bins.index_of(&1),
297332
/// Some(0)
298333
/// );
334+
/// // No bin contains 100
335+
/// assert_eq!(
336+
/// bins.index_of(&100),
337+
/// None
338+
/// )
339+
/// ```
340+
///
341+
/// Chaining [`Bins::index`] and [`Bins::index_of`] to get the boundaries of the bin containing
342+
/// the value:
343+
///
344+
/// ```
345+
/// # use ndarray_stats::histogram::{Edges, Bins};
346+
/// # let edges = Edges::from(vec![0, 2, 4, 6]);
347+
/// # let bins = Bins::new(edges);
348+
/// # let value = 1;
299349
/// assert_eq!(
300-
/// bins.index(bins.index_of(&1).unwrap()),
301-
/// 0..2
350+
/// // using `Option::map` to avoid panic on index out-of-bounds
351+
/// bins.index_of(&1).map(|i| bins.index(i)),
352+
/// Some(0..2)
302353
/// );
303354
/// ```
304355
pub fn index_of(&self, value: &A) -> Option<usize> {
305356
self.edges.indices_of(value).map(|t| t.0)
306357
}
307358

308-
/// Given `value`, it returns:
309-
/// - `Some(left_edge..right_edge)`, if there exists a bin in `self` such that
310-
/// `left_edge <= value < right_edge`;
311-
/// - `None`, otherwise.
359+
/// Returns a range as the bin which contains the given `value`, or returns `None` otherwise.
312360
///
313-
/// # Example:
361+
/// # Examples
314362
///
315363
/// ```
316364
/// use ndarray_stats::histogram::{Edges, Bins};
317365
///
318366
/// let edges = Edges::from(vec![0, 2, 4, 6]);
319367
/// let bins = Bins::new(edges);
368+
/// // [0, 2) contains `1`
320369
/// assert_eq!(
321370
/// bins.range_of(&1),
322371
/// Some(0..2)
323372
/// );
373+
/// // `10` is not in any interval
324374
/// assert_eq!(
325375
/// bins.range_of(&10),
326376
/// None
@@ -337,11 +387,13 @@ impl<A: Ord> Bins<A> {
337387
})
338388
}
339389

340-
/// Get the `i`-th bin.
390+
/// Returns a range as the bin at the given `index` position.
391+
///
392+
/// # Panics
341393
///
342-
/// **Panics** if `index` is out of bounds.
394+
/// Panics if `index` is out of bounds.
343395
///
344-
/// # Example:
396+
/// # Examples
345397
///
346398
/// ```
347399
/// use ndarray_stats::histogram::{Edges, Bins};
@@ -401,7 +453,7 @@ mod edges_tests {
401453
}
402454

403455
#[quickcheck]
404-
fn edges_are_right_exclusive(v: Vec<i32>) -> bool {
456+
fn edges_are_right_open(v: Vec<i32>) -> bool {
405457
let edges = Edges::from(v);
406458
let view = edges.as_array_view();
407459
if view.len() == 0 {
@@ -413,7 +465,7 @@ mod edges_tests {
413465
}
414466

415467
#[quickcheck]
416-
fn edges_are_left_inclusive(v: Vec<i32>) -> bool {
468+
fn edges_are_left_closed(v: Vec<i32>) -> bool {
417469
let edges = Edges::from(v);
418470
match edges.len() {
419471
1 => true,
@@ -445,7 +497,7 @@ mod bins_tests {
445497

446498
#[test]
447499
#[should_panic]
448-
fn get_panics_for_out_of_bound_indexes() {
500+
fn get_panics_for_out_of_bounds_indexes() {
449501
let edges = Edges::from(vec![0]);
450502
let bins = Bins::new(edges);
451503
// we need at least two edges to make a valid bin!

0 commit comments

Comments
 (0)