From 62e8b020e9eb8e1abcfd6f42305624ac185366c7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 06:58:01 +0000 Subject: [PATCH 01/44] Added definition of central order moment --- src/summary_statistics/mod.rs | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index ae05e709..940e695f 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -6,9 +6,9 @@ use std::ops::{Add, Div}; /// Extension trait for `ArrayBase` providing methods /// to compute several summary statistics (e.g. mean, variance, etc.). pub trait SummaryStatisticsExt - where - S: Data, - D: Dimension, +where + S: Data, + D: Dimension, { /// Returns the [`arithmetic mean`] x̅ of all elements in the array: /// @@ -24,8 +24,8 @@ pub trait SummaryStatisticsExt /// /// [`arithmetic mean`]: https://en.wikipedia.org/wiki/Arithmetic_mean fn mean(&self) -> Option - where - A: Clone + FromPrimitive + Add + Div + Zero; + where + A: Clone + FromPrimitive + Add + Div + Zero; /// Returns the [`harmonic mean`] `HM(X)` of all elements in the array: /// @@ -41,8 +41,8 @@ pub trait SummaryStatisticsExt /// /// [`harmonic mean`]: https://en.wikipedia.org/wiki/Harmonic_mean fn harmonic_mean(&self) -> Option - where - A: Float + FromPrimitive; + where + A: Float + FromPrimitive; /// Returns the [`geometric mean`] `GM(X)` of all elements in the array: /// @@ -58,8 +58,25 @@ pub trait SummaryStatisticsExt /// /// [`geometric mean`]: https://en.wikipedia.org/wiki/Geometric_mean fn geometric_mean(&self) -> Option - where - A: Float + FromPrimitive; + where + A: Float + FromPrimitive; + + /// Returns the *n*th [central order moment] of all elements in the array, μₚ: + /// + /// ```text + /// 1 n + /// μₚ = ― ∑ (xᵢ-x̅)ᵖ + /// n i=1 + /// ``` + /// + /// If the array is empty, `None` is returned. + /// + /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. + /// + /// [central order moment]: https://en.wikipedia.org/wiki/Central_moment + fn nth_central_order_moment(&self, n: usize) -> Option + where + A: Float + FromPrimitive; } From 89d6b8c114eba49646d87402dcb8fbe38ff48cd1 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 06:59:07 +0000 Subject: [PATCH 02/44] Added stubbed implementation --- src/summary_statistics/means.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 97217c23..8c12df73 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -31,11 +31,18 @@ where } fn geometric_mean(&self) -> Option - where - A: Float + FromPrimitive, + where + A: Float + FromPrimitive, { self.map(|x| x.ln()).mean().map(|x| x.exp()) } + + fn nth_central_order_moment(&self, n: usize) -> Option + where + A: Float + FromPrimitive + { + unimplemented!() + } } #[cfg(test)] From 9a5b565d3891502954ee99ff4483facdc54675fd Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 07:01:53 +0000 Subject: [PATCH 03/44] Add behaviour to return None if the array is empty --- src/summary_statistics/means.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 8c12df73..e02e02c8 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -41,7 +41,12 @@ where where A: Float + FromPrimitive { - unimplemented!() + let mean = self.mean(); + if mean.is_none() { + None + } else { + unimplemented!() + } } } @@ -102,4 +107,10 @@ mod tests { abs_diff_eq!(a.harmonic_mean().unwrap(), expected_harmonic_mean, epsilon = f64::EPSILON); abs_diff_eq!(a.geometric_mean().unwrap(), expected_geometric_mean, epsilon = f64::EPSILON); } + + #[test] + fn test_central_order_moment_with_empty_array_of_floats() { + let a: Array1 = array![]; + assert!(a.nth_central_order_moment(1).is_none()); + } } From d9826e5c573e0838200bcd88931d4fa520b1cfce Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 07:10:35 +0000 Subject: [PATCH 04/44] Handle edge cases --- src/summary_statistics/means.rs | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index e02e02c8..b9dc2be7 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -45,7 +45,11 @@ where if mean.is_none() { None } else { - unimplemented!() + match n { + 0 => Some(A::one()), + 1 => Some(A::zero()), + n => unimplemented!() + } } } } @@ -56,7 +60,9 @@ mod tests { use std::f64; use approx::abs_diff_eq; use noisy_float::types::N64; - use ndarray::{array, Array1}; + use ndarray::{array, Array, Array1}; + use ndarray_rand::RandomExt; + use rand::distributions::Uniform; #[test] fn test_means_with_nan_values() { @@ -113,4 +119,16 @@ mod tests { let a: Array1 = array![]; assert!(a.nth_central_order_moment(1).is_none()); } + + #[test] + fn test_zeroth_central_order_moment_is_one() { + let a: Array1 = array![]; + let n = 50; + let bound: f64 = 200.; + let a = Array::random( + n, + Uniform::new(-bound.abs(), bound.abs()) + ); + assert_eq!(a.nth_central_order_moment(0).unwrap(), 1.); + } } From 316a0e5422495ab357500ca6212b735da5cb45b7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 07:11:14 +0000 Subject: [PATCH 05/44] Added test --- src/summary_statistics/means.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index b9dc2be7..d01c1147 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -131,4 +131,16 @@ mod tests { ); assert_eq!(a.nth_central_order_moment(0).unwrap(), 1.); } + + #[test] + fn test_first_central_order_moment_is_zero() { + let a: Array1 = array![]; + let n = 50; + let bound: f64 = 200.; + let a = Array::random( + n, + Uniform::new(-bound.abs(), bound.abs()) + ); + assert_eq!(a.nth_central_order_moment(1).unwrap(), 0.); + } } From 00782d80b7f14b9c5d3ab909813e0001e433b680 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 07:39:01 +0000 Subject: [PATCH 06/44] First implementation completed --- src/summary_statistics/means.rs | 46 ++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index d01c1147..34528fa4 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -42,18 +42,50 @@ where A: Float + FromPrimitive { let mean = self.mean(); - if mean.is_none() { - None - } else { - match n { - 0 => Some(A::one()), - 1 => Some(A::zero()), - n => unimplemented!() + match mean { + None => None, + Some(mean) => { + match n { + 0 => Some(A::one()), + 1 => Some(A::zero()), + n => { + let n_elements = A::from_usize(self.len()). + expect("Converting number of elements to `A` must not fail"); + let shifted_array = self.map(|x| x.clone() - mean); + let correction_term = -shifted_array.sum() / n_elements; + let coefficients: Vec = (0..n).map( + |k| A::from_usize(binomial_coefficient(n, k)).unwrap() * + shifted_array.map(|x| x.powi((n - k) as i32)).sum() + ).collect(); + // Use Horner's method here + let mut result = A::zero(); + for (k, coefficient) in coefficients.iter().enumerate() { + result = result + *coefficient * correction_term.powi(k as i32); + } + Some(result) + } + } } } } } +/// Returns the binomial coefficient "n over k". +fn binomial_coefficient(n: usize, k: usize) -> usize { + // BC(n, k) = BC(n, n-k) + let k = if k > n - k { + n - k + } else { + k + }; + let mut result = 1; + for i in 0..k { + result = result * (n - i); + result = result / (i + 1); + } + result +} + #[cfg(test)] mod tests { use super::SummaryStatisticsExt; From 1d634acbde64f3f3607adc16945714e575d07c1e Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 07:43:59 +0000 Subject: [PATCH 07/44] Added test for second order moment (variance) --- src/summary_statistics/means.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 34528fa4..c1c06af4 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -154,7 +154,6 @@ mod tests { #[test] fn test_zeroth_central_order_moment_is_one() { - let a: Array1 = array![]; let n = 50; let bound: f64 = 200.; let a = Array::random( @@ -166,7 +165,6 @@ mod tests { #[test] fn test_first_central_order_moment_is_zero() { - let a: Array1 = array![]; let n = 50; let bound: f64 = 200.; let a = Array::random( @@ -175,4 +173,21 @@ mod tests { ); assert_eq!(a.nth_central_order_moment(1).unwrap(), 0.); } + + #[test] + fn test_second_central_order_moment() { + let a: Array1 = array![ + 0.07820559, 0.5026185 , 0.80935324, 0.39384033, 0.9483038, + 0.62516215, 0.90772261, 0.87329831, 0.60267392, 0.2960298, + 0.02810356, 0.31911966, 0.86705506, 0.96884832, 0.2222465, + 0.42162446, 0.99909868, 0.47619762, 0.91696979, 0.9972741, + 0.09891734, 0.76934818, 0.77566862, 0.7692585 , 0.2235759, + 0.44821286, 0.79732186, 0.04804275, 0.87863238, 0.1111003, + 0.6653943 , 0.44386445, 0.2133176 , 0.39397086, 0.4374617, + 0.95896624, 0.57850146, 0.29301706, 0.02329879, 0.2123203, + 0.62005503, 0.996492 , 0.5342986 , 0.97822099, 0.5028445, + 0.6693834 , 0.14256682, 0.52724704, 0.73482372, 0.1809703, + ]; + assert_eq!(a.nth_central_order_moment(2).unwrap(), 0.09339920262960291); + } } From 05bf6834f5be5d03aae8f941f99c57f67ba05192 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 08:15:16 +0000 Subject: [PATCH 08/44] Implemented test and renamed to central_moment --- src/summary_statistics/means.rs | 25 ++++++++++++++++--------- src/summary_statistics/mod.rs | 6 +++--- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index c1c06af4..8705d2af 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -37,7 +37,7 @@ where self.map(|x| x.ln()).mean().map(|x| x.exp()) } - fn nth_central_order_moment(&self, n: usize) -> Option + fn central_moment(&self, order: usize) -> Option where A: Float + FromPrimitive { @@ -45,7 +45,7 @@ where match mean { None => None, Some(mean) => { - match n { + match order { 0 => Some(A::one()), 1 => Some(A::zero()), n => { @@ -53,9 +53,9 @@ where expect("Converting number of elements to `A` must not fail"); let shifted_array = self.map(|x| x.clone() - mean); let correction_term = -shifted_array.sum() / n_elements; - let coefficients: Vec = (0..n).map( + let coefficients: Vec = (0..=n).map( |k| A::from_usize(binomial_coefficient(n, k)).unwrap() * - shifted_array.map(|x| x.powi((n - k) as i32)).sum() + shifted_array.map(|x| x.powi((n - k) as i32)).sum() / n_elements ).collect(); // Use Horner's method here let mut result = A::zero(); @@ -149,7 +149,7 @@ mod tests { #[test] fn test_central_order_moment_with_empty_array_of_floats() { let a: Array1 = array![]; - assert!(a.nth_central_order_moment(1).is_none()); + assert!(a.central_moment(1).is_none()); } #[test] @@ -160,7 +160,7 @@ mod tests { n, Uniform::new(-bound.abs(), bound.abs()) ); - assert_eq!(a.nth_central_order_moment(0).unwrap(), 1.); + assert_eq!(a.central_moment(0).unwrap(), 1.); } #[test] @@ -171,11 +171,11 @@ mod tests { n, Uniform::new(-bound.abs(), bound.abs()) ); - assert_eq!(a.nth_central_order_moment(1).unwrap(), 0.); + assert_eq!(a.central_moment(1).unwrap(), 0.); } #[test] - fn test_second_central_order_moment() { + fn test_central_order_moments() { let a: Array1 = array![ 0.07820559, 0.5026185 , 0.80935324, 0.39384033, 0.9483038, 0.62516215, 0.90772261, 0.87329831, 0.60267392, 0.2960298, @@ -188,6 +188,13 @@ mod tests { 0.62005503, 0.996492 , 0.5342986 , 0.97822099, 0.5028445, 0.6693834 , 0.14256682, 0.52724704, 0.73482372, 0.1809703, ]; - assert_eq!(a.nth_central_order_moment(2).unwrap(), 0.09339920262960291); + // Computed using scipy.stats.moment + let expected_moments = vec![ + 1., 0., 0.09339920262960291, -0.0026849636727735186, + 0.015403769257729755, -0.001204176487006564, 0.002976822584939186, + ]; + for (order, expected_moment) in expected_moments.iter().enumerate() { + abs_diff_eq!(a.central_moment(order).unwrap(), expected_moment, epsilon = f64::EPSILON); + } } } diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index 940e695f..42dd0a80 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -61,7 +61,7 @@ where where A: Float + FromPrimitive; - /// Returns the *n*th [central order moment] of all elements in the array, μₚ: + /// Returns the *n*-th [central moment] of all elements in the array, μₚ: /// /// ```text /// 1 n @@ -73,8 +73,8 @@ where /// /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. /// - /// [central order moment]: https://en.wikipedia.org/wiki/Central_moment - fn nth_central_order_moment(&self, n: usize) -> Option + /// [central moment]: https://en.wikipedia.org/wiki/Central_moment + fn central_moment(&self, order: usize) -> Option where A: Float + FromPrimitive; From 5921dd2383f116b84fa8549e4ada54e6c3883ab9 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 08:30:16 +0000 Subject: [PATCH 09/44] Using Horner's method for evaluating polynomials --- src/summary_statistics/means.rs | 44 ++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 8705d2af..b24a24a1 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -57,12 +57,7 @@ where |k| A::from_usize(binomial_coefficient(n, k)).unwrap() * shifted_array.map(|x| x.powi((n - k) as i32)).sum() / n_elements ).collect(); - // Use Horner's method here - let mut result = A::zero(); - for (k, coefficient) in coefficients.iter().enumerate() { - result = result + *coefficient * correction_term.powi(k as i32); - } - Some(result) + Some(horner_method(coefficients, correction_term)) } } } @@ -71,13 +66,17 @@ where } /// Returns the binomial coefficient "n over k". +/// +/// **Panics** if k > n. fn binomial_coefficient(n: usize, k: usize) -> usize { + if k > n { + panic!( + "Tried to compute the binomial coefficient of {0} over {1}, \ + but {1} is strictly greater than {0}!" + ) + } // BC(n, k) = BC(n, n-k) - let k = if k > n - k { - n - k - } else { - k - }; + let k = if k > n-k { n-k } else { k }; let mut result = 1; for i in 0..k { result = result * (n - i); @@ -86,6 +85,29 @@ fn binomial_coefficient(n: usize, k: usize) -> usize { result } +/// Uses [Horner's method] to evaluate a polynomial with a single indeterminate. +/// +/// Coefficients are expected to be sorted by ascending order +/// with respect to the indeterminate's exponent. +/// +/// If the array is empty, `A::zero()` is returned. +/// +/// Horner's method can evaluate a polynomial of order *n* with a single indeterminate +/// using only *n-1* multiplications and *n-1* sums - in terms of number of operations, +/// this is an optimal algorithm for polynomial evaluation. +/// +/// [Horner's method]: https://en.wikipedia.org/wiki/Horner%27s_method +fn horner_method(coefficients: Vec, indeterminate: A) -> A +where + A: Float, +{ + let mut result = A::zero(); + for coefficient in coefficients.into_iter().rev() { + result = coefficient + indeterminate * result + } + result +} + #[cfg(test)] mod tests { use super::SummaryStatisticsExt; From c04e295822ec64acdd1398cc083c79413e1cb7b2 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 08:33:27 +0000 Subject: [PATCH 10/44] Add algorithm notes to the docs --- src/summary_statistics/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index 42dd0a80..1b6b94a4 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -71,9 +71,13 @@ where /// /// If the array is empty, `None` is returned. /// + /// The *n*-th central moment is computed using a corrected two-pass algorithm (see Section 3.5 + /// in [Pébay et al., 2016]). + /// /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. /// /// [central moment]: https://en.wikipedia.org/wiki/Central_moment + /// [Pébay et al., 2016]: https://www.osti.gov/pages/servlets/purl/1427275 fn central_moment(&self, order: usize) -> Option where A: Float + FromPrimitive; From 9cddde5a5c477f0273132930fa80b77e37c50073 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 08:40:38 +0000 Subject: [PATCH 11/44] Added algorithmic complexity to the docs --- src/summary_statistics/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index 1b6b94a4..a3f6adf1 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -72,7 +72,7 @@ where /// If the array is empty, `None` is returned. /// /// The *n*-th central moment is computed using a corrected two-pass algorithm (see Section 3.5 - /// in [Pébay et al., 2016]). + /// in [Pébay et al., 2016]). Complexity is *O(N(p+1))* when *N >> p*, *p > 1*. /// /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. /// @@ -81,7 +81,6 @@ where fn central_moment(&self, order: usize) -> Option where A: Float + FromPrimitive; - } mod means; From 336a51a1142d5906740bec4bbe995a6d078fb971 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 15 Jan 2019 08:46:23 +0000 Subject: [PATCH 12/44] Added two optimizations --- src/summary_statistics/means.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index b24a24a1..a26fdec1 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -53,10 +53,21 @@ where expect("Converting number of elements to `A` must not fail"); let shifted_array = self.map(|x| x.clone() - mean); let correction_term = -shifted_array.sum() / n_elements; - let coefficients: Vec = (0..=n).map( + + let mut coefficients: Vec = (0..n-1).map( |k| A::from_usize(binomial_coefficient(n, k)).unwrap() * shifted_array.map(|x| x.powi((n - k) as i32)).sum() / n_elements ).collect(); + // When k=n-1, we can reuse the already computed correction_term, by + // flipping its sign + coefficients.push( + A::from_usize(binomial_coefficient(n, n-1)).unwrap() * + (-correction_term) + ); + // When k=n, we are raising each element to the 0th power + // No need to waste CPU cycles going through the array + coefficients.push(A::one()); + Some(horner_method(coefficients, correction_term)) } } From 7f012af570f00b665bc5f00324c49116f85de1aa Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 18 Jan 2019 22:28:17 +0000 Subject: [PATCH 13/44] Added signature for bulk central moment computation --- src/summary_statistics/means.rs | 7 +++++++ src/summary_statistics/mod.rs | 23 ++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index a26fdec1..db234834 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -74,6 +74,13 @@ where } } } + + fn central_moments(&self, order: usize) -> Option> + where + A: Float + FromPrimitive + { + unimplemented!() + } } /// Returns the binomial coefficient "n over k". diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index a3f6adf1..59d2a56d 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -61,7 +61,7 @@ where where A: Float + FromPrimitive; - /// Returns the *n*-th [central moment] of all elements in the array, μₚ: + /// Returns the *p*-th [central moment] of all elements in the array, μₚ: /// /// ```text /// 1 n @@ -71,8 +71,8 @@ where /// /// If the array is empty, `None` is returned. /// - /// The *n*-th central moment is computed using a corrected two-pass algorithm (see Section 3.5 - /// in [Pébay et al., 2016]). Complexity is *O(N(p+1))* when *N >> p*, *p > 1*. + /// The *p*-th central moment is computed using a corrected two-pass algorithm (see Section 3.5 + /// in [Pébay et al., 2016]). Complexity is *O(np)* when *n >> p*, *p > 1*. /// /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. /// @@ -81,6 +81,23 @@ where fn central_moment(&self, order: usize) -> Option where A: Float + FromPrimitive; + + /// Returns the first *p* [central moments] of all elements in the array, see [central moment] + /// for more details. + /// + /// If the array is empty, `None` is returned. + /// + /// This method reuses the intermediate steps for the *k*-th moment to compute the *(k+1)*-th, + /// being thus more efficient than repeated calls to [central moment] if the computation + /// of central moments of multiple orders is required. + /// + /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. + /// + /// [central moments]: https://en.wikipedia.org/wiki/Central_moment + /// [central moment]: + fn central_moments(&self, order: usize) -> Option + where + A: Float + FromPrimitive; } mod means; From cfd4dbea976dbb4bd890be03c87fdd4369a5c3aa Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 18 Jan 2019 22:28:46 +0000 Subject: [PATCH 14/44] Fixed trait signature --- src/summary_statistics/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index 59d2a56d..7d32f9c0 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -95,7 +95,7 @@ where /// /// [central moments]: https://en.wikipedia.org/wiki/Central_moment /// [central moment]: - fn central_moments(&self, order: usize) -> Option + fn central_moments(&self, order: usize) -> Option> where A: Float + FromPrimitive; } From 2517b4d55d434f5dba966e0e51b773cfb656e726 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 18 Jan 2019 22:29:02 +0000 Subject: [PATCH 15/44] Indent --- src/summary_statistics/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index 7d32f9c0..dbdb11aa 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -96,8 +96,8 @@ where /// [central moments]: https://en.wikipedia.org/wiki/Central_moment /// [central moment]: fn central_moments(&self, order: usize) -> Option> - where - A: Float + FromPrimitive; + where + A: Float + FromPrimitive; } mod means; From b5a520c28d1e135f64c0439e3dfe6601b56b7b79 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 18 Jan 2019 23:15:48 +0000 Subject: [PATCH 16/44] Factored out logic from central_moment --- src/summary_statistics/means.rs | 71 +++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 17 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index db234834..61e17d94 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -49,25 +49,11 @@ where 0 => Some(A::one()), 1 => Some(A::zero()), n => { - let n_elements = A::from_usize(self.len()). - expect("Converting number of elements to `A` must not fail"); let shifted_array = self.map(|x| x.clone() - mean); - let correction_term = -shifted_array.sum() / n_elements; - - let mut coefficients: Vec = (0..n-1).map( - |k| A::from_usize(binomial_coefficient(n, k)).unwrap() * - shifted_array.map(|x| x.powi((n - k) as i32)).sum() / n_elements - ).collect(); - // When k=n-1, we can reuse the already computed correction_term, by - // flipping its sign - coefficients.push( - A::from_usize(binomial_coefficient(n, n-1)).unwrap() * - (-correction_term) - ); - // When k=n, we are raising each element to the 0th power - // No need to waste CPU cycles going through the array - coefficients.push(A::one()); + let shifted_moments = moments(shifted_array, n); + let correction_term = -shifted_moments[1].clone(); + let coefficients = central_moment_coefficients(&shifted_moments); Some(horner_method(coefficients, correction_term)) } } @@ -83,6 +69,57 @@ where } } +/// Returns a vector containing all moments of the array elements up to +/// *order*, where the *p*-th moment is defined as: +/// +/// ```text +/// 1 n +/// ― ∑ xᵢᵖ +/// n i=1 +/// ``` +/// +/// The returned moments are ordered by power magnitude: 0th moment, 1st moment, etc. +/// +/// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. +fn moments(a: ArrayBase, order: usize) -> Vec +where + A: Float + FromPrimitive, + S: Data, + D: Dimension, +{ + let n_elements = A::from_usize(a.len()) + .expect("Converting number of elements to `A` must not fail"); + + // When k=0, we are raising each element to the 0th power + // No need to waste CPU cycles going through the array + let mut moments = vec![A::one()]; + + if order >=1 { + // When k=1, we don't need to raise elements to the 1th power (identity) + moments.push(a.sum() / n_elements) + } + + for k in 2..=order { + moments.push(a.map(|x| x.powi(k as i32)).sum() / n_elements) + } + moments +} + +/// Returns the coefficients in the polynomial expression to compute the *p*th +/// central moment as a function of the sample mean. +/// +/// It takes as input all moments up to order *p*, ordered by power magnitude - *p* is +/// inferred to be the length of the *moments* array. +fn central_moment_coefficients(moments: &[A]) -> Vec +where + A: Float + FromPrimitive, +{ + let order = moments.len(); + moments.iter().rev().enumerate().map( + |(k, moment)| A::from_usize(binomial_coefficient(order, k)).unwrap() * *moment + ).collect() +} + /// Returns the binomial coefficient "n over k". /// /// **Panics** if k > n. From 7eb268f24203f4cb21ae544b11fc3edc47873693 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 18 Jan 2019 23:21:29 +0000 Subject: [PATCH 17/44] Implemented central_moments --- src/summary_statistics/means.rs | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 61e17d94..d27fcfab 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -65,7 +65,31 @@ where where A: Float + FromPrimitive { - unimplemented!() + let mean = self.mean(); + match mean { + None => None, + Some(mean) => { + match order { + 0 => Some(vec![A::one()]), + 1 => Some(vec![A::one(), A::zero()]), + n => { + // We only perform this operations once, and then reuse their + // result to compute all the required moments + let shifted_array = self.map(|x| x.clone() - mean); + let shifted_moments = moments(shifted_array, n); + let correction_term = -shifted_moments[1].clone(); + + let mut central_moments = vec![A::one(), A::zero()]; + for k in 2..=n { + let coefficients = central_moment_coefficients(&shifted_moments[..=k]); + let central_moment = horner_method(coefficients, correction_term); + central_moments.push(central_moment) + } + Some(central_moments) + } + } + } + } } } From 2e61f2efce27aedda963be4ddb895825781633e3 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 18 Jan 2019 23:25:49 +0000 Subject: [PATCH 18/44] Test implementation correctness --- src/summary_statistics/means.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index d27fcfab..d804b419 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -251,6 +251,7 @@ mod tests { fn test_central_order_moment_with_empty_array_of_floats() { let a: Array1 = array![]; assert!(a.central_moment(1).is_none()); + assert!(a.central_moments(1).is_none()); } #[test] @@ -298,4 +299,20 @@ mod tests { abs_diff_eq!(a.central_moment(order).unwrap(), expected_moment, epsilon = f64::EPSILON); } } + + #[test] + fn test_bulk_central_order_moments() { + // Test that the bulk method is coherent with the non-bulk method + let n = 50; + let bound: f64 = 200.; + let a = Array::random( + n, + Uniform::new(-bound.abs(), bound.abs()) + ); + let order = 10; + let central_moments = a.central_moments(order).unwrap(); + for i in 0..=order { + assert_eq!(a.central_moment(i).unwrap(), central_moments[i]); + } + } } From 9d37f1505ed5b42e0ccc698761e40734ea0e411f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 18 Jan 2019 23:39:23 +0000 Subject: [PATCH 19/44] Added skewness and kurtosis --- src/summary_statistics/means.rs | 20 ++++++++++++++++++++ src/summary_statistics/mod.rs | 8 ++++++++ 2 files changed, 28 insertions(+) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index d804b419..b7f7342c 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -37,6 +37,26 @@ where self.map(|x| x.ln()).mean().map(|x| x.exp()) } + fn kurtosis(&self) -> Option + where + A: Float + FromPrimitive + { + let central_moments = self.central_moments(4); + central_moments.map( + |moments| moments[4] / moments[2].powi(2) + ) + } + + fn skewness(&self) -> Option + where + A: Float + FromPrimitive + { + let central_moments = self.central_moments(4); + central_moments.map( + |moments| moments[3] / moments[2].sqrt().powi(3) + ) + } + fn central_moment(&self, order: usize) -> Option where A: Float + FromPrimitive diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index dbdb11aa..4e0ac1e7 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -61,6 +61,14 @@ where where A: Float + FromPrimitive; + fn kurtosis(&self) -> Option + where + A: Float + FromPrimitive; + + fn skewness(&self) -> Option + where + A: Float + FromPrimitive; + /// Returns the *p*-th [central moment] of all elements in the array, μₚ: /// /// ```text From 4829ca6c5d4f43f78ee53c026a89082848e96ca7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 18 Jan 2019 23:47:03 +0000 Subject: [PATCH 20/44] Added docs --- src/summary_statistics/mod.rs | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index 4e0ac1e7..054fb84a 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -61,10 +61,38 @@ where where A: Float + FromPrimitive; + /// Returns the [kurtosis] `Kurt[X]` of all elements in the array: + /// + /// ```text + /// Kurt[X] = μ₄ / σ⁴ + /// ``` + /// + /// where μ₄ is the fourth central moment and σ is the standard deviation of + /// the elements in the array. + /// + /// If the array is empty, `None` is returned. + /// + /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. + /// + /// [kurtosis]: https://en.wikipedia.org/wiki/Kurtosis fn kurtosis(&self) -> Option where A: Float + FromPrimitive; + /// Returns the [Pearson's moment coefficient of skewness] γ₁ of all elements in the array: + /// + /// ```text + /// γ₁ = μ₃ / σ³ + /// ``` + /// + /// where μ₃ is the third central moment and σ is the standard deviation of + /// the elements in the array. + /// + /// If the array is empty, `None` is returned. + /// + /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. + /// + /// [Pearson's moment coefficient of skewness]: https://en.wikipedia.org/wiki/Skewness fn skewness(&self) -> Option where A: Float + FromPrimitive; @@ -102,7 +130,7 @@ where /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. /// /// [central moments]: https://en.wikipedia.org/wiki/Central_moment - /// [central moment]: + /// [central moment]: #tymethod.central_moment fn central_moments(&self, order: usize) -> Option> where A: Float + FromPrimitive; From c748c8e91e92c58dc7d22109c7d9a580bd606349 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 18 Jan 2019 23:53:01 +0000 Subject: [PATCH 21/44] Added tests for kurtosis and skewness --- src/summary_statistics/means.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index b7f7342c..80a437e6 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -335,4 +335,37 @@ mod tests { assert_eq!(a.central_moment(i).unwrap(), central_moments[i]); } } + + #[test] + fn test_kurtosis_and_skewness_is_none_with_empty_array_of_floats() { + let a: Array1 = array![]; + assert!(a.skewness().is_none()); + assert!(a.kurtosis().is_none()); + } + + #[test] + fn test_kurtosis_and_skewness() { + let a: Array1 = array![ + 0.33310096, 0.98757449, 0.9789796 , 0.96738114, 0.43545674, + 0.06746873, 0.23706562, 0.04241815, 0.38961714, 0.52421271, + 0.93430327, 0.33911604, 0.05112372, 0.5013455 , 0.05291507, + 0.62511183, 0.20749633, 0.22132433, 0.14734804, 0.51960608, + 0.00449208, 0.4093339 , 0.2237519 , 0.28070469, 0.7887231 , + 0.92224523, 0.43454188, 0.18335111, 0.08646856, 0.87979847, + 0.25483457, 0.99975627, 0.52712442, 0.41163279, 0.85162594, + 0.52618733, 0.75815023, 0.30640695, 0.14205781, 0.59695813, + 0.851331 , 0.39524328, 0.73965373, 0.4007615 , 0.02133069, + 0.92899207, 0.79878191, 0.38947334, 0.22042183, 0.77768353, + ]; + // Computed using scipy.stats.kurtosis + let expected_kurtosis = -1.1780662914793822; + // Computed using scipy.stats.skew + let expected_skewness = 0.2604785422878771; + + let kurtosis = a.kurtosis().unwrap(); + let skewness = a.skewness().unwrap(); + + abs_diff_eq!(kurtosis, expected_kurtosis, epsilon = f64::EPSILON); + abs_diff_eq!(skewness, expected_skewness, epsilon = f64::EPSILON); + } } From 4bf0035f6552ebfca0a3bfcce95e8047197e5de4 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 22 Jan 2019 09:22:33 +0000 Subject: [PATCH 22/44] Fixed assertions --- src/summary_statistics/means.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 80a437e6..22a5f30b 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -211,7 +211,7 @@ where mod tests { use super::SummaryStatisticsExt; use std::f64; - use approx::abs_diff_eq; + use approx::assert_abs_diff_eq; use noisy_float::types::N64; use ndarray::{array, Array, Array1}; use ndarray_rand::RandomExt; @@ -262,9 +262,9 @@ mod tests { // Computed using SciPy let expected_geometric_mean = 0.4345897639796527; - abs_diff_eq!(a.mean().unwrap(), expected_mean, epsilon = f64::EPSILON); - abs_diff_eq!(a.harmonic_mean().unwrap(), expected_harmonic_mean, epsilon = f64::EPSILON); - abs_diff_eq!(a.geometric_mean().unwrap(), expected_geometric_mean, epsilon = f64::EPSILON); + assert_abs_diff_eq!(a.mean().unwrap(), expected_mean, epsilon = 1e-6); + assert_abs_diff_eq!(a.harmonic_mean().unwrap(), expected_harmonic_mean, epsilon = 1e-6); + assert_abs_diff_eq!(a.geometric_mean().unwrap(), expected_geometric_mean, epsilon = 1e-6); } #[test] @@ -316,7 +316,7 @@ mod tests { 0.015403769257729755, -0.001204176487006564, 0.002976822584939186, ]; for (order, expected_moment) in expected_moments.iter().enumerate() { - abs_diff_eq!(a.central_moment(order).unwrap(), expected_moment, epsilon = f64::EPSILON); + assert_abs_diff_eq!(a.central_moment(order).unwrap(), expected_moment, epsilon = 1e-6); } } @@ -365,7 +365,7 @@ mod tests { let kurtosis = a.kurtosis().unwrap(); let skewness = a.skewness().unwrap(); - abs_diff_eq!(kurtosis, expected_kurtosis, epsilon = f64::EPSILON); - abs_diff_eq!(skewness, expected_skewness, epsilon = f64::EPSILON); + assert_abs_diff_eq!(kurtosis, expected_kurtosis, epsilon = 1e-6); + assert_abs_diff_eq!(skewness, expected_skewness, epsilon = 1e-6); } } From da49a28220875b0cd66d980bf10cb68f626d3c11 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 22 Jan 2019 09:23:50 +0000 Subject: [PATCH 23/44] No need to get to order 4 for skewness --- src/summary_statistics/means.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 22a5f30b..a23d525b 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -51,7 +51,7 @@ where where A: Float + FromPrimitive { - let central_moments = self.central_moments(4); + let central_moments = self.central_moments(3); central_moments.map( |moments| moments[3] / moments[2].sqrt().powi(3) ) From 4a193bb610f235be5289f2810f7eac1d4e0f161f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 23 Jan 2019 08:43:10 +0000 Subject: [PATCH 24/44] Fixed kurtosis test --- src/summary_statistics/means.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index a23d525b..29dc7084 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -357,8 +357,8 @@ mod tests { 0.851331 , 0.39524328, 0.73965373, 0.4007615 , 0.02133069, 0.92899207, 0.79878191, 0.38947334, 0.22042183, 0.77768353, ]; - // Computed using scipy.stats.kurtosis - let expected_kurtosis = -1.1780662914793822; + // Computed using scipy.stats.kurtosis(a, fisher=False) + let expected_kurtosis = 1.821933711687523; // Computed using scipy.stats.skew let expected_skewness = 0.2604785422878771; From 5fe1fe873b046fb7eab7b108886d9da311a246ff Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Wed, 23 Jan 2019 08:47:56 +0000 Subject: [PATCH 25/44] Enriched docs --- src/summary_statistics/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index 054fb84a..9daed02a 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -70,6 +70,9 @@ where /// where μ₄ is the fourth central moment and σ is the standard deviation of /// the elements in the array. /// + /// This is sometimes referred to as _Pearson's kurtosis_. Fisher's kurtosis can be + /// computed by subtracting 3 from Pearson's kurtosis. + /// /// If the array is empty, `None` is returned. /// /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. From 36428c75bd8086ce9bfc264e7c86f2c315291b41 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 26 Mar 2019 22:15:51 +0000 Subject: [PATCH 26/44] Fmt --- src/correlation.rs | 96 ++++++++---------- src/histogram/bins.rs | 35 +++---- src/histogram/grid.rs | 29 ++++-- src/histogram/histograms.rs | 25 +++-- src/histogram/mod.rs | 10 +- src/histogram/strategies.rs | 86 ++++++++-------- src/lib.rs | 17 ++-- src/quantile.rs | 9 +- src/summary_statistics/means.rs | 172 ++++++++++++++++---------------- src/summary_statistics/mod.rs | 4 +- 10 files changed, 233 insertions(+), 250 deletions(-) diff --git a/src/correlation.rs b/src/correlation.rs index ec3a1030..200bbc45 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -131,14 +131,15 @@ where { let observation_axis = Axis(1); let n_observations = A::from_usize(self.len_of(observation_axis)).unwrap(); - let dof = - if ddof >= n_observations { - panic!("`ddof` needs to be strictly smaller than the \ - number of observations provided for each \ - random variable!") - } else { - n_observations - ddof - }; + let dof = if ddof >= n_observations { + panic!( + "`ddof` needs to be strictly smaller than the \ + number of observations provided for each \ + random variable!" + ) + } else { + n_observations - ddof + }; let mean = self.mean_axis(observation_axis); let denoised = self - &mean.insert_axis(observation_axis); let covariance = denoised.dot(&denoised.t()); @@ -156,7 +157,9 @@ where // observation per random variable (or no observations at all) let ddof = -A::one(); let cov = self.cov(ddof); - let std = self.std_axis(observation_axis, ddof).insert_axis(observation_axis); + let std = self + .std_axis(observation_axis, ddof) + .insert_axis(observation_axis); let std_matrix = std.dot(&std.t()); // element-wise division cov / std_matrix @@ -167,10 +170,10 @@ where mod cov_tests { use super::*; use ndarray::array; + use ndarray_rand::RandomExt; use quickcheck::quickcheck; use rand; use rand::distributions::Uniform; - use ndarray_rand::RandomExt; quickcheck! { fn constant_random_variables_have_zero_covariance_matrix(value: f64) -> bool { @@ -200,10 +203,7 @@ mod cov_tests { fn test_invalid_ddof() { let n_random_variables = 3; let n_observations = 4; - let a = Array::random( - (n_random_variables, n_observations), - Uniform::new(0., 10.) - ); + let a = Array::random((n_random_variables, n_observations), Uniform::new(0., 10.)); let invalid_ddof = (n_observations as f64) + rand::random::().abs(); a.cov(invalid_ddof); } @@ -235,45 +235,36 @@ mod cov_tests { #[test] fn test_covariance_for_random_array() { let a = array![ - [ 0.72009497, 0.12568055, 0.55705966, 0.5959984 , 0.69471457], - [ 0.56717131, 0.47619486, 0.21526298, 0.88915366, 0.91971245], - [ 0.59044195, 0.10720363, 0.76573717, 0.54693675, 0.95923036], - [ 0.24102952, 0.131347, 0.11118028, 0.21451351, 0.30515539], - [ 0.26952473, 0.93079841, 0.8080893 , 0.42814155, 0.24642258] + [0.72009497, 0.12568055, 0.55705966, 0.5959984, 0.69471457], + [0.56717131, 0.47619486, 0.21526298, 0.88915366, 0.91971245], + [0.59044195, 0.10720363, 0.76573717, 0.54693675, 0.95923036], + [0.24102952, 0.131347, 0.11118028, 0.21451351, 0.30515539], + [0.26952473, 0.93079841, 0.8080893, 0.42814155, 0.24642258] ]; let numpy_covariance = array![ - [ 0.05786248, 0.02614063, 0.06446215, 0.01285105, -0.06443992], - [ 0.02614063, 0.08733569, 0.02436933, 0.01977437, -0.06715555], - [ 0.06446215, 0.02436933, 0.10052129, 0.01393589, -0.06129912], - [ 0.01285105, 0.01977437, 0.01393589, 0.00638795, -0.02355557], - [-0.06443992, -0.06715555, -0.06129912, -0.02355557, 0.09909855] + [0.05786248, 0.02614063, 0.06446215, 0.01285105, -0.06443992], + [0.02614063, 0.08733569, 0.02436933, 0.01977437, -0.06715555], + [0.06446215, 0.02436933, 0.10052129, 0.01393589, -0.06129912], + [0.01285105, 0.01977437, 0.01393589, 0.00638795, -0.02355557], + [ + -0.06443992, + -0.06715555, + -0.06129912, + -0.02355557, + 0.09909855 + ] ]; assert_eq!(a.ndim(), 2); - assert!( - a.cov(1.).all_close( - &numpy_covariance, - 1e-8 - ) - ); + assert!(a.cov(1.).all_close(&numpy_covariance, 1e-8)); } #[test] #[should_panic] // We lose precision, hence the failing assert fn test_covariance_for_badly_conditioned_array() { - let a: Array2 = array![ - [ 1e12 + 1., 1e12 - 1.], - [ 1e-6 + 1e-12, 1e-6 - 1e-12], - ]; - let expected_covariance = array![ - [2., 2e-12], [2e-12, 2e-24] - ]; - assert!( - a.cov(1.).all_close( - &expected_covariance, - 1e-24 - ) - ); + let a: Array2 = array![[1e12 + 1., 1e12 - 1.], [1e-6 + 1e-12, 1e-6 - 1e-12],]; + let expected_covariance = array![[2., 2e-12], [2e-12, 2e-24]]; + assert!(a.cov(1.).all_close(&expected_covariance, 1e-24)); } } @@ -281,9 +272,9 @@ mod cov_tests { mod pearson_correlation_tests { use super::*; use ndarray::array; + use ndarray_rand::RandomExt; use quickcheck::quickcheck; use rand::distributions::Uniform; - use ndarray_rand::RandomExt; quickcheck! { fn output_matrix_is_symmetric(bound: f64) -> bool { @@ -337,19 +328,14 @@ mod pearson_correlation_tests { [0.26979716, 0.20887228, 0.95454999, 0.96290785] ]; let numpy_corrcoeff = array![ - [ 1. , 0.38089376, 0.08122504, -0.59931623, 0.1365648 ], - [ 0.38089376, 1. , 0.80918429, -0.52615195, 0.38954398], - [ 0.08122504, 0.80918429, 1. , 0.07134906, -0.17324776], - [-0.59931623, -0.52615195, 0.07134906, 1. , -0.8743213 ], - [ 0.1365648 , 0.38954398, -0.17324776, -0.8743213 , 1. ] + [1., 0.38089376, 0.08122504, -0.59931623, 0.1365648], + [0.38089376, 1., 0.80918429, -0.52615195, 0.38954398], + [0.08122504, 0.80918429, 1., 0.07134906, -0.17324776], + [-0.59931623, -0.52615195, 0.07134906, 1., -0.8743213], + [0.1365648, 0.38954398, -0.17324776, -0.8743213, 1.] ]; assert_eq!(a.ndim(), 2); - assert!( - a.pearson_correlation().all_close( - &numpy_corrcoeff, - 1e-7 - ) - ); + assert!(a.pearson_correlation().all_close(&numpy_corrcoeff, 1e-7)); } } diff --git a/src/histogram/bins.rs b/src/histogram/bins.rs index e0c3a1ea..3a83eae9 100644 --- a/src/histogram/bins.rs +++ b/src/histogram/bins.rs @@ -33,7 +33,6 @@ pub struct Edges { } impl From> for Edges { - /// Get an `Edges` instance from a `Vec`: /// the vector will be sorted in increasing order /// using an unstable sorting algorithm and duplicates @@ -89,7 +88,7 @@ impl From> for Edges { } } -impl Index for Edges{ +impl Index for Edges { type Output = A; /// Get the `i`-th edge. @@ -182,13 +181,11 @@ impl Edges { match self.edges.binary_search(value) { Ok(i) if i == n_edges - 1 => None, Ok(i) => Some((i, i + 1)), - Err(i) => { - match i { - 0 => None, - j if j == n_edges => None, - j => Some((j - 1, j)), - } - } + Err(i) => match i { + 0 => None, + j if j == n_edges => None, + j => Some((j - 1, j)), + }, } } @@ -309,18 +306,14 @@ impl Bins { /// ); /// ``` pub fn range_of(&self, value: &A) -> Option> - where - A: Clone, + where + A: Clone, { let edges_indexes = self.edges.indices_of(value); - edges_indexes.map( - |(left, right)| { - Range { - start: self.edges[left].clone(), - end: self.edges[right].clone(), - } - } - ) + edges_indexes.map(|(left, right)| Range { + start: self.edges[left].clone(), + end: self.edges[right].clone(), + }) } /// Get the `i`-th bin. @@ -341,7 +334,7 @@ impl Bins { /// ); /// ``` pub fn index(&self, index: usize) -> Range - where + where A: Clone, { // It was not possible to implement this functionality @@ -350,7 +343,7 @@ impl Bins { // Index, in fact, forces you to return a reference. Range { start: self.edges[index].clone(), - end: self.edges[index+1].clone(), + end: self.edges[index + 1].clone(), } } } diff --git a/src/histogram/grid.rs b/src/histogram/grid.rs index 32a7161b..bfa5afc1 100644 --- a/src/histogram/grid.rs +++ b/src/histogram/grid.rs @@ -1,8 +1,8 @@ use super::bins::Bins; use super::strategies::BinsBuildingStrategy; -use std::ops::Range; use itertools::izip; -use ndarray::{ArrayBase, Data, Ix1, Ix2, Axis}; +use ndarray::{ArrayBase, Axis, Data, Ix1, Ix2}; +use std::ops::Range; /// A `Grid` is a partition of a rectangular region of an *n*-dimensional /// space—e.g. [*a*0, *b*0) × ⋯ × [*a**n*−1, @@ -72,7 +72,6 @@ pub struct Grid { } impl From>> for Grid { - /// Get a `Grid` instance from a `Vec>`. /// /// The `i`-th element in `Vec>` represents the 1-dimensional @@ -113,9 +112,14 @@ impl Grid { where S: Data, { - assert_eq!(point.len(), self.ndim(), - "Dimension mismatch: the point has {:?} dimensions, the grid \ - expected {:?} dimensions.", point.len(), self.ndim()); + assert_eq!( + point.len(), + self.ndim(), + "Dimension mismatch: the point has {:?} dimensions, the grid \ + expected {:?} dimensions.", + point.len(), + self.ndim() + ); point .iter() .zip(self.projections.iter()) @@ -132,9 +136,14 @@ impl Grid { /// **Panics** if at least one among `(i_0, ..., i_{n-1})` is out of bounds on the respective /// coordinate axis - i.e. if there exists `j` such that `i_j >= self.projections[j].len()`. pub fn index(&self, index: &[usize]) -> Vec> { - assert_eq!(index.len(), self.ndim(), - "Dimension mismatch: the index has {0:?} dimensions, the grid \ - expected {1:?} dimensions.", index.len(), self.ndim()); + assert_eq!( + index.len(), + self.ndim(), + "Dimension mismatch: the index has {0:?} dimensions, the grid \ + expected {1:?} dimensions.", + index.len(), + self.ndim() + ); izip!(&self.projections, index) .map(|(bins, &i)| bins.index(i)) .collect() @@ -164,7 +173,7 @@ where /// [`strategy`]: strategies/index.html pub fn from_array(array: &ArrayBase) -> Self where - S: Data, + S: Data, { let bin_builders = array .axis_iter(Axis(1)) diff --git a/src/histogram/histograms.rs b/src/histogram/histograms.rs index 825aadb7..9bfe2724 100644 --- a/src/histogram/histograms.rs +++ b/src/histogram/histograms.rs @@ -1,7 +1,7 @@ +use super::errors::BinNotFound; +use super::grid::Grid; use ndarray::prelude::*; use ndarray::Data; -use super::grid::Grid; -use super::errors::BinNotFound; /// Histogram data structure. pub struct Histogram { @@ -58,8 +58,8 @@ impl Histogram { Some(bin_index) => { self.counts[&*bin_index] += 1; Ok(()) - }, - None => Err(BinNotFound) + } + None => Err(BinNotFound), } } @@ -82,8 +82,8 @@ impl Histogram { /// Extension trait for `ArrayBase` providing methods to compute histograms. pub trait HistogramExt - where - S: Data, +where + S: Data, { /// Returns the [histogram](https://en.wikipedia.org/wiki/Histogram) /// for a 2-dimensional array of points `M`. @@ -145,17 +145,16 @@ pub trait HistogramExt /// # } /// ``` fn histogram(&self, grid: Grid) -> Histogram - where - A: Ord; + where + A: Ord; } impl HistogramExt for ArrayBase - where - S: Data, - A: Ord, +where + S: Data, + A: Ord, { - fn histogram(&self, grid: Grid) -> Histogram - { + fn histogram(&self, grid: Grid) -> Histogram { let mut histogram = Histogram::new(grid); for point in self.axis_iter(Axis(0)) { let _ = histogram.add_observation(&point); diff --git a/src/histogram/mod.rs b/src/histogram/mod.rs index 9176aee1..3acbf40a 100644 --- a/src/histogram/mod.rs +++ b/src/histogram/mod.rs @@ -1,10 +1,10 @@ //! Histogram functionalities. -pub use self::histograms::{Histogram, HistogramExt}; -pub use self::bins::{Edges, Bins}; +pub use self::bins::{Bins, Edges}; pub use self::grid::{Grid, GridBuilder}; +pub use self::histograms::{Histogram, HistogramExt}; -mod histograms; mod bins; -pub mod strategies; -mod grid; pub mod errors; +mod grid; +mod histograms; +pub mod strategies; diff --git a/src/histogram/strategies.rs b/src/histogram/strategies.rs index f669b35e..eeaee686 100644 --- a/src/histogram/strategies.rs +++ b/src/histogram/strategies.rs @@ -18,13 +18,12 @@ //! [`Grid`]: ../struct.Grid.html //! [`GridBuilder`]: ../struct.GridBuilder.html //! [`NumPy`]: https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram_bin_edges.html#numpy.histogram_bin_edges +use super::super::interpolate::Nearest; +use super::super::{Quantile1dExt, QuantileExt}; +use super::{Bins, Edges}; use ndarray::prelude::*; use ndarray::Data; use num_traits::{FromPrimitive, NumOps, Zero}; -use super::super::{QuantileExt, Quantile1dExt}; -use super::super::interpolate::Nearest; -use super::{Edges, Bins}; - /// A trait implemented by all strategies to build [`Bins`] /// with parameters inferred from observations. @@ -36,8 +35,7 @@ use super::{Edges, Bins}; /// [`Bins`]: ../struct.Bins.html /// [`Grid`]: ../struct.Grid.html /// [`GridBuilder`]: ../struct.GridBuilder.html -pub trait BinsBuildingStrategy -{ +pub trait BinsBuildingStrategy { type Elem: Ord; /// Given some observations in a 1-dimensional array it returns a `BinsBuildingStrategy` /// that has learned the required parameter to build a collection of [`Bins`]. @@ -45,7 +43,7 @@ pub trait BinsBuildingStrategy /// [`Bins`]: ../struct.Bins.html fn from_array(array: &ArrayBase) -> Self where - S: Data; + S: Data; /// Returns a [`Bins`] instance, built accordingly to the parameters /// inferred from observations in [`from_array`]. @@ -140,21 +138,24 @@ pub struct Auto { } impl EquiSpaced - where - T: Ord + Clone + FromPrimitive + NumOps + Zero +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, { /// **Panics** if `bin_width<=0`. - fn new(bin_width: T, min: T, max: T) -> Self - { + fn new(bin_width: T, min: T, max: T) -> Self { assert!(bin_width > T::zero()); - Self { bin_width, min, max } + Self { + bin_width, + min, + max, + } } fn build(&self) -> Bins { let n_bins = self.n_bins(); let mut edges: Vec = vec![]; - for i in 0..(n_bins+1) { - let edge = self.min.clone() + T::from_usize(i).unwrap()*self.bin_width.clone(); + for i in 0..(n_bins + 1) { + let edge = self.min.clone() + T::from_usize(i).unwrap() * self.bin_width.clone(); edges.push(edge); } Bins::new(Edges::from(edges)) @@ -167,7 +168,7 @@ impl EquiSpaced max_edge = max_edge + self.bin_width.clone(); n_bins += 1; } - return n_bins + return n_bins; } fn bin_width(&self) -> T { @@ -176,15 +177,15 @@ impl EquiSpaced } impl BinsBuildingStrategy for Sqrt - where - T: Ord + Clone + FromPrimitive + NumOps + Zero +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, { type Elem = T; /// **Panics** if the array is constant or if `a.len()==0`. fn from_array(a: &ArrayBase) -> Self where - S: Data + S: Data, { let n_elems = a.len(); let n_bins = (n_elems as f64).sqrt().round() as usize; @@ -205,8 +206,8 @@ impl BinsBuildingStrategy for Sqrt } impl Sqrt - where - T: Ord + Clone + FromPrimitive + NumOps + Zero +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, { /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { @@ -215,18 +216,18 @@ impl Sqrt } impl BinsBuildingStrategy for Rice - where - T: Ord + Clone + FromPrimitive + NumOps + Zero +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, { type Elem = T; /// **Panics** if the array is constant or if `a.len()==0`. fn from_array(a: &ArrayBase) -> Self where - S: Data + S: Data, { let n_elems = a.len(); - let n_bins = (2. * (n_elems as f64).powf(1./3.)).round() as usize; + let n_bins = (2. * (n_elems as f64).powf(1. / 3.)).round() as usize; let min = a.min().unwrap().clone(); let max = a.max().unwrap().clone(); let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins); @@ -244,8 +245,8 @@ impl BinsBuildingStrategy for Rice } impl Rice - where - T: Ord + Clone + FromPrimitive + NumOps + Zero +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, { /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { @@ -254,15 +255,15 @@ impl Rice } impl BinsBuildingStrategy for Sturges - where - T: Ord + Clone + FromPrimitive + NumOps + Zero +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, { type Elem = T; /// **Panics** if the array is constant or if `a.len()==0`. fn from_array(a: &ArrayBase) -> Self where - S: Data + S: Data, { let n_elems = a.len(); let n_bins = (n_elems as f64).log2().round() as usize + 1; @@ -283,8 +284,8 @@ impl BinsBuildingStrategy for Sturges } impl Sturges - where - T: Ord + Clone + FromPrimitive + NumOps + Zero +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, { /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { @@ -293,15 +294,15 @@ impl Sturges } impl BinsBuildingStrategy for FreedmanDiaconis - where - T: Ord + Clone + FromPrimitive + NumOps + Zero +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, { type Elem = T; /// **Panics** if `IQR==0` or if `a.len()==0`. fn from_array(a: &ArrayBase) -> Self where - S: Data + S: Data, { let n_points = a.len(); @@ -327,11 +328,10 @@ impl BinsBuildingStrategy for FreedmanDiaconis } impl FreedmanDiaconis - where - T: Ord + Clone + FromPrimitive + NumOps + Zero +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, { - fn compute_bin_width(n_bins: usize, iqr: T) -> T - { + fn compute_bin_width(n_bins: usize, iqr: T) -> T { let denominator = (n_bins as f64).powf(1. / 3.); let bin_width = T::from_usize(2).unwrap() * iqr / T::from_f64(denominator).unwrap(); bin_width @@ -344,15 +344,15 @@ impl FreedmanDiaconis } impl BinsBuildingStrategy for Auto - where - T: Ord + Clone + FromPrimitive + NumOps + Zero +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, { type Elem = T; /// **Panics** if `IQR==0`, the array is constant, or `a.len()==0`. fn from_array(a: &ArrayBase) -> Self where - S: Data + S: Data, { let fd_builder = FreedmanDiaconis::from_array(&a); let sturges_builder = Sturges::from_array(&a); @@ -384,8 +384,8 @@ impl BinsBuildingStrategy for Auto } impl Auto - where - T: Ord + Clone + FromPrimitive + NumOps + Zero +where + T: Ord + Clone + FromPrimitive + NumOps + Zero, { /// The bin width (or bin length) according to the fitted strategy. pub fn bin_width(&self) -> T { diff --git a/src/lib.rs b/src/lib.rs index 5d764a67..7fbcc94f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,30 +23,29 @@ //! [`NumPy`]: https://docs.scipy.org/doc/numpy-1.14.1/reference/routines.statistics.html //! [`StatsBase.jl`]: https://juliastats.github.io/StatsBase.jl/latest/ - +extern crate itertools; extern crate ndarray; extern crate noisy_float; extern crate num_traits; extern crate rand; -extern crate itertools; +#[cfg(test)] +extern crate approx; #[cfg(test)] extern crate ndarray_rand; #[cfg(test)] extern crate quickcheck; -#[cfg(test)] -extern crate approx; -pub use maybe_nan::{MaybeNan, MaybeNanExt}; -pub use quantile::{interpolate, QuantileExt, Quantile1dExt}; -pub use sort::Sort1dExt; pub use correlation::CorrelationExt; pub use histogram::HistogramExt; +pub use maybe_nan::{MaybeNan, MaybeNanExt}; +pub use quantile::{interpolate, Quantile1dExt, QuantileExt}; +pub use sort::Sort1dExt; pub use summary_statistics::SummaryStatisticsExt; +mod correlation; +pub mod histogram; mod maybe_nan; mod quantile; mod sort; -mod correlation; mod summary_statistics; -pub mod histogram; diff --git a/src/quantile.rs b/src/quantile.rs index 9188aa2f..c29ccba2 100644 --- a/src/quantile.rs +++ b/src/quantile.rs @@ -378,8 +378,8 @@ where /// Quantile methods for 1-D arrays. pub trait Quantile1dExt - where - S: Data, +where + S: Data, { /// Return the qth quantile of the data. /// @@ -418,8 +418,8 @@ pub trait Quantile1dExt } impl Quantile1dExt for ArrayBase - where - S: Data, +where + S: Data, { fn quantile_mut(&mut self, q: f64) -> Option where @@ -434,4 +434,3 @@ impl Quantile1dExt for ArrayBase } } } - diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 29dc7084..a2b5bcd6 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -1,8 +1,7 @@ -use ndarray::{Data, Dimension, ArrayBase}; -use num_traits::{FromPrimitive, Float, Zero}; -use std::ops::{Add, Div}; use super::SummaryStatisticsExt; - +use ndarray::{ArrayBase, Data, Dimension}; +use num_traits::{Float, FromPrimitive, Zero}; +use std::ops::{Add, Div}; impl SummaryStatisticsExt for ArrayBase where @@ -11,7 +10,7 @@ where { fn mean(&self) -> Option where - A: Clone + FromPrimitive + Add + Div + Zero + A: Clone + FromPrimitive + Add + Div + Zero, { let n_elements = self.len(); if n_elements == 0 { @@ -39,51 +38,45 @@ where fn kurtosis(&self) -> Option where - A: Float + FromPrimitive + A: Float + FromPrimitive, { let central_moments = self.central_moments(4); - central_moments.map( - |moments| moments[4] / moments[2].powi(2) - ) + central_moments.map(|moments| moments[4] / moments[2].powi(2)) } fn skewness(&self) -> Option where - A: Float + FromPrimitive + A: Float + FromPrimitive, { let central_moments = self.central_moments(3); - central_moments.map( - |moments| moments[3] / moments[2].sqrt().powi(3) - ) + central_moments.map(|moments| moments[3] / moments[2].sqrt().powi(3)) } fn central_moment(&self, order: usize) -> Option where - A: Float + FromPrimitive + A: Float + FromPrimitive, { let mean = self.mean(); match mean { None => None, - Some(mean) => { - match order { - 0 => Some(A::one()), - 1 => Some(A::zero()), - n => { - let shifted_array = self.map(|x| x.clone() - mean); - let shifted_moments = moments(shifted_array, n); - let correction_term = -shifted_moments[1].clone(); - - let coefficients = central_moment_coefficients(&shifted_moments); - Some(horner_method(coefficients, correction_term)) - } + Some(mean) => match order { + 0 => Some(A::one()), + 1 => Some(A::zero()), + n => { + let shifted_array = self.map(|x| x.clone() - mean); + let shifted_moments = moments(shifted_array, n); + let correction_term = -shifted_moments[1].clone(); + + let coefficients = central_moment_coefficients(&shifted_moments); + Some(horner_method(coefficients, correction_term)) } - } + }, } } fn central_moments(&self, order: usize) -> Option> - where - A: Float + FromPrimitive + where + A: Float + FromPrimitive, { let mean = self.mean(); match mean { @@ -128,17 +121,17 @@ where fn moments(a: ArrayBase, order: usize) -> Vec where A: Float + FromPrimitive, - S: Data, + S: Data, D: Dimension, { - let n_elements = A::from_usize(a.len()) - .expect("Converting number of elements to `A` must not fail"); + let n_elements = + A::from_usize(a.len()).expect("Converting number of elements to `A` must not fail"); // When k=0, we are raising each element to the 0th power // No need to waste CPU cycles going through the array let mut moments = vec![A::one()]; - if order >=1 { + if order >= 1 { // When k=1, we don't need to raise elements to the 1th power (identity) moments.push(a.sum() / n_elements) } @@ -159,9 +152,12 @@ where A: Float + FromPrimitive, { let order = moments.len(); - moments.iter().rev().enumerate().map( - |(k, moment)| A::from_usize(binomial_coefficient(order, k)).unwrap() * *moment - ).collect() + moments + .iter() + .rev() + .enumerate() + .map(|(k, moment)| A::from_usize(binomial_coefficient(order, k)).unwrap() * *moment) + .collect() } /// Returns the binomial coefficient "n over k". @@ -171,11 +167,11 @@ fn binomial_coefficient(n: usize, k: usize) -> usize { if k > n { panic!( "Tried to compute the binomial coefficient of {0} over {1}, \ - but {1} is strictly greater than {0}!" + but {1} is strictly greater than {0}!" ) } // BC(n, k) = BC(n, n-k) - let k = if k > n-k { n-k } else { k }; + let k = if k > n - k { n - k } else { k }; let mut result = 1; for i in 0..k { result = result * (n - i); @@ -210,12 +206,12 @@ where #[cfg(test)] mod tests { use super::SummaryStatisticsExt; - use std::f64; use approx::assert_abs_diff_eq; - use noisy_float::types::N64; use ndarray::{array, Array, Array1}; use ndarray_rand::RandomExt; + use noisy_float::types::N64; use rand::distributions::Uniform; + use std::f64; #[test] fn test_means_with_nan_values() { @@ -244,16 +240,14 @@ mod tests { #[test] fn test_means_with_array_of_floats() { let a: Array1 = array![ - 0.99889651, 0.0150731 , 0.28492482, 0.83819218, 0.48413156, - 0.80710412, 0.41762936, 0.22879429, 0.43997224, 0.23831807, - 0.02416466, 0.6269962 , 0.47420614, 0.56275487, 0.78995021, - 0.16060581, 0.64635041, 0.34876609, 0.78543249, 0.19938356, - 0.34429457, 0.88072369, 0.17638164, 0.60819363, 0.250392 , - 0.69912532, 0.78855523, 0.79140914, 0.85084218, 0.31839879, - 0.63381769, 0.22421048, 0.70760302, 0.99216018, 0.80199153, - 0.19239188, 0.61356023, 0.31505352, 0.06120481, 0.66417377, - 0.63608897, 0.84959691, 0.43599069, 0.77867775, 0.88267754, - 0.83003623, 0.67016118, 0.67547638, 0.65220036, 0.68043427 + 0.99889651, 0.0150731, 0.28492482, 0.83819218, 0.48413156, 0.80710412, 0.41762936, + 0.22879429, 0.43997224, 0.23831807, 0.02416466, 0.6269962, 0.47420614, 0.56275487, + 0.78995021, 0.16060581, 0.64635041, 0.34876609, 0.78543249, 0.19938356, 0.34429457, + 0.88072369, 0.17638164, 0.60819363, 0.250392, 0.69912532, 0.78855523, 0.79140914, + 0.85084218, 0.31839879, 0.63381769, 0.22421048, 0.70760302, 0.99216018, 0.80199153, + 0.19239188, 0.61356023, 0.31505352, 0.06120481, 0.66417377, 0.63608897, 0.84959691, + 0.43599069, 0.77867775, 0.88267754, 0.83003623, 0.67016118, 0.67547638, 0.65220036, + 0.68043427 ]; // Computed using NumPy let expected_mean = 0.5475494059146699; @@ -263,8 +257,16 @@ mod tests { let expected_geometric_mean = 0.4345897639796527; assert_abs_diff_eq!(a.mean().unwrap(), expected_mean, epsilon = 1e-6); - assert_abs_diff_eq!(a.harmonic_mean().unwrap(), expected_harmonic_mean, epsilon = 1e-6); - assert_abs_diff_eq!(a.geometric_mean().unwrap(), expected_geometric_mean, epsilon = 1e-6); + assert_abs_diff_eq!( + a.harmonic_mean().unwrap(), + expected_harmonic_mean, + epsilon = 1e-6 + ); + assert_abs_diff_eq!( + a.geometric_mean().unwrap(), + expected_geometric_mean, + epsilon = 1e-6 + ); } #[test] @@ -278,10 +280,7 @@ mod tests { fn test_zeroth_central_order_moment_is_one() { let n = 50; let bound: f64 = 200.; - let a = Array::random( - n, - Uniform::new(-bound.abs(), bound.abs()) - ); + let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs())); assert_eq!(a.central_moment(0).unwrap(), 1.); } @@ -289,34 +288,38 @@ mod tests { fn test_first_central_order_moment_is_zero() { let n = 50; let bound: f64 = 200.; - let a = Array::random( - n, - Uniform::new(-bound.abs(), bound.abs()) - ); + let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs())); assert_eq!(a.central_moment(1).unwrap(), 0.); } #[test] fn test_central_order_moments() { let a: Array1 = array![ - 0.07820559, 0.5026185 , 0.80935324, 0.39384033, 0.9483038, - 0.62516215, 0.90772261, 0.87329831, 0.60267392, 0.2960298, - 0.02810356, 0.31911966, 0.86705506, 0.96884832, 0.2222465, - 0.42162446, 0.99909868, 0.47619762, 0.91696979, 0.9972741, - 0.09891734, 0.76934818, 0.77566862, 0.7692585 , 0.2235759, - 0.44821286, 0.79732186, 0.04804275, 0.87863238, 0.1111003, - 0.6653943 , 0.44386445, 0.2133176 , 0.39397086, 0.4374617, - 0.95896624, 0.57850146, 0.29301706, 0.02329879, 0.2123203, - 0.62005503, 0.996492 , 0.5342986 , 0.97822099, 0.5028445, - 0.6693834 , 0.14256682, 0.52724704, 0.73482372, 0.1809703, + 0.07820559, 0.5026185, 0.80935324, 0.39384033, 0.9483038, 0.62516215, 0.90772261, + 0.87329831, 0.60267392, 0.2960298, 0.02810356, 0.31911966, 0.86705506, 0.96884832, + 0.2222465, 0.42162446, 0.99909868, 0.47619762, 0.91696979, 0.9972741, 0.09891734, + 0.76934818, 0.77566862, 0.7692585, 0.2235759, 0.44821286, 0.79732186, 0.04804275, + 0.87863238, 0.1111003, 0.6653943, 0.44386445, 0.2133176, 0.39397086, 0.4374617, + 0.95896624, 0.57850146, 0.29301706, 0.02329879, 0.2123203, 0.62005503, 0.996492, + 0.5342986, 0.97822099, 0.5028445, 0.6693834, 0.14256682, 0.52724704, 0.73482372, + 0.1809703, ]; // Computed using scipy.stats.moment let expected_moments = vec![ - 1., 0., 0.09339920262960291, -0.0026849636727735186, - 0.015403769257729755, -0.001204176487006564, 0.002976822584939186, + 1., + 0., + 0.09339920262960291, + -0.0026849636727735186, + 0.015403769257729755, + -0.001204176487006564, + 0.002976822584939186, ]; for (order, expected_moment) in expected_moments.iter().enumerate() { - assert_abs_diff_eq!(a.central_moment(order).unwrap(), expected_moment, epsilon = 1e-6); + assert_abs_diff_eq!( + a.central_moment(order).unwrap(), + expected_moment, + epsilon = 1e-6 + ); } } @@ -325,10 +328,7 @@ mod tests { // Test that the bulk method is coherent with the non-bulk method let n = 50; let bound: f64 = 200.; - let a = Array::random( - n, - Uniform::new(-bound.abs(), bound.abs()) - ); + let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs())); let order = 10; let central_moments = a.central_moments(order).unwrap(); for i in 0..=order { @@ -346,16 +346,14 @@ mod tests { #[test] fn test_kurtosis_and_skewness() { let a: Array1 = array![ - 0.33310096, 0.98757449, 0.9789796 , 0.96738114, 0.43545674, - 0.06746873, 0.23706562, 0.04241815, 0.38961714, 0.52421271, - 0.93430327, 0.33911604, 0.05112372, 0.5013455 , 0.05291507, - 0.62511183, 0.20749633, 0.22132433, 0.14734804, 0.51960608, - 0.00449208, 0.4093339 , 0.2237519 , 0.28070469, 0.7887231 , - 0.92224523, 0.43454188, 0.18335111, 0.08646856, 0.87979847, - 0.25483457, 0.99975627, 0.52712442, 0.41163279, 0.85162594, - 0.52618733, 0.75815023, 0.30640695, 0.14205781, 0.59695813, - 0.851331 , 0.39524328, 0.73965373, 0.4007615 , 0.02133069, - 0.92899207, 0.79878191, 0.38947334, 0.22042183, 0.77768353, + 0.33310096, 0.98757449, 0.9789796, 0.96738114, 0.43545674, 0.06746873, 0.23706562, + 0.04241815, 0.38961714, 0.52421271, 0.93430327, 0.33911604, 0.05112372, 0.5013455, + 0.05291507, 0.62511183, 0.20749633, 0.22132433, 0.14734804, 0.51960608, 0.00449208, + 0.4093339, 0.2237519, 0.28070469, 0.7887231, 0.92224523, 0.43454188, 0.18335111, + 0.08646856, 0.87979847, 0.25483457, 0.99975627, 0.52712442, 0.41163279, 0.85162594, + 0.52618733, 0.75815023, 0.30640695, 0.14205781, 0.59695813, 0.851331, 0.39524328, + 0.73965373, 0.4007615, 0.02133069, 0.92899207, 0.79878191, 0.38947334, 0.22042183, + 0.77768353, ]; // Computed using scipy.stats.kurtosis(a, fisher=False) let expected_kurtosis = 1.821933711687523; diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index 9daed02a..ff873d3b 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -1,6 +1,6 @@ //! Summary statistics (e.g. mean, variance, etc.). use ndarray::{Data, Dimension}; -use num_traits::{FromPrimitive, Float, Zero}; +use num_traits::{Float, FromPrimitive, Zero}; use std::ops::{Add, Div}; /// Extension trait for `ArrayBase` providing methods @@ -25,7 +25,7 @@ where /// [`arithmetic mean`]: https://en.wikipedia.org/wiki/Arithmetic_mean fn mean(&self) -> Option where - A: Clone + FromPrimitive + Add + Div + Zero; + A: Clone + FromPrimitive + Add + Div + Zero; /// Returns the [`harmonic mean`] `HM(X)` of all elements in the array: /// From 25f3f0fd1cd0552d9c0f31b2771352229b4fb01a Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 15:06:25 -0400 Subject: [PATCH 27/44] Avoid computing mean for p=0,1 central moments --- src/summary_statistics/means.rs | 68 ++++++++++++++++----------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index a2b5bcd6..14051e10 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -56,21 +56,21 @@ where where A: Float + FromPrimitive, { - let mean = self.mean(); - match mean { - None => None, - Some(mean) => match order { - 0 => Some(A::one()), - 1 => Some(A::zero()), - n => { - let shifted_array = self.map(|x| x.clone() - mean); - let shifted_moments = moments(shifted_array, n); - let correction_term = -shifted_moments[1].clone(); + if self.is_empty() { + return None; + } + match order { + 0 => Some(A::one()), + 1 => Some(A::zero()), + n => { + let mean = self.mean().unwrap(); + let shifted_array = self.map(|x| x.clone() - mean); + let shifted_moments = moments(shifted_array, n); + let correction_term = -shifted_moments[1].clone(); - let coefficients = central_moment_coefficients(&shifted_moments); - Some(horner_method(coefficients, correction_term)) - } - }, + let coefficients = central_moment_coefficients(&shifted_moments); + Some(horner_method(coefficients, correction_term)) + } } } @@ -78,29 +78,27 @@ where where A: Float + FromPrimitive, { - let mean = self.mean(); - match mean { - None => None, - Some(mean) => { - match order { - 0 => Some(vec![A::one()]), - 1 => Some(vec![A::one(), A::zero()]), - n => { - // We only perform this operations once, and then reuse their - // result to compute all the required moments - let shifted_array = self.map(|x| x.clone() - mean); - let shifted_moments = moments(shifted_array, n); - let correction_term = -shifted_moments[1].clone(); + if self.is_empty() { + return None; + } + match order { + 0 => Some(vec![A::one()]), + 1 => Some(vec![A::one(), A::zero()]), + n => { + // We only perform these operations once, and then reuse their + // result to compute all the required moments + let mean = self.mean().unwrap(); + let shifted_array = self.map(|x| x.clone() - mean); + let shifted_moments = moments(shifted_array, n); + let correction_term = -shifted_moments[1].clone(); - let mut central_moments = vec![A::one(), A::zero()]; - for k in 2..=n { - let coefficients = central_moment_coefficients(&shifted_moments[..=k]); - let central_moment = horner_method(coefficients, correction_term); - central_moments.push(central_moment) - } - Some(central_moments) - } + let mut central_moments = vec![A::one(), A::zero()]; + for k in 2..=n { + let coefficients = central_moment_coefficients(&shifted_moments[..=k]); + let central_moment = horner_method(coefficients, correction_term); + central_moments.push(central_moment) } + Some(central_moments) } } } From d9ba4bbb098f92d5feda430e8025040ee36207f2 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 15:18:55 -0400 Subject: [PATCH 28/44] Replace map + clone with mapv --- src/summary_statistics/means.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 14051e10..19002745 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -64,7 +64,7 @@ where 1 => Some(A::zero()), n => { let mean = self.mean().unwrap(); - let shifted_array = self.map(|x| x.clone() - mean); + let shifted_array = self.mapv(|x| x - mean); let shifted_moments = moments(shifted_array, n); let correction_term = -shifted_moments[1].clone(); @@ -88,7 +88,7 @@ where // We only perform these operations once, and then reuse their // result to compute all the required moments let mean = self.mean().unwrap(); - let shifted_array = self.map(|x| x.clone() - mean); + let shifted_array = self.mapv(|x| x - mean); let shifted_moments = moments(shifted_array, n); let correction_term = -shifted_moments[1].clone(); From 4a052884a2f51ce0c586b02c225b54cc053eb235 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 15:41:53 -0400 Subject: [PATCH 29/44] Check for moment order overflowing `i32` In reality, this probably isn't necessary because I'd expect someone to terminate their program when trying to calculate a moment of order greater than `i32::MAX` (because it would take so long), but it's nice to have an explicit check anyway. --- src/summary_statistics/means.rs | 7 +++++-- src/summary_statistics/mod.rs | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 19002745..4fd66b46 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -1,6 +1,6 @@ use super::SummaryStatisticsExt; use ndarray::{ArrayBase, Data, Dimension}; -use num_traits::{Float, FromPrimitive, Zero}; +use num_traits::{Float, FromPrimitive, ToPrimitive, Zero}; use std::ops::{Add, Div}; impl SummaryStatisticsExt for ArrayBase @@ -124,6 +124,9 @@ where { let n_elements = A::from_usize(a.len()).expect("Converting number of elements to `A` must not fail"); + let order = order + .to_i32() + .expect("Moment order must not overflow `i32`."); // When k=0, we are raising each element to the 0th power // No need to waste CPU cycles going through the array @@ -135,7 +138,7 @@ where } for k in 2..=order { - moments.push(a.map(|x| x.powi(k as i32)).sum() / n_elements) + moments.push(a.map(|x| x.powi(k)).sum() / n_elements) } moments } diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index ff873d3b..a5664a0a 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -113,7 +113,8 @@ where /// The *p*-th central moment is computed using a corrected two-pass algorithm (see Section 3.5 /// in [Pébay et al., 2016]). Complexity is *O(np)* when *n >> p*, *p > 1*. /// - /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. + /// **Panics** if `A::from_usize()` fails to convert the number of elements + /// in the array or if `order` overflows `i32`. /// /// [central moment]: https://en.wikipedia.org/wiki/Central_moment /// [Pébay et al., 2016]: https://www.osti.gov/pages/servlets/purl/1427275 @@ -130,7 +131,8 @@ where /// being thus more efficient than repeated calls to [central moment] if the computation /// of central moments of multiple orders is required. /// - /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. + /// **Panics** if `A::from_usize()` fails to convert the number of elements + /// in the array or if `order` overflows `i32`. /// /// [central moments]: https://en.wikipedia.org/wiki/Central_moment /// [central moment]: #tymethod.central_moment From 3aaab13934681707a3c4648a807ac08f1f310399 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 15:49:59 -0400 Subject: [PATCH 30/44] Take advantage of IterBinomial from num-integer --- Cargo.toml | 1 + src/lib.rs | 1 + src/summary_statistics/means.rs | 29 ++++------------------------- 3 files changed, 6 insertions(+), 25 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 57795ce8..645c41c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ categories = ["data-structures", "science"] [dependencies] ndarray = "0.12.1" noisy_float = "0.1.8" +num-integer = "0.1" num-traits = "0.2" rand = "0.6" itertools = { version = "0.7.0", default-features = false } diff --git a/src/lib.rs b/src/lib.rs index 9cf586f1..37499368 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,7 @@ extern crate itertools; extern crate ndarray; extern crate noisy_float; +extern crate num_integer; extern crate num_traits; extern crate rand; diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 4fd66b46..126c352f 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -1,5 +1,6 @@ use super::SummaryStatisticsExt; use ndarray::{ArrayBase, Data, Dimension}; +use num_integer::IterBinomial; use num_traits::{Float, FromPrimitive, ToPrimitive, Zero}; use std::ops::{Add, Div}; @@ -153,34 +154,12 @@ where A: Float + FromPrimitive, { let order = moments.len(); - moments - .iter() - .rev() - .enumerate() - .map(|(k, moment)| A::from_usize(binomial_coefficient(order, k)).unwrap() * *moment) + IterBinomial::new(order) + .zip(moments.iter().rev()) + .map(|(binom, &moment)| A::from_usize(binom).unwrap() * moment) .collect() } -/// Returns the binomial coefficient "n over k". -/// -/// **Panics** if k > n. -fn binomial_coefficient(n: usize, k: usize) -> usize { - if k > n { - panic!( - "Tried to compute the binomial coefficient of {0} over {1}, \ - but {1} is strictly greater than {0}!" - ) - } - // BC(n, k) = BC(n, n-k) - let k = if k > n - k { n - k } else { k }; - let mut result = 1; - for i in 0..k { - result = result * (n - i); - result = result / (i + 1); - } - result -} - /// Uses [Horner's method] to evaluate a polynomial with a single indeterminate. /// /// Coefficients are expected to be sorted by ascending order From 7efba82454e54537bd695b80a86bae54dc9b1557 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 16:02:11 -0400 Subject: [PATCH 31/44] Remove unnecessary explicit clones `A: Float` requires `A: Copy`. --- src/summary_statistics/means.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 126c352f..8ef1d48e 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -67,7 +67,7 @@ where let mean = self.mean().unwrap(); let shifted_array = self.mapv(|x| x - mean); let shifted_moments = moments(shifted_array, n); - let correction_term = -shifted_moments[1].clone(); + let correction_term = -shifted_moments[1]; let coefficients = central_moment_coefficients(&shifted_moments); Some(horner_method(coefficients, correction_term)) @@ -91,7 +91,7 @@ where let mean = self.mean().unwrap(); let shifted_array = self.mapv(|x| x - mean); let shifted_moments = moments(shifted_array, n); - let correction_term = -shifted_moments[1].clone(); + let correction_term = -shifted_moments[1]; let mut central_moments = vec![A::one(), A::zero()]; for k in 2..=n { From 0b0b0bab3acfbfc1379ba64fbd522058a7832d72 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 16:04:02 -0400 Subject: [PATCH 32/44] Replace .map() with ? I think this is a bit easier to understand at first glance. --- src/summary_statistics/means.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 8ef1d48e..d9ed10b1 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -41,16 +41,16 @@ where where A: Float + FromPrimitive, { - let central_moments = self.central_moments(4); - central_moments.map(|moments| moments[4] / moments[2].powi(2)) + let central_moments = self.central_moments(4)?; + Some(central_moments[4] / central_moments[2].powi(2)) } fn skewness(&self) -> Option where A: Float + FromPrimitive, { - let central_moments = self.central_moments(3); - central_moments.map(|moments| moments[3] / moments[2].sqrt().powi(3)) + let central_moments = self.central_moments(3)?; + Some(central_moments[3] / central_moments[2].sqrt().powi(3)) } fn central_moment(&self, order: usize) -> Option From f992453db3c55e04f188ce382e38df3d12c3b9e3 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 16:09:21 -0400 Subject: [PATCH 33/44] Test more cases in central moment tests --- src/summary_statistics/means.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index d9ed10b1..ec0fd80f 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -252,8 +252,10 @@ mod tests { #[test] fn test_central_order_moment_with_empty_array_of_floats() { let a: Array1 = array![]; - assert!(a.central_moment(1).is_none()); - assert!(a.central_moments(1).is_none()); + for order in 0..=3 { + assert!(a.central_moment(order).is_none()); + assert!(a.central_moments(order).is_none()); + } } #[test] From a8af8801f49b449279428ac224c755ca6247a076 Mon Sep 17 00:00:00 2001 From: Jim Turner Date: Sun, 31 Mar 2019 16:10:45 -0400 Subject: [PATCH 34/44] Rename central moment tests --- src/summary_statistics/means.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index ec0fd80f..1609b531 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -250,7 +250,7 @@ mod tests { } #[test] - fn test_central_order_moment_with_empty_array_of_floats() { + fn test_central_moment_with_empty_array_of_floats() { let a: Array1 = array![]; for order in 0..=3 { assert!(a.central_moment(order).is_none()); @@ -259,7 +259,7 @@ mod tests { } #[test] - fn test_zeroth_central_order_moment_is_one() { + fn test_zeroth_central_moment_is_one() { let n = 50; let bound: f64 = 200.; let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs())); @@ -267,7 +267,7 @@ mod tests { } #[test] - fn test_first_central_order_moment_is_zero() { + fn test_first_central_moment_is_zero() { let n = 50; let bound: f64 = 200.; let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs())); @@ -275,7 +275,7 @@ mod tests { } #[test] - fn test_central_order_moments() { + fn test_central_moments() { let a: Array1 = array![ 0.07820559, 0.5026185, 0.80935324, 0.39384033, 0.9483038, 0.62516215, 0.90772261, 0.87329831, 0.60267392, 0.2960298, 0.02810356, 0.31911966, 0.86705506, 0.96884832, @@ -306,7 +306,7 @@ mod tests { } #[test] - fn test_bulk_central_order_moments() { + fn test_bulk_central_moments() { // Test that the bulk method is coherent with the non-bulk method let n = 50; let bound: f64 = 200.; From a701f3302995d48c7f989e9eda839b13f71b7c94 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 1 Apr 2019 08:30:22 +0100 Subject: [PATCH 35/44] Push diff to the lowest possible exponent --- src/summary_statistics/means.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index a2b5bcd6..82eaf284 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -256,16 +256,16 @@ mod tests { // Computed using SciPy let expected_geometric_mean = 0.4345897639796527; - assert_abs_diff_eq!(a.mean().unwrap(), expected_mean, epsilon = 1e-6); + assert_abs_diff_eq!(a.mean().unwrap(), expected_mean, epsilon = 1e-9); assert_abs_diff_eq!( a.harmonic_mean().unwrap(), expected_harmonic_mean, - epsilon = 1e-6 + epsilon = 1e-7 ); assert_abs_diff_eq!( a.geometric_mean().unwrap(), expected_geometric_mean, - epsilon = 1e-6 + epsilon = 1e-12 ); } @@ -318,7 +318,7 @@ mod tests { assert_abs_diff_eq!( a.central_moment(order).unwrap(), expected_moment, - epsilon = 1e-6 + epsilon = 1e-8 ); } } @@ -363,7 +363,7 @@ mod tests { let kurtosis = a.kurtosis().unwrap(); let skewness = a.skewness().unwrap(); - assert_abs_diff_eq!(kurtosis, expected_kurtosis, epsilon = 1e-6); - assert_abs_diff_eq!(skewness, expected_skewness, epsilon = 1e-6); + assert_abs_diff_eq!(kurtosis, expected_kurtosis, epsilon = 1e-12); + assert_abs_diff_eq!(skewness, expected_skewness, epsilon = 1e-8); } } From 1796a48868fddb68e0ba139e13b6254024505f6f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 2 Apr 2019 07:59:55 +0100 Subject: [PATCH 36/44] Return a Result instead of Option, for consistency --- src/summary_statistics/mod.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index 90cd250a..b270b45e 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -74,12 +74,12 @@ where /// This is sometimes referred to as _Pearson's kurtosis_. Fisher's kurtosis can be /// computed by subtracting 3 from Pearson's kurtosis. /// - /// If the array is empty, `None` is returned. + /// If the array is empty, `Err(EmptyInput)` is returned. /// /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. /// /// [kurtosis]: https://en.wikipedia.org/wiki/Kurtosis - fn kurtosis(&self) -> Option + fn kurtosis(&self) -> Result where A: Float + FromPrimitive; @@ -92,12 +92,12 @@ where /// where μ₃ is the third central moment and σ is the standard deviation of /// the elements in the array. /// - /// If the array is empty, `None` is returned. + /// If the array is empty, `Err(EmptyInput)` is returned. /// /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. /// /// [Pearson's moment coefficient of skewness]: https://en.wikipedia.org/wiki/Skewness - fn skewness(&self) -> Option + fn skewness(&self) -> Result where A: Float + FromPrimitive; @@ -109,7 +109,7 @@ where /// n i=1 /// ``` /// - /// If the array is empty, `None` is returned. + /// If the array is empty, `Err(EmptyInput)` is returned. /// /// The *p*-th central moment is computed using a corrected two-pass algorithm (see Section 3.5 /// in [Pébay et al., 2016]). Complexity is *O(np)* when *n >> p*, *p > 1*. @@ -119,14 +119,14 @@ where /// /// [central moment]: https://en.wikipedia.org/wiki/Central_moment /// [Pébay et al., 2016]: https://www.osti.gov/pages/servlets/purl/1427275 - fn central_moment(&self, order: usize) -> Option + fn central_moment(&self, order: usize) -> Result where A: Float + FromPrimitive; /// Returns the first *p* [central moments] of all elements in the array, see [central moment] /// for more details. /// - /// If the array is empty, `None` is returned. + /// If the array is empty, `Err(EmptyInput)` is returned. /// /// This method reuses the intermediate steps for the *k*-th moment to compute the *(k+1)*-th, /// being thus more efficient than repeated calls to [central moment] if the computation @@ -137,7 +137,7 @@ where /// /// [central moments]: https://en.wikipedia.org/wiki/Central_moment /// [central moment]: #tymethod.central_moment - fn central_moments(&self, order: usize) -> Option> + fn central_moments(&self, order: usize) -> Result, EmptyInput> where A: Float + FromPrimitive; } From e21ca2bfbc08ee97aca9d458b535d80c51b59965 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 2 Apr 2019 08:02:38 +0100 Subject: [PATCH 37/44] Match impl with trait definition --- src/summary_statistics/means.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 523601be..6b70a70b 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -38,32 +38,32 @@ where self.map(|x| x.ln()).mean().map(|x| x.exp()) } - fn kurtosis(&self) -> Option + fn kurtosis(&self) -> Result where A: Float + FromPrimitive, { let central_moments = self.central_moments(4)?; - Some(central_moments[4] / central_moments[2].powi(2)) + Ok(central_moments[4] / central_moments[2].powi(2)) } - fn skewness(&self) -> Option + fn skewness(&self) -> Result where A: Float + FromPrimitive, { let central_moments = self.central_moments(3)?; - Some(central_moments[3] / central_moments[2].sqrt().powi(3)) + Ok(central_moments[3] / central_moments[2].sqrt().powi(3)) } - fn central_moment(&self, order: usize) -> Option + fn central_moment(&self, order: usize) -> Result where A: Float + FromPrimitive, { if self.is_empty() { - return None; + return Err(EmptyInput); } match order { - 0 => Some(A::one()), - 1 => Some(A::zero()), + 0 => Ok(A::one()), + 1 => Ok(A::zero()), n => { let mean = self.mean().unwrap(); let shifted_array = self.mapv(|x| x - mean); @@ -71,21 +71,21 @@ where let correction_term = -shifted_moments[1]; let coefficients = central_moment_coefficients(&shifted_moments); - Some(horner_method(coefficients, correction_term)) + Ok(horner_method(coefficients, correction_term)) } } } - fn central_moments(&self, order: usize) -> Option> + fn central_moments(&self, order: usize) -> Result, EmptyInput> where A: Float + FromPrimitive, { if self.is_empty() { - return None; + return Err(EmptyInput); } match order { - 0 => Some(vec![A::one()]), - 1 => Some(vec![A::one(), A::zero()]), + 0 => Ok(vec![A::one()]), + 1 => Ok(vec![A::one(), A::zero()]), n => { // We only perform these operations once, and then reuse their // result to compute all the required moments @@ -100,7 +100,7 @@ where let central_moment = horner_method(coefficients, correction_term); central_moments.push(central_moment) } - Some(central_moments) + Ok(central_moments) } } } From 8762490df18a0594f3b20d9b6d67eec5cd7fb170 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 2 Apr 2019 08:04:20 +0100 Subject: [PATCH 38/44] Match test to trait+impl --- src/summary_statistics/means.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 6b70a70b..c7edc404 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -255,8 +255,8 @@ mod tests { fn test_central_moment_with_empty_array_of_floats() { let a: Array1 = array![]; for order in 0..=3 { - assert!(a.central_moment(order).is_none()); - assert!(a.central_moments(order).is_none()); + assert_eq!(a.central_moment(order), Err(EmptyInput)); + assert_eq!(a.central_moments(order), Err(EmptyInput)); } } @@ -323,8 +323,8 @@ mod tests { #[test] fn test_kurtosis_and_skewness_is_none_with_empty_array_of_floats() { let a: Array1 = array![]; - assert!(a.skewness().is_none()); - assert!(a.kurtosis().is_none()); + assert_eq!(a.skewness(), Err(EmptyInput)); + assert_eq!(a.kurtosis(), Err(EmptyInput)); } #[test] From 76b007735538fb185a812e3a7b80538650b155fa Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Tue, 2 Apr 2019 08:43:52 +0100 Subject: [PATCH 39/44] Fmt --- src/summary_statistics/means.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index c7edc404..c6fe530f 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -187,10 +187,10 @@ where #[cfg(test)] mod tests { use super::SummaryStatisticsExt; + use crate::errors::EmptyInput; use approx::assert_abs_diff_eq; use ndarray::{array, Array, Array1}; use ndarray_rand::RandomExt; - use crate::errors::EmptyInput; use noisy_float::types::N64; use rand::distributions::Uniform; use std::f64; From 74cc782eb09452570bbbef650dad49098fc7b6c8 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 5 Apr 2019 08:59:37 +0100 Subject: [PATCH 40/44] Converted order to u16 --- src/summary_statistics/means.rs | 12 ++++++------ src/summary_statistics/mod.rs | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index c6fe530f..1b929266 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -54,7 +54,7 @@ where Ok(central_moments[3] / central_moments[2].sqrt().powi(3)) } - fn central_moment(&self, order: usize) -> Result + fn central_moment(&self, order: u16) -> Result where A: Float + FromPrimitive, { @@ -76,7 +76,7 @@ where } } - fn central_moments(&self, order: usize) -> Result, EmptyInput> + fn central_moments(&self, order: u16) -> Result, EmptyInput> where A: Float + FromPrimitive, { @@ -96,7 +96,7 @@ where let mut central_moments = vec![A::one(), A::zero()]; for k in 2..=n { - let coefficients = central_moment_coefficients(&shifted_moments[..=k]); + let coefficients = central_moment_coefficients(&shifted_moments[..=(k as usize)]); let central_moment = horner_method(coefficients, correction_term); central_moments.push(central_moment) } @@ -118,7 +118,7 @@ where /// The returned moments are ordered by power magnitude: 0th moment, 1st moment, etc. /// /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array. -fn moments(a: ArrayBase, order: usize) -> Vec +fn moments(a: ArrayBase, order: u16) -> Vec where A: Float + FromPrimitive, S: Data, @@ -300,7 +300,7 @@ mod tests { ]; for (order, expected_moment) in expected_moments.iter().enumerate() { assert_abs_diff_eq!( - a.central_moment(order).unwrap(), + a.central_moment(order as u16).unwrap(), expected_moment, epsilon = 1e-8 ); @@ -316,7 +316,7 @@ mod tests { let order = 10; let central_moments = a.central_moments(order).unwrap(); for i in 0..=order { - assert_eq!(a.central_moment(i).unwrap(), central_moments[i]); + assert_eq!(a.central_moment(i).unwrap(), central_moments[i as usize]); } } diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs index b270b45e..e0a71399 100644 --- a/src/summary_statistics/mod.rs +++ b/src/summary_statistics/mod.rs @@ -119,7 +119,7 @@ where /// /// [central moment]: https://en.wikipedia.org/wiki/Central_moment /// [Pébay et al., 2016]: https://www.osti.gov/pages/servlets/purl/1427275 - fn central_moment(&self, order: usize) -> Result + fn central_moment(&self, order: u16) -> Result where A: Float + FromPrimitive; @@ -137,7 +137,7 @@ where /// /// [central moments]: https://en.wikipedia.org/wiki/Central_moment /// [central moment]: #tymethod.central_moment - fn central_moments(&self, order: usize) -> Result, EmptyInput> + fn central_moments(&self, order: u16) -> Result, EmptyInput> where A: Float + FromPrimitive; } From 31a3909e2afe90a166ad77c37a01e07360ff5749 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Fri, 5 Apr 2019 08:59:51 +0100 Subject: [PATCH 41/44] Fmt --- src/summary_statistics/means.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 1b929266..8b187b37 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -96,7 +96,8 @@ where let mut central_moments = vec![A::one(), A::zero()]; for k in 2..=n { - let coefficients = central_moment_coefficients(&shifted_moments[..=(k as usize)]); + let coefficients = + central_moment_coefficients(&shifted_moments[..=(k as usize)]); let central_moment = horner_method(coefficients, correction_term); central_moments.push(central_moment) } @@ -316,7 +317,8 @@ mod tests { let order = 10; let central_moments = a.central_moments(order).unwrap(); for i in 0..=order { - assert_eq!(a.central_moment(i).unwrap(), central_moments[i as usize]); + assert_eq!(a.central_moment(i).unwrap(), central_moments[i as usiz + e]); } } From 1f9bcf181fd6d8e5cb1d5cce0ac30dbfaeef3b2f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 6 Apr 2019 18:17:49 +0100 Subject: [PATCH 42/44] Fix typo. Change casting to use as. --- src/summary_statistics/means.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 8b187b37..de65a801 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -2,7 +2,7 @@ use super::SummaryStatisticsExt; use crate::errors::EmptyInput; use ndarray::{ArrayBase, Data, Dimension}; use num_integer::IterBinomial; -use num_traits::{Float, FromPrimitive, ToPrimitive, Zero}; +use num_traits::{Float, FromPrimitive, Zero}; use std::ops::{Add, Div}; impl SummaryStatisticsExt for ArrayBase @@ -127,9 +127,7 @@ where { let n_elements = A::from_usize(a.len()).expect("Converting number of elements to `A` must not fail"); - let order = order - .to_i32() - .expect("Moment order must not overflow `i32`."); + let order = order as i32; // When k=0, we are raising each element to the 0th power // No need to waste CPU cycles going through the array @@ -317,8 +315,7 @@ mod tests { let order = 10; let central_moments = a.central_moments(order).unwrap(); for i in 0..=order { - assert_eq!(a.central_moment(i).unwrap(), central_moments[i as usiz - e]); + assert_eq!(a.central_moment(i).unwrap(), central_moments[i as usize]); } } From fb65893479c2069e9f424ce5da86fd1556a345df Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 6 Apr 2019 18:18:27 +0100 Subject: [PATCH 43/44] Fix typo. Change casting to use as. --- src/summary_statistics/means.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index de65a801..68de9b29 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -321,7 +321,8 @@ mod tests { #[test] fn test_kurtosis_and_skewness_is_none_with_empty_array_of_floats() { - let a: Array1 = array![]; + let a: Array1 = array![] + ; assert_eq!(a.skewness(), Err(EmptyInput)); assert_eq!(a.kurtosis(), Err(EmptyInput)); } From c3a166e1299350d5f373667ace93bf3610ca527b Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 6 Apr 2019 18:18:33 +0100 Subject: [PATCH 44/44] Fix typo. Change casting to use as. --- src/summary_statistics/means.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs index 68de9b29..de65a801 100644 --- a/src/summary_statistics/means.rs +++ b/src/summary_statistics/means.rs @@ -321,8 +321,7 @@ mod tests { #[test] fn test_kurtosis_and_skewness_is_none_with_empty_array_of_floats() { - let a: Array1 = array![] - ; + let a: Array1 = array![]; assert_eq!(a.skewness(), Err(EmptyInput)); assert_eq!(a.kurtosis(), Err(EmptyInput)); }