diff --git a/Cargo.toml b/Cargo.toml
index 57795ce8..645c41c0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,6 +17,7 @@ categories = ["data-structures", "science"]
[dependencies]
ndarray = "0.12.1"
noisy_float = "0.1.8"
+num-integer = "0.1"
num-traits = "0.2"
rand = "0.6"
itertools = { version = "0.7.0", default-features = false }
diff --git a/src/lib.rs b/src/lib.rs
index 9cf586f1..37499368 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -28,6 +28,7 @@
extern crate itertools;
extern crate ndarray;
extern crate noisy_float;
+extern crate num_integer;
extern crate num_traits;
extern crate rand;
diff --git a/src/summary_statistics/means.rs b/src/summary_statistics/means.rs
index a2fed054..de65a801 100644
--- a/src/summary_statistics/means.rs
+++ b/src/summary_statistics/means.rs
@@ -1,6 +1,7 @@
use super::SummaryStatisticsExt;
use crate::errors::EmptyInput;
use ndarray::{ArrayBase, Data, Dimension};
+use num_integer::IterBinomial;
use num_traits::{Float, FromPrimitive, Zero};
use std::ops::{Add, Div};
@@ -36,15 +37,161 @@ where
{
self.map(|x| x.ln()).mean().map(|x| x.exp())
}
+
+ fn kurtosis(&self) -> Result
+ where
+ A: Float + FromPrimitive,
+ {
+ let central_moments = self.central_moments(4)?;
+ Ok(central_moments[4] / central_moments[2].powi(2))
+ }
+
+ fn skewness(&self) -> Result
+ where
+ A: Float + FromPrimitive,
+ {
+ let central_moments = self.central_moments(3)?;
+ Ok(central_moments[3] / central_moments[2].sqrt().powi(3))
+ }
+
+ fn central_moment(&self, order: u16) -> Result
+ where
+ A: Float + FromPrimitive,
+ {
+ if self.is_empty() {
+ return Err(EmptyInput);
+ }
+ match order {
+ 0 => Ok(A::one()),
+ 1 => Ok(A::zero()),
+ n => {
+ let mean = self.mean().unwrap();
+ let shifted_array = self.mapv(|x| x - mean);
+ let shifted_moments = moments(shifted_array, n);
+ let correction_term = -shifted_moments[1];
+
+ let coefficients = central_moment_coefficients(&shifted_moments);
+ Ok(horner_method(coefficients, correction_term))
+ }
+ }
+ }
+
+ fn central_moments(&self, order: u16) -> Result, EmptyInput>
+ where
+ A: Float + FromPrimitive,
+ {
+ if self.is_empty() {
+ return Err(EmptyInput);
+ }
+ match order {
+ 0 => Ok(vec![A::one()]),
+ 1 => Ok(vec![A::one(), A::zero()]),
+ n => {
+ // We only perform these operations once, and then reuse their
+ // result to compute all the required moments
+ let mean = self.mean().unwrap();
+ let shifted_array = self.mapv(|x| x - mean);
+ let shifted_moments = moments(shifted_array, n);
+ let correction_term = -shifted_moments[1];
+
+ let mut central_moments = vec![A::one(), A::zero()];
+ for k in 2..=n {
+ let coefficients =
+ central_moment_coefficients(&shifted_moments[..=(k as usize)]);
+ let central_moment = horner_method(coefficients, correction_term);
+ central_moments.push(central_moment)
+ }
+ Ok(central_moments)
+ }
+ }
+ }
+}
+
+/// Returns a vector containing all moments of the array elements up to
+/// *order*, where the *p*-th moment is defined as:
+///
+/// ```text
+/// 1 n
+/// ― ∑ xᵢᵖ
+/// n i=1
+/// ```
+///
+/// The returned moments are ordered by power magnitude: 0th moment, 1st moment, etc.
+///
+/// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
+fn moments(a: ArrayBase, order: u16) -> Vec
+where
+ A: Float + FromPrimitive,
+ S: Data,
+ D: Dimension,
+{
+ let n_elements =
+ A::from_usize(a.len()).expect("Converting number of elements to `A` must not fail");
+ let order = order as i32;
+
+ // When k=0, we are raising each element to the 0th power
+ // No need to waste CPU cycles going through the array
+ let mut moments = vec![A::one()];
+
+ if order >= 1 {
+ // When k=1, we don't need to raise elements to the 1th power (identity)
+ moments.push(a.sum() / n_elements)
+ }
+
+ for k in 2..=order {
+ moments.push(a.map(|x| x.powi(k)).sum() / n_elements)
+ }
+ moments
+}
+
+/// Returns the coefficients in the polynomial expression to compute the *p*th
+/// central moment as a function of the sample mean.
+///
+/// It takes as input all moments up to order *p*, ordered by power magnitude - *p* is
+/// inferred to be the length of the *moments* array.
+fn central_moment_coefficients(moments: &[A]) -> Vec
+where
+ A: Float + FromPrimitive,
+{
+ let order = moments.len();
+ IterBinomial::new(order)
+ .zip(moments.iter().rev())
+ .map(|(binom, &moment)| A::from_usize(binom).unwrap() * moment)
+ .collect()
+}
+
+/// Uses [Horner's method] to evaluate a polynomial with a single indeterminate.
+///
+/// Coefficients are expected to be sorted by ascending order
+/// with respect to the indeterminate's exponent.
+///
+/// If the array is empty, `A::zero()` is returned.
+///
+/// Horner's method can evaluate a polynomial of order *n* with a single indeterminate
+/// using only *n-1* multiplications and *n-1* sums - in terms of number of operations,
+/// this is an optimal algorithm for polynomial evaluation.
+///
+/// [Horner's method]: https://en.wikipedia.org/wiki/Horner%27s_method
+fn horner_method(coefficients: Vec, indeterminate: A) -> A
+where
+ A: Float,
+{
+ let mut result = A::zero();
+ for coefficient in coefficients.into_iter().rev() {
+ result = coefficient + indeterminate * result
+ }
+ result
}
#[cfg(test)]
mod tests {
use super::SummaryStatisticsExt;
use crate::errors::EmptyInput;
- use approx::abs_diff_eq;
- use ndarray::{array, Array1};
+ use approx::assert_abs_diff_eq;
+ use ndarray::{array, Array, Array1};
+ use ndarray_rand::RandomExt;
use noisy_float::types::N64;
+ use rand::distributions::Uniform;
use std::f64;
#[test]
@@ -90,16 +237,116 @@ mod tests {
// Computed using SciPy
let expected_geometric_mean = 0.4345897639796527;
- abs_diff_eq!(a.mean().unwrap(), expected_mean, epsilon = f64::EPSILON);
- abs_diff_eq!(
+ assert_abs_diff_eq!(a.mean().unwrap(), expected_mean, epsilon = 1e-9);
+ assert_abs_diff_eq!(
a.harmonic_mean().unwrap(),
expected_harmonic_mean,
- epsilon = f64::EPSILON
+ epsilon = 1e-7
);
- abs_diff_eq!(
+ assert_abs_diff_eq!(
a.geometric_mean().unwrap(),
expected_geometric_mean,
- epsilon = f64::EPSILON
+ epsilon = 1e-12
);
}
+
+ #[test]
+ fn test_central_moment_with_empty_array_of_floats() {
+ let a: Array1 = array![];
+ for order in 0..=3 {
+ assert_eq!(a.central_moment(order), Err(EmptyInput));
+ assert_eq!(a.central_moments(order), Err(EmptyInput));
+ }
+ }
+
+ #[test]
+ fn test_zeroth_central_moment_is_one() {
+ let n = 50;
+ let bound: f64 = 200.;
+ let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs()));
+ assert_eq!(a.central_moment(0).unwrap(), 1.);
+ }
+
+ #[test]
+ fn test_first_central_moment_is_zero() {
+ let n = 50;
+ let bound: f64 = 200.;
+ let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs()));
+ assert_eq!(a.central_moment(1).unwrap(), 0.);
+ }
+
+ #[test]
+ fn test_central_moments() {
+ let a: Array1 = array![
+ 0.07820559, 0.5026185, 0.80935324, 0.39384033, 0.9483038, 0.62516215, 0.90772261,
+ 0.87329831, 0.60267392, 0.2960298, 0.02810356, 0.31911966, 0.86705506, 0.96884832,
+ 0.2222465, 0.42162446, 0.99909868, 0.47619762, 0.91696979, 0.9972741, 0.09891734,
+ 0.76934818, 0.77566862, 0.7692585, 0.2235759, 0.44821286, 0.79732186, 0.04804275,
+ 0.87863238, 0.1111003, 0.6653943, 0.44386445, 0.2133176, 0.39397086, 0.4374617,
+ 0.95896624, 0.57850146, 0.29301706, 0.02329879, 0.2123203, 0.62005503, 0.996492,
+ 0.5342986, 0.97822099, 0.5028445, 0.6693834, 0.14256682, 0.52724704, 0.73482372,
+ 0.1809703,
+ ];
+ // Computed using scipy.stats.moment
+ let expected_moments = vec![
+ 1.,
+ 0.,
+ 0.09339920262960291,
+ -0.0026849636727735186,
+ 0.015403769257729755,
+ -0.001204176487006564,
+ 0.002976822584939186,
+ ];
+ for (order, expected_moment) in expected_moments.iter().enumerate() {
+ assert_abs_diff_eq!(
+ a.central_moment(order as u16).unwrap(),
+ expected_moment,
+ epsilon = 1e-8
+ );
+ }
+ }
+
+ #[test]
+ fn test_bulk_central_moments() {
+ // Test that the bulk method is coherent with the non-bulk method
+ let n = 50;
+ let bound: f64 = 200.;
+ let a = Array::random(n, Uniform::new(-bound.abs(), bound.abs()));
+ let order = 10;
+ let central_moments = a.central_moments(order).unwrap();
+ for i in 0..=order {
+ assert_eq!(a.central_moment(i).unwrap(), central_moments[i as usize]);
+ }
+ }
+
+ #[test]
+ fn test_kurtosis_and_skewness_is_none_with_empty_array_of_floats() {
+ let a: Array1 = array![];
+ assert_eq!(a.skewness(), Err(EmptyInput));
+ assert_eq!(a.kurtosis(), Err(EmptyInput));
+ }
+
+ #[test]
+ fn test_kurtosis_and_skewness() {
+ let a: Array1 = array![
+ 0.33310096, 0.98757449, 0.9789796, 0.96738114, 0.43545674, 0.06746873, 0.23706562,
+ 0.04241815, 0.38961714, 0.52421271, 0.93430327, 0.33911604, 0.05112372, 0.5013455,
+ 0.05291507, 0.62511183, 0.20749633, 0.22132433, 0.14734804, 0.51960608, 0.00449208,
+ 0.4093339, 0.2237519, 0.28070469, 0.7887231, 0.92224523, 0.43454188, 0.18335111,
+ 0.08646856, 0.87979847, 0.25483457, 0.99975627, 0.52712442, 0.41163279, 0.85162594,
+ 0.52618733, 0.75815023, 0.30640695, 0.14205781, 0.59695813, 0.851331, 0.39524328,
+ 0.73965373, 0.4007615, 0.02133069, 0.92899207, 0.79878191, 0.38947334, 0.22042183,
+ 0.77768353,
+ ];
+ // Computed using scipy.stats.kurtosis(a, fisher=False)
+ let expected_kurtosis = 1.821933711687523;
+ // Computed using scipy.stats.skew
+ let expected_skewness = 0.2604785422878771;
+
+ let kurtosis = a.kurtosis().unwrap();
+ let skewness = a.skewness().unwrap();
+
+ assert_abs_diff_eq!(kurtosis, expected_kurtosis, epsilon = 1e-12);
+ assert_abs_diff_eq!(skewness, expected_skewness, epsilon = 1e-8);
+ }
}
diff --git a/src/summary_statistics/mod.rs b/src/summary_statistics/mod.rs
index 30d20b89..e0a71399 100644
--- a/src/summary_statistics/mod.rs
+++ b/src/summary_statistics/mod.rs
@@ -61,6 +61,85 @@ where
fn geometric_mean(&self) -> Result
where
A: Float + FromPrimitive;
+
+ /// Returns the [kurtosis] `Kurt[X]` of all elements in the array:
+ ///
+ /// ```text
+ /// Kurt[X] = μ₄ / σ⁴
+ /// ```
+ ///
+ /// where μ₄ is the fourth central moment and σ is the standard deviation of
+ /// the elements in the array.
+ ///
+ /// This is sometimes referred to as _Pearson's kurtosis_. Fisher's kurtosis can be
+ /// computed by subtracting 3 from Pearson's kurtosis.
+ ///
+ /// If the array is empty, `Err(EmptyInput)` is returned.
+ ///
+ /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
+ ///
+ /// [kurtosis]: https://en.wikipedia.org/wiki/Kurtosis
+ fn kurtosis(&self) -> Result
+ where
+ A: Float + FromPrimitive;
+
+ /// Returns the [Pearson's moment coefficient of skewness] γ₁ of all elements in the array:
+ ///
+ /// ```text
+ /// γ₁ = μ₃ / σ³
+ /// ```
+ ///
+ /// where μ₃ is the third central moment and σ is the standard deviation of
+ /// the elements in the array.
+ ///
+ /// If the array is empty, `Err(EmptyInput)` is returned.
+ ///
+ /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
+ ///
+ /// [Pearson's moment coefficient of skewness]: https://en.wikipedia.org/wiki/Skewness
+ fn skewness(&self) -> Result
+ where
+ A: Float + FromPrimitive;
+
+ /// Returns the *p*-th [central moment] of all elements in the array, μₚ:
+ ///
+ /// ```text
+ /// 1 n
+ /// μₚ = ― ∑ (xᵢ-x̅)ᵖ
+ /// n i=1
+ /// ```
+ ///
+ /// If the array is empty, `Err(EmptyInput)` is returned.
+ ///
+ /// The *p*-th central moment is computed using a corrected two-pass algorithm (see Section 3.5
+ /// in [Pébay et al., 2016]). Complexity is *O(np)* when *n >> p*, *p > 1*.
+ ///
+ /// **Panics** if `A::from_usize()` fails to convert the number of elements
+ /// in the array or if `order` overflows `i32`.
+ ///
+ /// [central moment]: https://en.wikipedia.org/wiki/Central_moment
+ /// [Pébay et al., 2016]: https://www.osti.gov/pages/servlets/purl/1427275
+ fn central_moment(&self, order: u16) -> Result
+ where
+ A: Float + FromPrimitive;
+
+ /// Returns the first *p* [central moments] of all elements in the array, see [central moment]
+ /// for more details.
+ ///
+ /// If the array is empty, `Err(EmptyInput)` is returned.
+ ///
+ /// This method reuses the intermediate steps for the *k*-th moment to compute the *(k+1)*-th,
+ /// being thus more efficient than repeated calls to [central moment] if the computation
+ /// of central moments of multiple orders is required.
+ ///
+ /// **Panics** if `A::from_usize()` fails to convert the number of elements
+ /// in the array or if `order` overflows `i32`.
+ ///
+ /// [central moments]: https://en.wikipedia.org/wiki/Central_moment
+ /// [central moment]: #tymethod.central_moment
+ fn central_moments(&self, order: u16) -> Result, EmptyInput>
+ where
+ A: Float + FromPrimitive;
}
mod means;