rust-ndarray · bytesnake · Apr 8, 2021 · Apr 8, 2021 · Apr 8, 2021 · Apr 14, 2021
diff --git a/ndarray-linalg/Cargo.toml b/ndarray-linalg/Cargo.toml
@@ -49,7 +49,9 @@ default-features = false
 paste = "1.0.5"
 criterion = "0.3.4"
 # Keep the same version as ndarray's dependency!
-approx = { version = "0.4.0", features = ["num-complex"] }
+approx = { version = "0.4", features = ["num-complex"] }
+rand_xoshiro = "0.6"
+ndarray-rand = "0.14"
 
 [[bench]]
 name = "truncated_eig"

diff --git a/ndarray-linalg/src/lobpcg/eig.rs b/ndarray-linalg/src/lobpcg/eig.rs
@@ -1,9 +1,10 @@
+//! Truncated eigenvalue decomposition
+//!
+
 use super::lobpcg::{lobpcg, LobpcgResult, Order};
 use crate::{generate, Scalar};
 use lax::Lapack;
 
-///! Implements truncated eigenvalue decomposition
-///
 use ndarray::prelude::*;
 use ndarray::stack;
 use ndarray::ScalarOperand;
@@ -15,6 +16,23 @@ use num_traits::{Float, NumCast};
 /// parameter like maximal iteration, precision and constraint matrix. Furthermore it allows
 /// conversion into a iterative solver where each iteration step yields a new eigenvalue/vector
 /// pair.
+///
+/// # Example
+///
+/// ```rust
+/// use ndarray::{arr1, Array2};
+/// use ndarray_linalg::{TruncatedEig, TruncatedOrder};
+///
+/// let diag = arr1(&[1., 2., 3., 4., 5.]);
+/// let a = Array2::from_diag(&diag);
+///
+/// let eig = TruncatedEig::new(a, TruncatedOrder::Largest)
+///    .precision(1e-5)
+///    .maxiter(500);
+///
+/// let res = eig.decompose(3);
+/// ```
+
 pub struct TruncatedEig<A: Scalar> {
     order: Order,
     problem: Array2<A>,
@@ -25,6 +43,11 @@ pub struct TruncatedEig<A: Scalar> {
 }
 
 impl<A: Float + Scalar + ScalarOperand + Lapack + PartialOrd + Default> TruncatedEig<A> {
+    /// Create a new truncated eigenproblem solver
+    ///
+    /// # Properties
+    /// * `problem`: problem matrix
+    /// * `order`: ordering of the eigenvalues with [TruncatedOrder](crate::TruncatedOrder)
     pub fn new(problem: Array2<A>, order: Order) -> TruncatedEig<A> {
         TruncatedEig {
             precision: 1e-5,
@@ -36,31 +59,71 @@ impl<A: Float + Scalar + ScalarOperand + Lapack + PartialOrd + Default> Truncate
         }
     }
 
+    /// Set desired precision
+    ///
+    /// This argument specifies the desired precision, which is passed to the LOBPCG solver. It
+    /// controls at which point the opimization of each eigenvalue is stopped. The precision is
+    /// global and applied to all eigenvalues with respect to their L2 norm.
+    ///
+    /// If the precision can't be reached and the maximum number of iteration is reached, then an
+    /// error is returned in [LobpcgResult](crate::lobpcg::LobpcgResult).
     pub fn precision(mut self, precision: f32) -> Self {
         self.precision = precision;
 
         self
     }
 
+    /// Set the maximal number of iterations
+    ///
+    /// The LOBPCG is an iterative approach to eigenproblems and stops when this maximum
+    /// number of iterations are reached.
     pub fn maxiter(mut self, maxiter: usize) -> Self {
         self.maxiter = maxiter;
 
         self
     }
 
+    /// Construct a solution, which is orthogonal to this
+    ///
+    /// If a number of eigenvectors are already known, then this function can be used to construct
+    /// a orthogonal subspace. Also used with an iterative approach.
     pub fn orthogonal_to(mut self, constraints: Array2<A>) -> Self {
         self.constraints = Some(constraints);
 
         self
     }
 
+    /// Apply a preconditioner
+    ///
+    /// A preconditioning matrix can speed up the solving process by improving the spectral
+    /// distribution of the eigenvalues. It requires prior knowledge of the problem.
     pub fn precondition_with(mut self, preconditioner: Array2<A>) -> Self {
         self.preconditioner = Some(preconditioner);
 
         self
     }
 
-    // calculate the eigenvalues decompose
+    /// Calculate the eigenvalue decomposition
+    ///
+    /// # Parameters
+    ///
+    ///  * `num`: number of eigenvalues ordered by magnitude
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ndarray::{arr1, Array2};
+    /// use ndarray_linalg::{TruncatedEig, TruncatedOrder};
+    ///
+    /// let diag = arr1(&[1., 2., 3., 4., 5.]);
+    /// let a = Array2::from_diag(&diag);
+    ///
+    /// let eig = TruncatedEig::new(a, TruncatedOrder::Largest)
+    ///    .precision(1e-5)
+    ///    .maxiter(500);
+    ///
+    /// let res = eig.decompose(3);
+    /// ```
     pub fn decompose(&self, num: usize) -> LobpcgResult<A> {
         let x: Array2<f64> = generate::random((self.problem.len_of(Axis(0)), num));
         let x = x.mapv(|x| NumCast::from(x).unwrap());
@@ -104,10 +167,30 @@ impl<A: Float + Scalar + ScalarOperand + Lapack + PartialOrd + Default> IntoIter
     }
 }
 
-/// Truncate eigenproblem iterator
+/// Truncated eigenproblem iterator
 ///
 /// This wraps a truncated eigenproblem and provides an iterator where each step yields a new
 /// eigenvalue/vector pair. Useful for generating pairs until a certain condition is met.
+///
+/// # Example
+///
+/// ```rust
+/// use ndarray::{arr1, Array2};
+/// use ndarray_linalg::{TruncatedEig, TruncatedOrder};
+///
+/// let diag = arr1(&[1., 2., 3., 4., 5.]);
+/// let a = Array2::from_diag(&diag);
+///
+/// let teig = TruncatedEig::new(a, TruncatedOrder::Largest)
+///     .precision(1e-5)
+///     .maxiter(500);
+///
+/// // solve eigenproblem until eigenvalues get smaller than 0.5
+/// let res = teig.into_iter()
+///     .take_while(|x| x.0[0] > 0.5)
+///     .flat_map(|x| x.0.to_vec())
+///     .collect::<Vec<_>>();
+/// ```
 pub struct TruncatedEigIterator<A: Scalar> {
     step_size: usize,
     remaining: usize,

diff --git a/ndarray-linalg/src/lobpcg/lobpcg.rs b/ndarray-linalg/src/lobpcg/lobpcg.rs
@@ -1,7 +1,7 @@
 ///! Locally Optimal Block Preconditioned Conjugated
 ///!
 ///! This module implements the Locally Optimal Block Preconditioned Conjugated (LOBPCG) algorithm,
-///which can be used as a solver for large symmetric positive definite eigenproblems.
+///which can be used as a solver for large symmetric eigenproblems.
 use crate::error::{LinalgError, Result};
 use crate::{cholesky::*, close_l2, eigh::*, norm::*, triangular::*};
 use cauchy::Scalar;

diff --git a/ndarray-linalg/src/lobpcg/mod.rs b/ndarray-linalg/src/lobpcg/mod.rs
@@ -1,7 +1,23 @@
+//! Decomposition with LOBPCG
+//!
+//! Locally Optimal Block Preconditioned Conjugate Gradient (LOBPCG) is a matrix-free method for
+//! finding the large (or smallest) eigenvalues and the corresponding eigenvectors of a symmetric
+//! eigenvalue problem
+//! ```text
+//! A x = lambda x
+//! ```
+//! where A is symmetric and (x, lambda) the solution. It has the following advantages:
+//! * matrix free: does not require storing the coefficient matrix explicitely and only evaluates
+//! matrix-vector products.
+//! * factorization-free: does not require any matrix decomposition
+//! * linear-convergence: theoretically guaranteed and practically observed
+//!
+//! See also the wikipedia article at [LOBPCG](https://en.wikipedia.org/wiki/LOBPCG)
+//!
 mod eig;
 mod lobpcg;
 mod svd;
 
-pub use eig::TruncatedEig;
+pub use eig::{TruncatedEig, TruncatedEigIterator};
 pub use lobpcg::{lobpcg, LobpcgResult, Order as TruncatedOrder};
-pub use svd::TruncatedSvd;
+pub use svd::{MagnitudeCorrection, TruncatedSvd};
diff --git a/ndarray-linalg/src/lobpcg/svd.rs b/ndarray-linalg/src/lobpcg/svd.rs
@@ -24,7 +24,7 @@ pub struct TruncatedSvdResult<A> {
 }
 
 impl<A: Float + PartialOrd + DivAssign<A> + 'static + MagnitudeCorrection> TruncatedSvdResult<A> {
-    /// Returns singular values ordered by magnitude with indices.
+    /// Returns singular values ordered by magnitude with indices
     fn singular_values_with_indices(&self) -> (Array1<A>, Vec<usize>) {
         // numerate eigenvalues
         let mut a = self.eigvals.iter().enumerate().collect::<Vec<_>>();
@@ -90,7 +90,7 @@ impl<A: Float + PartialOrd + DivAssign<A> + 'static + MagnitudeCorrection> Trunc
 /// Truncated singular value decomposition
 ///
 /// Wraps the LOBPCG algorithm and provides convenient builder-pattern access to
-/// parameter like maximal iteration, precision and constraint matrix.
+/// parameter like maximal iteration, precision and contrain matrix.
 pub struct TruncatedSvd<A: Scalar> {
     order: Order,
     problem: Array2<A>,
@@ -99,6 +99,11 @@ pub struct TruncatedSvd<A: Scalar> {
 }
 
 impl<A: Float + Scalar + ScalarOperand + Lapack + PartialOrd + Default> TruncatedSvd<A> {
+    /// Create a new truncated SVD problem
+    ///
+    /// # Parameters
+    ///  * `problem`: rectangular matrix which is decomposed
+    ///  * `order`: whether to return large or small (close to zero) singular values
     pub fn new(problem: Array2<A>, order: Order) -> TruncatedSvd<A> {
         TruncatedSvd {
             precision: 1e-5,
@@ -108,19 +113,48 @@ impl<A: Float + Scalar + ScalarOperand + Lapack + PartialOrd + Default> Truncate
         }
     }
 
+    /// Set the required precision of the solution
+    ///
+    /// The precision is, in the context of SVD, the square-root precision of the underlying
+    /// eigenproblem solution. The eigenproblem-precision is used to check the L2 error of each
+    /// eigenvector and stops its optimization when the required precision is reached.
     pub fn precision(mut self, precision: f32) -> Self {
         self.precision = precision;
 
         self
     }
 
+    /// Set the maximal number of iterations
+    ///
+    /// The LOBPCG is an iterative approach to eigenproblems and stops when this maximum
+    /// number of iterations are reached.
     pub fn maxiter(mut self, maxiter: usize) -> Self {
         self.maxiter = maxiter;
 
         self
     }
 
-    // calculate the eigenvalue decomposition
+    /// Calculate the singular value decomposition
+    ///
+    /// # Parameters
+    ///
+    ///  * `num`: number of singular-value/vector pairs, ordered by magnitude
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ndarray::{arr1, Array2};
+    /// use ndarray_linalg::{TruncatedSvd, TruncatedOrder};
+    ///
+    /// let diag = arr1(&[1., 2., 3., 4., 5.]);
+    /// let a = Array2::from_diag(&diag);
+    ///
+    /// let eig = TruncatedSvd::new(a, TruncatedOrder::Largest)
+    ///    .precision(1e-5)
+    ///    .maxiter(500);
+    ///
+    /// let res = eig.decompose(3);
+    /// ```
     pub fn decompose(self, num: usize) -> Result<TruncatedSvdResult<A>> {
         if num < 1 {
             panic!("The number of singular values to compute should be larger than zero!");
@@ -173,6 +207,11 @@ impl<A: Float + Scalar + ScalarOperand + Lapack + PartialOrd + Default> Truncate
     }
 }
 
+/// Magnitude Correction
+///
+/// The magnitude correction changes the cut-off point at which an eigenvector belongs to the
+/// null-space and its eigenvalue is therefore zero. The correction is multiplied by the floating
+/// point epsilon and therefore dependent on the floating type.
 pub trait MagnitudeCorrection {
     fn correction() -> Self;
 }
@@ -195,7 +234,12 @@ mod tests {
     use super::TruncatedSvd;
     use crate::{close_l2, generate};
 
-    use ndarray::{arr1, arr2, Array2};
+    use ndarray::{arr1, arr2, Array1, Array2};
+    use ndarray_rand::{rand_distr::StandardNormal, RandomExt};
+    use rand::SeedableRng;
+    use rand_xoshiro::Xoshiro256Plus;
+
+    use approx::assert_abs_diff_eq;
 
     #[test]
     fn test_truncated_svd() {
@@ -227,4 +271,58 @@ mod tests {
 
         close_l2(&a, &reconstructed, 1e-5);
     }
+
+    /// Eigenvalue structure in high dimensions
+    ///
+    /// This test checks that the eigenvalues are following the Marchensko-Pastur law. The data is
+    /// standard uniformly distributed (i.e. E(x) = 0, E^2(x) = 1) and we have twice the amount of
+    /// data when compared to features. The probability density of the eigenvalues should then follow
+    /// a special densitiy function, described by the Marchenko-Pastur law.
+    ///
+    /// See also https://en.wikipedia.org/wiki/Marchenko%E2%80%93Pastur_distribution
+    #[test]
+    fn test_marchenko_pastur() {
+        // create random number generator
+        let mut rng = Xoshiro256Plus::seed_from_u64(3);
+
+        // generate normal distribution random data with N >> p
+        let data = Array2::random_using((1000, 500), StandardNormal, &mut rng) / 1000f64.sqrt();
+
+        let res = TruncatedSvd::new(data, Order::Largest)
+            .decompose(500)
+            .unwrap();
+
+        let sv = res.values().mapv(|x: f64| x * x);
+
+        // we have created a random spectrum and can apply the Marchenko-Pastur law
+        // with variance 1 and p/n = 0.5
+        let (a, b) = (
+            1. * (1. - 0.5f64.sqrt()).powf(2.0),
+            1. * (1. + 0.5f64.sqrt()).powf(2.0),
+        );
+
+        // check that the spectrum has correct boundaries
+        assert_abs_diff_eq!(b, sv[0], epsilon = 0.1);
+        assert_abs_diff_eq!(a, sv[sv.len() - 1], epsilon = 0.1);
+
+        // estimate density empirical and compare with Marchenko-Pastur law
+        let mut i = 0;
+        'outer: for th in Array1::linspace(0.1, 2.8, 28).into_iter().rev() {
+            let mut count = 0;
+            while sv[i] >= *th {
+                count += 1;
+                i += 1;
+
+                if i == sv.len() {
+                    break 'outer;
+                }
+            }
+
+            let x = th + 0.05;
+            let mp_law = ((b - x) * (x - a)).sqrt() / std::f64::consts::PI / x;
+            let empirical = count as f64 / 500. / ((2.8 - 0.1) / 28.);
+
+            assert_abs_diff_eq!(mp_law, empirical, epsilon = 0.05);
+        }
+    }
 }