NdArray bump follow up #121

Merged: 7 commits, Apr 25, 2021
Changes from 1 commit
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -58,7 +58,7 @@ features = ["cblas"]

[dev-dependencies]
ndarray-rand = "0.13"
-approx = { version = "0.4", default-features = false, features = ["std"] }
+approx = "0.4"

linfa-datasets = { path = "datasets", features = ["winequality", "iris", "diabetes"] }

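The root manifest previously disabled approx's default features and re-enabled `std`; `std` is part of approx's default feature set, so the bare version string is equivalent and shorter. A minimal sketch of how this dev-dependency is typically exercised in tests (illustrative, not taken from the PR):

```rust
// Illustrative test only: approx's comparison macros are available with
// the default features, so `approx = "0.4"` is all the manifest needs.
use approx::assert_abs_diff_eq;

#[test]
fn floats_are_close() {
    assert_abs_diff_eq!(0.1_f64 + 0.2, 0.3, epsilon = 1e-12);
}
```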
3 changes: 1 addition & 2 deletions algorithms/linfa-clustering/Cargo.toml
@@ -30,14 +30,13 @@ features = ["std", "derive"]
[dependencies]
ndarray = { version = "0.14", features = ["rayon", "approx"]}
ndarray-linalg = "0.13"
lax = "0.1"
ndarray-rand = "0.13"
ndarray-stats = "0.4"
num-traits = "0.2"
rand_isaac = "0.3"
partitions = "0.2.4"

linfa = { version = "0.3.1", path = "../.." }
partitions = "0.2.4"

[dev-dependencies]
ndarray-npy = { version = "0.7", default-features = false }
41 changes: 22 additions & 19 deletions algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
@@ -119,7 +119,7 @@ impl<F: Float> Clone for GaussianMixtureModel<F> {
}
}

impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
impl<F: Float> GaussianMixtureModel<F> {
fn new<D: Data<Elem = F>, R: Rng + SeedableRng + Clone, T>(
hyperparameters: &GmmHyperParams<F, R>,
dataset: &DatasetBase<ArrayBase<D, Ix2>, T>,
@@ -138,7 +138,7 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
.fit(&dataset)?;
let mut resp = Array::<F, Ix2>::zeros((n_samples, hyperparameters.n_clusters()));
for (k, idx) in model.predict(dataset.records()).iter().enumerate() {
-resp[[k, *idx]] = F::from(1.).unwrap();
+resp[[k, *idx]] = F::cast(1.);
}
resp
}
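The recurring change in this file replaces `F::from(x).unwrap()` with the `F::cast(x)` helper that this PR adds to linfa's `Float` trait (see src/dataset/mod.rs below). A self-contained sketch of the same pattern using only num_traits:

```rust
use num_traits::{AsPrimitive, Float, FromPrimitive};

// Standalone version of the `cast` helper: funnel any primitive through
// f64, then convert into the target float type F.
fn cast<F: Float + FromPrimitive, T: AsPrimitive<f64>>(x: T) -> F {
    F::from_f64(x.as_()).unwrap()
}

fn main() {
    let one: f32 = cast(1.0);   // was: F::from(1.).unwrap()
    let n: f64 = cast(42usize); // integer counts go through f64 the same way
    assert_eq!(one, 1.0);
    assert_eq!(n, 42.0);
}
```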
@@ -150,7 +150,7 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
);
let totals = &resp.sum_axis(Axis(1)).insert_axis(Axis(0));
resp = (resp.reversed_axes() / totals).reversed_axes();
-resp.mapv(|v| F::from(v).unwrap())
+resp.mapv(|v| F::cast(v))
}
};

Expand All @@ -162,7 +162,7 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
hyperparameters.covariance_type(),
hyperparameters.reg_covariance(),
)?;
-weights /= F::from(n_samples).unwrap();
+weights /= F::cast(n_samples);

// GmmCovarType = full
let precisions_chol = Self::compute_precisions_cholesky_full(&covariances)?;
@@ -179,7 +179,7 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
}
}

impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
impl<F: Float> GaussianMixtureModel<F> {
pub fn params(n_clusters: usize) -> GmmHyperParams<F, Isaac64Rng> {
GmmHyperParams::new(n_clusters)
}
@@ -211,7 +211,7 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
reg_covar: F,
) -> Result<(Array1<F>, Array2<F>, Array3<F>)> {
let nk = resp.sum_axis(Axis(0));
-if nk.min().unwrap() < &(F::from(10.).unwrap() * F::epsilon()) {
+if nk.min().unwrap() < &(F::cast(10.) * F::epsilon()) {
return Err(GmmError::EmptyCluster(format!(
"Cluster #{} has no more point. Consider decreasing number of clusters or change initialization.",
nk.argmin().unwrap() + 1
@@ -253,9 +253,12 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
let n_features = covariances.shape()[1];
let mut precisions_chol = Array::zeros((n_clusters, n_features, n_features));
for (k, covariance) in covariances.outer_iter().enumerate() {
-let cov_chol = covariance.cholesky(UPLO::Lower)?;
-let sol =
-    cov_chol.solve_triangular(UPLO::Lower, Diag::NonUnit, &Array::eye(n_features))?;
+let sol = F::map_cauchy_bound_view(covariance, |x| {
+    let decomp = x.cholesky(UPLO::Lower).unwrap();
+    decomp.solve_triangular(UPLO::Lower, Diag::NonUnit, &Array::eye(n_features)).unwrap()
+});

precisions_chol.slice_mut(s![k, .., ..]).assign(&sol.t());
}
Ok(precisions_chol)
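`map_cauchy_bound_view` (added to the `Float` trait in src/dataset/mod.rs below) moves the covariance view into the Lapack-capable associated type `F::F2`, runs the closure there, and converts the result back. Per cluster, the closure computes the transposed inverse of the lower Cholesky factor; a direct f64 sketch of that step, mirroring the ndarray-linalg traits the original code uses:

```rust
use ndarray::Array2;
use ndarray_linalg::{Cholesky, Diag, SolveTriangular, UPLO};

// For one cluster covariance Σ: factor Σ = L·Lᵀ, obtain L⁻¹ by solving
// L·X = I, and return P = (L⁻¹)ᵀ, since P·Pᵀ = Σ⁻¹ (the precision matrix).
fn precision_cholesky(cov: &Array2<f64>) -> Array2<f64> {
    let n = cov.nrows();
    let l = cov.cholesky(UPLO::Lower).unwrap();
    let l_inv = l
        .solve_triangular(UPLO::Lower, Diag::NonUnit, &Array2::eye(n))
        .unwrap();
    l_inv.t().to_owned()
}
```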
@@ -296,12 +299,12 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
let n_samples = observations.nrows();
let (weights, means, covariances) = Self::estimate_gaussian_parameters(
&observations,
&log_resp.mapv(|v| Scalar::exp(v)),
&log_resp.mapv(|x| x.exp()),
&self.covar_type,
reg_covar,
)?;
self.means = means;
-self.weights = weights / F::from(n_samples).unwrap();
+self.weights = weights / F::cast(n_samples);
// GmmCovarType = Full()
self.precisions_chol = Self::compute_precisions_cholesky_full(&covariances)?;
Ok(())
@@ -325,9 +328,9 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
) -> (Array1<F>, Array2<F>) {
let weighted_log_prob = self.estimate_weighted_log_prob(&observations);
let log_prob_norm = weighted_log_prob
-    .mapv(|v| Scalar::exp(v))
+    .mapv(|x| x.exp())
.sum_axis(Axis(1))
-    .mapv(|v| Scalar::ln(v));
+    .mapv(|x| x.ln());
let log_resp = weighted_log_prob - log_prob_norm.to_owned().insert_axis(Axis(1));
(log_prob_norm, log_resp)
}
@@ -368,8 +371,8 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
.assign(&diff.mapv(|v| v * v).sum_axis(Axis(1)))
});
log_prob.mapv(|v| {
-    F::from(-0.5).unwrap()
-        * (v + F::from(n_features as f64 * f64::ln(2. * std::f64::consts::PI)).unwrap())
+    F::cast(-0.5)
+        * (v + F::cast(n_features as f64 * f64::ln(2. * std::f64::consts::PI)))
}) + log_det
}
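For reference, with `v` the squared Mahalanobis distance accumulated above, `log_det` the log-determinant of the precision Cholesky factor, and d = n_features, this expression is the multivariate Gaussian log-density:

$$\ln \mathcal{N}(x \mid \mu_k, \Sigma_k) = -\frac{1}{2}\left(d\ln(2\pi) + (x-\mu_k)^\top \Sigma_k^{-1} (x-\mu_k)\right) + \ln\left|\det L_k^{-1}\right|$$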

@@ -384,16 +387,16 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
.unwrap()
.slice(s![.., ..; n_features+1])
.to_owned()
-    .mapv(|v| Scalar::ln(v));
+    .mapv(|x| x.ln());
log_diags.sum_axis(Axis(1))
}

fn estimate_log_weights(&self) -> Array1<F> {
-    self.weights().mapv(|v| Scalar::ln(v))
+    self.weights().mapv(|x| x.ln())
}
}

impl<'a, F: Float + Lapack + Scalar, R: Rng + SeedableRng + Clone, D: Data<Elem = F>, T>
impl<'a, F: Float, R: Rng + SeedableRng + Clone, D: Data<Elem = F>, T>
Fit<'a, ArrayBase<D, Ix2>, T> for GmmHyperParams<F, R>
{
type Object = Result<GaussianMixtureModel<F>>;
@@ -420,7 +423,7 @@ impl<'a, F: Float + Lapack + Scalar, R: Rng + SeedableRng + Clone, D: Data<Elem
lower_bound =
GaussianMixtureModel::<F>::compute_lower_bound(&log_resp, log_prob_norm);
let change = lower_bound - prev_lower_bound;
-if ndarray_rand::rand_distr::num_traits::Float::abs(change) < self.tolerance() {
+if change.abs() < self.tolerance() {
converged_iter = Some(n_iter);
break;
}
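Since `num_traits::Float` becomes a supertrait of linfa's `Float` in this PR (see src/dataset/mod.rs below), `abs` now resolves directly on `change` instead of going through the long re-exported trait path.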
6 changes: 4 additions & 2 deletions src/correlation.rs
@@ -24,12 +24,14 @@ fn pearson_correlation<F: Float, D: Data<Elem = F>>(data: &ArrayBase<D, Ix2>) ->

// center distribution by subtracting mean
let mean = data.mean_axis(Axis(0)).unwrap();
+//let std_deviation = mean.clone();
let denoised = data - &mean.insert_axis(Axis(1)).t();

// calculate the covariance matrix
-let covariance = denoised.t().dot(&denoised) / F::from(nobservations - 1).unwrap();
+let covariance = denoised.t().dot(&denoised) / F::cast(nobservations - 1);
// calculate the standard deviation vector
let std_deviation = denoised.var_axis(Axis(0), F::one()).mapv(|x| x.sqrt());
//let std_deviation: Array1<F> = NdArrayCauchy::from_cauchy(std_deviation);

// we will only save the upper triangular matrix as the diagonal is one and
// the lower triangular is a mirror of the upper triangular part
@@ -98,7 +100,7 @@ fn p_values<F: Float, D: Data<Elem = F>>(
}

// divide by the number of iterations to re-scale range
-p_values / F::from(num_iter).unwrap()
+p_values / F::cast(num_iter)
}

/// Pearson Correlation Coefficients (or Bivariate Coefficients)
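Putting the pieces of `pearson_correlation` together: center the data, form the covariance matrix, then normalize by the per-feature standard deviations, giving r_ij = cov_ij / (s_i s_j). A standalone f64 sketch with illustrative data (the real function additionally packs the upper triangle and computes p-values):

```rust
use ndarray::{array, Axis};

fn main() {
    let data = array![[1.0, 2.0], [2.0, 4.1], [3.0, 5.9]];
    let n = data.nrows() as f64;

    // center each feature by subtracting its mean
    let mean = data.mean_axis(Axis(0)).unwrap();
    let denoised = &data - &mean.insert_axis(Axis(0));

    // covariance matrix and per-feature standard deviations (ddof = 1)
    let covariance = denoised.t().dot(&denoised) / (n - 1.0);
    let std_dev = denoised.var_axis(Axis(0), 1.0).mapv(f64::sqrt);

    let r01 = covariance[[0, 1]] / (std_dev[0] * std_dev[1]);
    println!("pearson r = {:.4}", r01); // close to 1.0 for this data
}
```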
122 changes: 116 additions & 6 deletions src/dataset/mod.rs
@@ -4,11 +4,13 @@
//! functionality.
use ndarray::{
Array1, Array2, ArrayBase, ArrayView, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2,
-    Axis, CowArray, Ix2, Ix3, NdFloat, OwnedRepr,
+    Axis, CowArray, Ix2, Ix3, OwnedRepr, ScalarOperand, Dimension, ViewRepr,
};
+use ndarray_linalg::{Scalar, Lapack};
use num_traits::{AsPrimitive, FromPrimitive, NumAssignOps, Signed};
use rand::distributions::uniform::SampleUniform;

+use std::fmt;
use std::cmp::{Ordering, PartialOrd};
use std::collections::{HashMap, HashSet};
use std::hash::Hash;
@@ -31,10 +33,15 @@ pub mod multi_target_model;
/// implement them for 32bit and 64bit floating points. They are used in records of a dataset and, for
/// regression task, in the targets as well.
pub trait Float:
-    NdFloat
-    + FromPrimitive
-    + Signed
+    FromPrimitive
+    + num_traits::Float
+    + PartialOrd
+    + Sync
+    + Send
+    + Default
+    + fmt::Display
+    + fmt::Debug
+    + Signed
+ Sum
+ NumAssignOps
+ AsPrimitive<usize>
@@ -43,10 +50,113 @@ pub trait Float:
+ for<'a> SubAssign<&'a Self>
+ for<'a> DivAssign<&'a Self>
+ SampleUniform
+ ScalarOperand
Review comment (Member): Maybe we could add num_traits::MulAdd<Output = F> (see sparsemat/sprs#278 (comment)) to ease the upgrade to sprs 0.10?

Reply (Member, Author): Yes! I hadn't seen that post.
{
type F2: Scalar + Lapack + Float;

fn self_to_float(self) -> Self::F2;
fn float_to_self(x: Self::F2) -> Self;

fn ndarray_to_float<I: Dimension>(x: ArrayBase<OwnedRepr<Self>, I>) -> ArrayBase<OwnedRepr<Self::F2>, I>;

fn ndarray_to_self<I: Dimension>(x: ArrayBase<OwnedRepr<Self::F2>, I>) -> ArrayBase<OwnedRepr<Self>, I>;

fn ndarray_to_float_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a Self>, I>) -> ArrayBase<ViewRepr<&'a Self::F2>, I>;

fn ndarray_to_self_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a Self::F2>, I>) -> ArrayBase<ViewRepr<&'a Self>, I>;

fn cast<T: AsPrimitive<f64>>(x: T) -> Self {
Self::from(x.as_()).unwrap()
}

fn map_cauchy_bound<G, I, I2>(x: ArrayBase<OwnedRepr<Self>, I>, fnc: G) -> ArrayBase<OwnedRepr<Self>, I2>
where
G: Fn(ArrayBase<OwnedRepr<Self::F2>, I>) -> ArrayBase<OwnedRepr<Self::F2>, I2>,
I: Dimension,
I2: Dimension
{
let x = Self::ndarray_to_float(x);
let x = fnc(x);

Self::ndarray_to_self(x)
}

fn map_cauchy_bound_view<'a, G, I, I2>(x: ArrayBase<ViewRepr<&'a Self>, I>, fnc: G) -> ArrayBase<OwnedRepr<Self>, I2>
where
G: Fn(ArrayBase<ViewRepr<&'a Self::F2>, I>) -> ArrayBase<OwnedRepr<Self::F2>, I2>,
I: Dimension,
I2: Dimension
{
let x = Self::ndarray_to_float_view(x);
let x = fnc(x);

Self::ndarray_to_self(x)
}
}
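The "cauchy" in the helper names comes from the cauchy crate, which provides the `Scalar` trait that ndarray-linalg builds on. Since `F2 = Self` for both `f32` and `f64` (below), every conversion is an identity function and compiles away; the helpers exist so that code bounded only by linfa's `Float` can still reach Lapack routines. A hypothetical usage sketch (`generic_inverse` is illustrative; `Inverse` is ndarray-linalg's matrix-inverse trait):

```rust
use ndarray::Array2;
use ndarray_linalg::Inverse;

// Hypothetical: invert a matrix inside code that is generic over linfa's
// `Float`. The closure runs on F::F2, which is guaranteed Scalar + Lapack.
fn generic_inverse<F: Float>(m: Array2<F>) -> Array2<F> {
    F::map_cauchy_bound(m, |m| m.inv().unwrap())
}
```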

impl Float for f32 {
type F2 = Self;

fn self_to_float(self) -> Self::F2 {
self
}

fn float_to_self(x: Self::F2) -> Self {
x
}

fn ndarray_to_float<I: Dimension>(x: ArrayBase<OwnedRepr<f32>, I>) -> ArrayBase<OwnedRepr<f32>, I>
{
x
}

fn ndarray_to_self<I: Dimension>(x: ArrayBase<OwnedRepr<f32>, I>) -> ArrayBase<OwnedRepr<f32>, I>
{
x
}

fn ndarray_to_float_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a f32>, I>) -> ArrayBase<ViewRepr<&'a f32>, I>
{
x
}

fn ndarray_to_self_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a f32>, I>) -> ArrayBase<ViewRepr<&'a f32>, I>
{
x
}
}

impl Float for f64 {
type F2 = Self;

fn self_to_float(self) -> Self::F2 {
self
}

fn float_to_self(x: Self::F2) -> Self {
x
}

fn ndarray_to_float<I: Dimension>(x: ArrayBase<OwnedRepr<f64>, I>) -> ArrayBase<OwnedRepr<f64>, I>
{
x
}

fn ndarray_to_self<I: Dimension>(x: ArrayBase<OwnedRepr<f64>, I>) -> ArrayBase<OwnedRepr<f64>, I>
{
x
}

fn ndarray_to_float_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a f64>, I>) -> ArrayBase<ViewRepr<&'a f64>, I>
{
x
}

fn ndarray_to_self_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a f64>, I>) -> ArrayBase<ViewRepr<&'a f64>, I>
{
x
}
}
-impl Float for f32 {}
-impl Float for f64 {}

/// Discrete labels
///
6 changes: 3 additions & 3 deletions src/metrics_clustering.rs
@@ -43,7 +43,7 @@ impl<F: Float> DistanceCount<F> {

/// Divides the total distance from the sample to this cluster by the number of samples in the cluster
pub fn mean_distance(&self) -> F {
-    self.total_distance / F::from(self.count).unwrap()
+    self.total_distance / F::cast(self.count)
}

/// To be used in the cluster in which the sample is located. The distance from the sample to itself
@@ -53,7 +53,7 @@ impl<F: Float> DistanceCount<F> {
if self.count == 1 {
return F::zero();
}
-    self.total_distance / F::from(self.count - 1).unwrap()
+    self.total_distance / F::cast(self.count - 1)
}
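The `count - 1` denominator implements the within-cluster term of the silhouette score, which must exclude the sample's zero distance to itself:

$$a(i) = \frac{1}{\lvert C_I \rvert - 1} \sum_{j \in C_I,\, j \neq i} d(i, j)$$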

/// adds the distance of `other_sample` from `eval_sample` to the total distance of `eval_sample` from the current cluster
@@ -135,7 +135,7 @@ impl<'a, F: Float, L: 'a + Label, D: Data<Elem = F>, T: AsTargets<Elem = L> + La
}
})
.sum::<F>();
-    let score = score / F::from(self.records().nsamples()).unwrap();
+    let score = score / F::cast(self.records().nsamples());
Ok(score)
}
}
6 changes: 3 additions & 3 deletions src/metrics_regression.rs
@@ -72,7 +72,7 @@ pub trait SingleTargetRegression<F: Float, T: AsTargets<Elem = F>>: AsTargets<El
abs_error.sort_by(|a, b| a.partial_cmp(&b).unwrap());
let mid = abs_error.len() / 2;
if abs_error.len() % 2 == 0 {
-    Ok((abs_error[mid - 1] + abs_error[mid]) / F::from(2.0).unwrap())
+    Ok((abs_error[mid - 1] + abs_error[mid]) / F::cast(2.0))
} else {
Ok(abs_error[mid])
}
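For example, with sorted absolute errors [1, 3, 5, 7], `mid = 2` and the even branch returns (3 + 5) / 2 = 4; with [1, 3, 5], `mid = 1` and the odd branch returns 3.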
@@ -98,7 +98,7 @@ pub trait SingleTargetRegression<F: Float, T: AsTargets<Elem = F>>: AsTargets<El
/ (single_target_compare_to
.mapv(|x| (x - mean) * (x - mean))
.sum()
-    + F::from(1e-10).unwrap()))
+    + F::cast(1e-10)))
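Both this method and the biased-variance variant below compute the coefficient of determination, with the small constant guarding against a zero denominator:

$$R^2 = 1 - \frac{\sum_i (y_i - \hat{y}_i)^2}{\sum_i (y_i - \bar{y})^2 + 10^{-10}}$$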
}

/// Same as R-Squared but with biased variance
Expand All @@ -116,7 +116,7 @@ pub trait SingleTargetRegression<F: Float, T: AsTargets<Elem = F>>: AsTargets<El
/ (single_target_compare_to
.mapv(|x| (x - mean) * (x - mean))
.sum()
-    + F::from(1e-10).unwrap()))
+    + F::cast(1e-10)))
}
}
