NdArray bump follow up #121

Merged: 7 commits, Apr 25, 2021
Changes from 1 commit
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -58,7 +58,7 @@ features = ["cblas"]

[dev-dependencies]
ndarray-rand = "0.13"
-approx = { version = "0.4", default-features = false, features = ["std"] }
+approx = "0.4"

linfa-datasets = { path = "datasets", features = ["winequality", "iris", "diabetes"] }

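The root manifest previously disabled approx's default features and re-enabled `std`; `std` is part of approx's default feature set, so the bare version string is equivalent and shorter. A minimal sketch of how this dev-dependency is typically exercised in tests (illustrative, not taken from the PR):

```rust
// Illustrative test only: approx's comparison macros are available with
// the default features, so `approx = "0.4"` is all the manifest needs.
use approx::assert_abs_diff_eq;

#[test]
fn floats_are_close() {
    assert_abs_diff_eq!(0.1_f64 + 0.2, 0.3, epsilon = 1e-12);
}
```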
3 changes: 1 addition & 2 deletions algorithms/linfa-clustering/Cargo.toml
@@ -30,14 +30,13 @@ features = ["std", "derive"]
[dependencies]
ndarray = { version = "0.14", features = ["rayon", "approx"]}
ndarray-linalg = "0.13"
lax = "0.1"
ndarray-rand = "0.13"
ndarray-stats = "0.4"
num-traits = "0.2"
rand_isaac = "0.3"
partitions = "0.2.4"

linfa = { version = "0.3.1", path = "../.." }
partitions = "0.2.4"

[dev-dependencies]
ndarray-npy = { version = "0.7", default-features = false }
41 changes: 22 additions & 19 deletions algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
@@ -119,7 +119,7 @@ impl<F: Float> Clone for GaussianMixtureModel<F> {
}
}

impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
impl<F: Float> GaussianMixtureModel<F> {
fn new<D: Data<Elem = F>, R: Rng + SeedableRng + Clone, T>(
hyperparameters: &GmmHyperParams<F, R>,
dataset: &DatasetBase<ArrayBase<D, Ix2>, T>,
@@ -138,7 +138,7 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
.fit(&dataset)?;
let mut resp = Array::<F, Ix2>::zeros((n_samples, hyperparameters.n_clusters()));
for (k, idx) in model.predict(dataset.records()).iter().enumerate() {
-resp[[k, *idx]] = F::from(1.).unwrap();
+resp[[k, *idx]] = F::cast(1.);
}
resp
}
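The recurring change in this file replaces `F::from(x).unwrap()` with the `F::cast(x)` helper that this PR adds to linfa's `Float` trait (see src/dataset/mod.rs below). A self-contained sketch of the same pattern using only num_traits:

```rust
use num_traits::{AsPrimitive, Float, FromPrimitive};

// Standalone version of the `cast` helper: funnel any primitive through
// f64, then convert into the target float type F.
fn cast<F: Float + FromPrimitive, T: AsPrimitive<f64>>(x: T) -> F {
    F::from_f64(x.as_()).unwrap()
}

fn main() {
    let one: f32 = cast(1.0);   // was: F::from(1.).unwrap()
    let n: f64 = cast(42usize); // integer counts go through f64 the same way
    assert_eq!(one, 1.0);
    assert_eq!(n, 42.0);
}
```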
@@ -150,7 +150,7 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
);
let totals = &resp.sum_axis(Axis(1)).insert_axis(Axis(0));
resp = (resp.reversed_axes() / totals).reversed_axes();
-resp.mapv(|v| F::from(v).unwrap())
+resp.mapv(|v| F::cast(v))
}
};

Expand All @@ -162,7 +162,7 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
hyperparameters.covariance_type(),
hyperparameters.reg_covariance(),
)?;
-weights /= F::from(n_samples).unwrap();
+weights /= F::cast(n_samples);

// GmmCovarType = full
let precisions_chol = Self::compute_precisions_cholesky_full(&covariances)?;
@@ -179,7 +179,7 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
}
}

impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
impl<F: Float> GaussianMixtureModel<F> {
pub fn params(n_clusters: usize) -> GmmHyperParams<F, Isaac64Rng> {
GmmHyperParams::new(n_clusters)
}
@@ -211,7 +211,7 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
reg_covar: F,
) -> Result<(Array1<F>, Array2<F>, Array3<F>)> {
let nk = resp.sum_axis(Axis(0));
-if nk.min().unwrap() < &(F::from(10.).unwrap() * F::epsilon()) {
+if nk.min().unwrap() < &(F::cast(10.) * F::epsilon()) {
return Err(GmmError::EmptyCluster(format!(
"Cluster #{} has no more point. Consider decreasing number of clusters or change initialization.",
nk.argmin().unwrap() + 1
@@ -253,9 +253,12 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
let n_features = covariances.shape()[1];
let mut precisions_chol = Array::zeros((n_clusters, n_features, n_features));
for (k, covariance) in covariances.outer_iter().enumerate() {
-let cov_chol = covariance.cholesky(UPLO::Lower)?;
-let sol =
-    cov_chol.solve_triangular(UPLO::Lower, Diag::NonUnit, &Array::eye(n_features))?;
+let sol = F::map_cauchy_bound_view(covariance, |x| {
+    let decomp = x.cholesky(UPLO::Lower).unwrap();
+    decomp.solve_triangular(UPLO::Lower, Diag::NonUnit, &Array::eye(n_features)).unwrap()
+});

precisions_chol.slice_mut(s![k, .., ..]).assign(&sol.t());
}
Ok(precisions_chol)
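`map_cauchy_bound_view` (added to the `Float` trait in src/dataset/mod.rs below) moves the covariance view into the Lapack-capable associated type `F::F2`, runs the closure there, and converts the result back. Per cluster, the closure computes the transposed inverse of the lower Cholesky factor; a direct f64 sketch of that step, mirroring the ndarray-linalg traits the original code uses:

```rust
use ndarray::Array2;
use ndarray_linalg::{Cholesky, Diag, SolveTriangular, UPLO};

// For one cluster covariance Σ: factor Σ = L·Lᵀ, obtain L⁻¹ by solving
// L·X = I, and return P = (L⁻¹)ᵀ, since P·Pᵀ = Σ⁻¹ (the precision matrix).
fn precision_cholesky(cov: &Array2<f64>) -> Array2<f64> {
    let n = cov.nrows();
    let l = cov.cholesky(UPLO::Lower).unwrap();
    let l_inv = l
        .solve_triangular(UPLO::Lower, Diag::NonUnit, &Array2::eye(n))
        .unwrap();
    l_inv.t().to_owned()
}
```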
@@ -296,12 +299,12 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
let n_samples = observations.nrows();
let (weights, means, covariances) = Self::estimate_gaussian_parameters(
&observations,
&log_resp.mapv(|v| Scalar::exp(v)),
&log_resp.mapv(|x| x.exp()),
&self.covar_type,
reg_covar,
)?;
self.means = means;
-self.weights = weights / F::from(n_samples).unwrap();
+self.weights = weights / F::cast(n_samples);
// GmmCovarType = Full()
self.precisions_chol = Self::compute_precisions_cholesky_full(&covariances)?;
Ok(())
@@ -325,9 +328,9 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
) -> (Array1<F>, Array2<F>) {
let weighted_log_prob = self.estimate_weighted_log_prob(&observations);
let log_prob_norm = weighted_log_prob
-    .mapv(|v| Scalar::exp(v))
+    .mapv(|x| x.exp())
.sum_axis(Axis(1))
-    .mapv(|v| Scalar::ln(v));
+    .mapv(|x| x.ln());
let log_resp = weighted_log_prob - log_prob_norm.to_owned().insert_axis(Axis(1));
(log_prob_norm, log_resp)
}
@@ -368,8 +371,8 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
.assign(&diff.mapv(|v| v * v).sum_axis(Axis(1)))
});
log_prob.mapv(|v| {
-    F::from(-0.5).unwrap()
-        * (v + F::from(n_features as f64 * f64::ln(2. * std::f64::consts::PI)).unwrap())
+    F::cast(-0.5)
+        * (v + F::cast(n_features as f64 * f64::ln(2. * std::f64::consts::PI)))
}) + log_det
}
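For reference, with `v` the squared Mahalanobis distance accumulated above, `log_det` the log-determinant of the precision Cholesky factor, and d = n_features, this expression is the multivariate Gaussian log-density:

$$\ln \mathcal{N}(x \mid \mu_k, \Sigma_k) = -\frac{1}{2}\left(d\ln(2\pi) + (x-\mu_k)^\top \Sigma_k^{-1} (x-\mu_k)\right) + \ln\left|\det L_k^{-1}\right|$$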

@@ -384,16 +387,16 @@ impl<F: Float + Lapack + Scalar> GaussianMixtureModel<F> {
.unwrap()
.slice(s![.., ..; n_features+1])
.to_owned()
-    .mapv(|v| Scalar::ln(v));
+    .mapv(|x| x.ln());
log_diags.sum_axis(Axis(1))
}

fn estimate_log_weights(&self) -> Array1<F> {
-    self.weights().mapv(|v| Scalar::ln(v))
+    self.weights().mapv(|x| x.ln())
}
}

impl<'a, F: Float + Lapack + Scalar, R: Rng + SeedableRng + Clone, D: Data<Elem = F>, T>
impl<'a, F: Float, R: Rng + SeedableRng + Clone, D: Data<Elem = F>, T>
Fit<'a, ArrayBase<D, Ix2>, T> for GmmHyperParams<F, R>
{
type Object = Result<GaussianMixtureModel<F>>;
@@ -420,7 +423,7 @@ impl<'a, F: Float + Lapack + Scalar, R: Rng + SeedableRng + Clone, D: Data<Elem
lower_bound =
GaussianMixtureModel::<F>::compute_lower_bound(&log_resp, log_prob_norm);
let change = lower_bound - prev_lower_bound;
-if ndarray_rand::rand_distr::num_traits::Float::abs(change) < self.tolerance() {
+if change.abs() < self.tolerance() {
converged_iter = Some(n_iter);
break;
}
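Since `num_traits::Float` becomes a supertrait of linfa's `Float` in this PR (see src/dataset/mod.rs below), `abs` now resolves directly on `change` instead of going through the long re-exported trait path.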
6 changes: 4 additions & 2 deletions src/correlation.rs
@@ -24,12 +24,14 @@ fn pearson_correlation<F: Float, D: Data<Elem = F>>(data: &ArrayBase<D, Ix2>) ->

// center distribution by subtracting mean
let mean = data.mean_axis(Axis(0)).unwrap();
+//let std_deviation = mean.clone();
let denoised = data - &mean.insert_axis(Axis(1)).t();

// calculate the covariance matrix
-let covariance = denoised.t().dot(&denoised) / F::from(nobservations - 1).unwrap();
+let covariance = denoised.t().dot(&denoised) / F::cast(nobservations - 1);
// calculate the standard deviation vector
let std_deviation = denoised.var_axis(Axis(0), F::one()).mapv(|x| x.sqrt());
//let std_deviation: Array1<F> = NdArrayCauchy::from_cauchy(std_deviation);

// we will only save the upper triangular matrix as the diagonal is one and
// the lower triangular is a mirror of the upper triangular part
@@ -98,7 +100,7 @@ fn p_values<F: Float, D: Data<Elem = F>>(
}

// divide by the number of iterations to re-scale range
-p_values / F::from(num_iter).unwrap()
+p_values / F::cast(num_iter)
}

/// Pearson Correlation Coefficients (or Bivariate Coefficients)
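Putting the pieces of `pearson_correlation` together: center the data, form the covariance matrix, then normalize by the per-feature standard deviations, giving r_ij = cov_ij / (s_i s_j). A standalone f64 sketch with illustrative data (the real function additionally packs the upper triangle and computes p-values):

```rust
use ndarray::{array, Axis};

fn main() {
    let data = array![[1.0, 2.0], [2.0, 4.1], [3.0, 5.9]];
    let n = data.nrows() as f64;

    // center each feature by subtracting its mean
    let mean = data.mean_axis(Axis(0)).unwrap();
    let denoised = &data - &mean.insert_axis(Axis(0));

    // covariance matrix and per-feature standard deviations (ddof = 1)
    let covariance = denoised.t().dot(&denoised) / (n - 1.0);
    let std_dev = denoised.var_axis(Axis(0), 1.0).mapv(f64::sqrt);

    let r01 = covariance[[0, 1]] / (std_dev[0] * std_dev[1]);
    println!("pearson r = {:.4}", r01); // close to 1.0 for this data
}
```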
122 changes: 116 additions & 6 deletions src/dataset/mod.rs
@@ -4,11 +4,13 @@
//! functionality.
use ndarray::{
Array1, Array2, ArrayBase, ArrayView, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2,
-    Axis, CowArray, Ix2, Ix3, NdFloat, OwnedRepr,
+    Axis, CowArray, Ix2, Ix3, OwnedRepr, ScalarOperand, Dimension, ViewRepr,
};
+use ndarray_linalg::{Scalar, Lapack};
use num_traits::{AsPrimitive, FromPrimitive, NumAssignOps, Signed};
use rand::distributions::uniform::SampleUniform;

+use std::fmt;
use std::cmp::{Ordering, PartialOrd};
use std::collections::{HashMap, HashSet};
use std::hash::Hash;
@@ -31,10 +33,15 @@ pub mod multi_target_model;
/// implement them for 32bit and 64bit floating points. They are used in records of a dataset and, for
/// regression task, in the targets as well.
pub trait Float:
-    NdFloat
-    + FromPrimitive
-    + Signed
+    FromPrimitive
+    + num_traits::Float
+    + PartialOrd
+    + Sync
+    + Send
+    + Default
+    + fmt::Display
+    + fmt::Debug
+    + Signed
+ Sum
+ NumAssignOps
+ AsPrimitive<usize>
@@ -43,10 +50,113 @@ pub trait Float:
+ for<'a> SubAssign<&'a Self>
+ for<'a> DivAssign<&'a Self>
+ SampleUniform
+ ScalarOperand
Review comment (Member): Maybe we could add num_traits::MulAdd<Output = F> (see sparsemat/sprs#278 (comment)) to ease the upgrade to sprs 0.10?

Reply (Member, Author): Yes! I hadn't seen that post.
{
type F2: Scalar + Lapack + Float;

fn self_to_float(self) -> Self::F2;
fn float_to_self(x: Self::F2) -> Self;

fn ndarray_to_float<I: Dimension>(x: ArrayBase<OwnedRepr<Self>, I>) -> ArrayBase<OwnedRepr<Self::F2>, I>;

fn ndarray_to_self<I: Dimension>(x: ArrayBase<OwnedRepr<Self::F2>, I>) -> ArrayBase<OwnedRepr<Self>, I>;

fn ndarray_to_float_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a Self>, I>) -> ArrayBase<ViewRepr<&'a Self::F2>, I>;

fn ndarray_to_self_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a Self::F2>, I>) -> ArrayBase<ViewRepr<&'a Self>, I>;

fn cast<T: AsPrimitive<f64>>(x: T) -> Self {
Self::from(x.as_()).unwrap()
}

fn map_cauchy_bound<G, I, I2>(x: ArrayBase<OwnedRepr<Self>, I>, fnc: G) -> ArrayBase<OwnedRepr<Self>, I2>
where
G: Fn(ArrayBase<OwnedRepr<Self::F2>, I>) -> ArrayBase<OwnedRepr<Self::F2>, I2>,
I: Dimension,
I2: Dimension
{
let x = Self::ndarray_to_float(x);
let x = fnc(x);

Self::ndarray_to_self(x)
}

fn map_cauchy_bound_view<'a, G, I, I2>(x: ArrayBase<ViewRepr<&'a Self>, I>, fnc: G) -> ArrayBase<OwnedRepr<Self>, I2>
where
G: Fn(ArrayBase<ViewRepr<&'a Self::F2>, I>) -> ArrayBase<OwnedRepr<Self::F2>, I2>,
I: Dimension,
I2: Dimension
{
let x = Self::ndarray_to_float_view(x);
let x = fnc(x);

Self::ndarray_to_self(x)
}
}
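The "cauchy" in the helper names comes from the cauchy crate, which provides the `Scalar` trait that ndarray-linalg builds on. Since `F2 = Self` for both `f32` and `f64` (below), every conversion is an identity function and compiles away; the helpers exist so that code bounded only by linfa's `Float` can still reach Lapack routines. A hypothetical usage sketch (`generic_inverse` is illustrative; `Inverse` is ndarray-linalg's matrix-inverse trait):

```rust
use ndarray::Array2;
use ndarray_linalg::Inverse;

// Hypothetical: invert a matrix inside code that is generic over linfa's
// `Float`. The closure runs on F::F2, which is guaranteed Scalar + Lapack.
fn generic_inverse<F: Float>(m: Array2<F>) -> Array2<F> {
    F::map_cauchy_bound(m, |m| m.inv().unwrap())
}
```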

impl Float for f32 {
type F2 = Self;

fn self_to_float(self) -> Self::F2 {
self
}

fn float_to_self(x: Self::F2) -> Self {
x
}

fn ndarray_to_float<I: Dimension>(x: ArrayBase<OwnedRepr<f32>, I>) -> ArrayBase<OwnedRepr<f32>, I>
{
x
}

fn ndarray_to_self<I: Dimension>(x: ArrayBase<OwnedRepr<f32>, I>) -> ArrayBase<OwnedRepr<f32>, I>
{
x
}

fn ndarray_to_float_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a f32>, I>) -> ArrayBase<ViewRepr<&'a f32>, I>
{
x
}

fn ndarray_to_self_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a f32>, I>) -> ArrayBase<ViewRepr<&'a f32>, I>
{
x
}
}

impl Float for f64 {
type F2 = Self;

fn self_to_float(self) -> Self::F2 {
self
}

fn float_to_self(x: Self::F2) -> Self {
x
}

fn ndarray_to_float<I: Dimension>(x: ArrayBase<OwnedRepr<f64>, I>) -> ArrayBase<OwnedRepr<f64>, I>
{
x
}

fn ndarray_to_self<I: Dimension>(x: ArrayBase<OwnedRepr<f64>, I>) -> ArrayBase<OwnedRepr<f64>, I>
{
x
}

fn ndarray_to_float_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a f64>, I>) -> ArrayBase<ViewRepr<&'a f64>, I>
{
x
}

fn ndarray_to_self_view<'a, I: Dimension>(x: ArrayBase<ViewRepr<&'a f64>, I>) -> ArrayBase<ViewRepr<&'a f64>, I>
{
x
}
}
-impl Float for f32 {}
-impl Float for f64 {}

/// Discrete labels
///
6 changes: 3 additions & 3 deletions src/metrics_clustering.rs
@@ -43,7 +43,7 @@ impl<F: Float> DistanceCount<F> {

/// Divides the total distance from the sample to this cluster by the number of samples in the cluster
pub fn mean_distance(&self) -> F {
-    self.total_distance / F::from(self.count).unwrap()
+    self.total_distance / F::cast(self.count)
}

/// To be used in the cluster in which the sample is located. The distance from the sample to itself
@@ -53,7 +53,7 @@ impl<F: Float> DistanceCount<F> {
if self.count == 1 {
return F::zero();
}
-    self.total_distance / F::from(self.count - 1).unwrap()
+    self.total_distance / F::cast(self.count - 1)
}
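The `count - 1` denominator implements the within-cluster term of the silhouette score, which must exclude the sample's zero distance to itself:

$$a(i) = \frac{1}{\lvert C_I \rvert - 1} \sum_{j \in C_I,\, j \neq i} d(i, j)$$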

/// adds the distance of `other_sample` from `eval_sample` to the total distance of `eval_sample` from the current cluster
@@ -135,7 +135,7 @@ impl<'a, F: Float, L: 'a + Label, D: Data<Elem = F>, T: AsTargets<Elem = L> + La
}
})
.sum::<F>();
-    let score = score / F::from(self.records().nsamples()).unwrap();
+    let score = score / F::cast(self.records().nsamples());
Ok(score)
}
}
6 changes: 3 additions & 3 deletions src/metrics_regression.rs
@@ -72,7 +72,7 @@ pub trait SingleTargetRegression<F: Float, T: AsTargets<Elem = F>>: AsTargets<El
abs_error.sort_by(|a, b| a.partial_cmp(&b).unwrap());
let mid = abs_error.len() / 2;
if abs_error.len() % 2 == 0 {
-    Ok((abs_error[mid - 1] + abs_error[mid]) / F::from(2.0).unwrap())
+    Ok((abs_error[mid - 1] + abs_error[mid]) / F::cast(2.0))
} else {
Ok(abs_error[mid])
}
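For example, with sorted absolute errors [1, 3, 5, 7], `mid = 2` and the even branch returns (3 + 5) / 2 = 4; with [1, 3, 5], `mid = 1` and the odd branch returns 3.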
@@ -98,7 +98,7 @@ pub trait SingleTargetRegression<F: Float, T: AsTargets<Elem = F>>: AsTargets<El
/ (single_target_compare_to
.mapv(|x| (x - mean) * (x - mean))
.sum()
-    + F::from(1e-10).unwrap()))
+    + F::cast(1e-10)))
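Both this method and the biased-variance variant below compute the coefficient of determination, with the small constant guarding against a zero denominator:

$$R^2 = 1 - \frac{\sum_i (y_i - \hat{y}_i)^2}{\sum_i (y_i - \bar{y})^2 + 10^{-10}}$$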
}

/// Same as R-Squared but with biased variance
Expand All @@ -116,7 +116,7 @@ pub trait SingleTargetRegression<F: Float, T: AsTargets<Elem = F>>: AsTargets<El
/ (single_target_compare_to
.mapv(|x| (x - mean) * (x - mean))
.sum()
-    + F::from(1e-10).unwrap()))
+    + F::cast(1e-10)))
}
}
