Skip to content

Commit

Permalink
[linfa-svm] Fix SVR nu parameter passing and rework SVR parameterizat…
Browse files Browse the repository at this point in the history
…ion API (#370)

* New API for SVR parameterization

* Fix nu parameter passing

* Add SVR example

* Upload code coverage on pull request only

* Bump linfa-svm version to 0.7.2

* Try to compute code coverage only on PR on master

* Add SVR test with polynomial kernel

* Fix deprecated functions (as it should have been wired)

* Test rewired deprecated API
  • Loading branch information
relf authored Jan 29, 2025
1 parent 936680a commit a30e5f1
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 54 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/codequality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ jobs:
run: cargo clippy --all-targets -- -D warnings

coverage:
needs: codequality
name: coverage
runs-on: ubuntu-latest
if: github.event.pull_request.draft == false
if: github.event.pull_request.draft == false && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master')

steps:
- name: Checkout sources
Expand Down Expand Up @@ -65,4 +66,4 @@ jobs:
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true
verbose: true

7 changes: 5 additions & 2 deletions algorithms/linfa-svm/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "linfa-svm"
version = "0.7.1"
version = "0.7.2"
edition = "2018"
authors = ["Lorenz Schmidt <[email protected]>"]
description = "Support Vector Machines"
Expand Down Expand Up @@ -33,6 +33,9 @@ linfa = { version = "0.7.1", path = "../.." }
linfa-kernel = { version = "0.7.1", path = "../linfa-kernel" }

[dev-dependencies]
linfa-datasets = { version = "0.7.1", path = "../../datasets", features = ["winequality", "diabetes"] }
linfa-datasets = { version = "0.7.1", path = "../../datasets", features = [
"winequality",
"diabetes",
] }
rand_xoshiro = "0.6"
approx = "0.4"
40 changes: 40 additions & 0 deletions algorithms/linfa-svm/examples/noisy_sin_svr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use linfa::prelude::*;
use linfa_svm::{error::Result, Svm};
use ndarray::Array1;
use ndarray_rand::{
rand::{Rng, SeedableRng},
rand_distr::Uniform,
};
use rand_xoshiro::Xoshiro256Plus;

/// Example inspired by https://scikit-learn.org/stable/auto_examples/svm/plot_svm_regression.html
///
/// Draws 40 abscissas from a seeded uniform distribution, sorts them, takes
/// their sine as regression target and overwrites every fifth target with a
/// random perturbation. A C-SVR model (C = 100, loss epsilon = 0.1) with a
/// Gaussian kernel is then fitted, printed, and scored with the mean squared
/// error of its predictions on the training set.
fn main() -> Result<()> {
    // Fixed seed so that the example output is reproducible.
    let mut rng = Xoshiro256Plus::seed_from_u64(42);
    let range = Uniform::new(0., 5.);

    // Sorted sample locations.
    let mut samples: Vec<f64> = (0..40).map(|_| rng.sample(range)).collect();
    samples.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let x = Array1::from_vec(samples);

    // Clean targets: sin(x).
    let mut y = x.mapv(f64::sin);

    // add some noise to one target out of five (indices 0, 5, 10, ...)
    for target in y.iter_mut().step_by(5) {
        *target = 3. * (0.5 - rng.gen::<f64>());
    }

    // Records must be 2-dimensional: one row per sample, a single feature.
    let records = x.into_shape((40, 1)).unwrap();
    let dataset = DatasetBase::new(records, y);

    let params = Svm::params().c_svr(100., Some(0.1)).gaussian_kernel(10.);
    let model = params.fit(&dataset)?;
    println!("{}", model);

    let predicted = model.predict(&dataset);
    let err = predicted.mean_squared_error(&dataset).unwrap();
    println!("err={}", err);

    Ok(())
}
32 changes: 26 additions & 6 deletions algorithms/linfa-svm/src/hyperparams.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ impl<F: Float, T> SvmParams<F, T> {
}

/// Sets the model to use the Polynomial kernel. For this kernel the
/// distance between two points is computed as: `d(x, x') = (<x, x'> + costant)^(degree)`
/// distance between two points is computed as: `d(x, x') = (<x, x'> + constant)^(degree)`
pub fn polynomial_kernel(mut self, constant: F, degree: F) -> Self {
self.0.kernel = Kernel::params().method(KernelMethod::Polynomial(constant, degree));
self
Expand Down Expand Up @@ -168,16 +168,36 @@ impl<F: Float, T> SvmParams<F, T> {
}

impl<F: Float> SvmParams<F, F> {
/// Set the C value for regression
/// Set the C value for regression and solver epsilon stopping condition.
/// Loss epsilon value is fixed at 0.1.
#[deprecated(since = "0.7.2", note = "Use .c_svr() and .eps()")]
pub fn c_eps(mut self, c: F, eps: F) -> Self {
self.0.c = Some((c, eps));
self.0.c = Some((c, F::cast(0.1)));
self.0.nu = None;
self.0.solver_params.eps = eps;
self
}

/// Set the Nu-Eps value for regression
/// Set the Nu value for regression and solver epsilon stopping condition.
/// The C value used is fixed at 1.0.
#[deprecated(since = "0.7.2", note = "Use .nu_svr() and .eps()")]
pub fn nu_eps(mut self, nu: F, eps: F) -> Self {
self.0.nu = Some((nu, eps));
self.0.nu = Some((nu, F::one()));
self.0.c = None;
self.0.solver_params.eps = eps;
self
}

/// Set the C value and optionally an epsilon value used in the loss function (default 0.1) for regression
pub fn c_svr(mut self, c: F, loss_eps: Option<F>) -> Self {
self.0.c = Some((c, loss_eps.unwrap_or(F::cast(0.1))));
self.0.nu = None;
self
}

/// Set the Nu and optionally a C value (default 1.) for regression
pub fn nu_svr(mut self, nu: F, c: Option<F>) -> Self {
self.0.nu = Some((nu, c.unwrap_or(F::one())));
self.0.c = None;
self
}
Expand Down Expand Up @@ -219,7 +239,7 @@ impl<F: Float, L> ParamGuard for SvmParams<F, L> {
}
}
if let Some((nu, _)) = self.0.nu {
if nu <= F::zero() {
if nu <= F::zero() || nu > F::one() {
return Err(SvmError::InvalidNu(nu.to_f32().unwrap()));
}
}
Expand Down
110 changes: 66 additions & 44 deletions algorithms/linfa-svm/src/regression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ pub fn fit_nu<F: Float>(
dataset: ArrayView2<F>,
kernel: Kernel<F>,
target: &[F],
c: F,
nu: F,
c: F,
) -> Svm<F, F> {
let mut alpha = vec![F::zero(); 2 * target.len()];
let mut linear_term = vec![F::zero(); 2 * target.len()];
Expand Down Expand Up @@ -128,21 +128,21 @@ macro_rules! impl_regression {
let target = target.as_slice().unwrap();

let ret = match (self.c(), self.nu()) {
(Some((c, eps)), _) => fit_epsilon(
(Some((c, p)), _) => fit_epsilon(
self.solver_params().clone(),
dataset.records().view(),
kernel,
target,
c,
eps,
p,
),
(None, Some((nu, eps))) => fit_nu(
(None, Some((nu, c))) => fit_nu(
self.solver_params().clone(),
dataset.records().view(),
kernel,
target,
nu,
eps,
c,
),
_ => panic!("Set either C value or Nu value"),
};
Expand Down Expand Up @@ -206,73 +206,95 @@ pub mod tests {
use linfa::dataset::Dataset;
use linfa::metrics::SingleTargetRegression;
use linfa::traits::{Fit, Predict};
use ndarray::Array;

#[test]
fn test_linear_epsilon_regression() -> Result<()> {
let target = Array::linspace(0f64, 10., 100);
let mut sin_curve = Array::zeros((100, 1));
for (i, val) in target.iter().enumerate() {
sin_curve[(i, 0)] = *val;
}

let dataset = Dataset::new(sin_curve, target);

let model = Svm::params()
.nu_eps(2., 0.01)
.gaussian_kernel(50.)
.fit(&dataset)?;
use linfa::DatasetBase;
use ndarray::{Array, Array1, Array2};

fn _check_model(model: Svm<f64, f64>, dataset: &DatasetBase<Array2<f64>, Array1<f64>>) {
println!("{}", model);

let predicted = model.predict(dataset.records());
let err = predicted.mean_squared_error(&dataset).unwrap();
println!("err={}", err);
assert!(predicted.mean_squared_error(&dataset).unwrap() < 1e-2);

Ok(())
}

#[test]
fn test_linear_nu_regression() -> Result<()> {
let target = Array::linspace(0f64, 10., 100);
let mut sin_curve = Array::zeros((100, 1));
for (i, val) in target.iter().enumerate() {
sin_curve[(i, 0)] = *val;
}

let dataset = Dataset::new(sin_curve, target);
fn test_epsilon_regression_linear() -> Result<()> {
// simple 2d straight line
let targets = Array::linspace(0f64, 10., 100);
let records = targets.clone().into_shape((100, 1)).unwrap();
let dataset = Dataset::new(records, targets);

let model = Svm::params()
.nu_eps(2., 0.01)
.gaussian_kernel(50.)
.c_svr(5., None)
.linear_kernel()
.fit(&dataset)?;
_check_model(model, &dataset);

println!("{}", model);

let predicted = model.predict(&dataset);
assert!(predicted.mean_squared_error(&dataset).unwrap() < 1e-2);
// Old API
#[allow(deprecated)]
let model2 = Svm::params()
.c_eps(5., 1e-3)
.linear_kernel()
.fit(&dataset)?;
_check_model(model2, &dataset);

Ok(())
}

#[test]
fn test_regression_linear_kernel() -> Result<()> {
fn test_nu_regression_linear() -> Result<()> {
// simple 2d straight line
let targets = Array::linspace(0f64, 10., 100);
let records = targets.clone().into_shape((100, 1)).unwrap();

let dataset = Dataset::new(records, targets);

// Test the precomputed dot product in the linear kernel case
let model = Svm::params()
.nu_eps(2., 0.01)
.nu_svr(0.5, Some(1.))
.linear_kernel()
.fit(&dataset)?;
_check_model(model, &dataset);

println!("{}", model);
// Old API
#[allow(deprecated)]
let model2 = Svm::params()
.nu_eps(0.5, 1e-3)
.linear_kernel()
.fit(&dataset)?;
_check_model(model2, &dataset);
Ok(())
}

let predicted = model.predict(&dataset);
assert!(predicted.mean_squared_error(&dataset).unwrap() < 1e-2);
#[test]
fn test_epsilon_regression_gaussian() -> Result<()> {
let records = Array::linspace(0f64, 10., 100)
.into_shape((100, 1))
.unwrap();
let sin_curve = records.mapv(|v| v.sin()).into_shape((100,)).unwrap();
let dataset = Dataset::new(records, sin_curve);

let model = Svm::params()
.c_svr(100., Some(0.1))
.gaussian_kernel(10.)
.eps(1e-3)
.fit(&dataset)?;
_check_model(model, &dataset);
Ok(())
}

#[test]
fn test_nu_regression_polynomial() -> Result<()> {
let n = 100;
let records = Array::linspace(0f64, 5., n).into_shape((n, 1)).unwrap();
let sin_curve = records.mapv(|v| v.sin()).into_shape((n,)).unwrap();
let dataset = Dataset::new(records, sin_curve);

let model = Svm::params()
.nu_svr(0.01, None)
.polynomial_kernel(1., 3.)
.eps(1e-3)
.fit(&dataset)?;
_check_model(model, &dataset);
Ok(())
}
}

0 comments on commit a30e5f1

Please sign in to comment.