Add theta tuning interface for SGP
relf committed Jan 29, 2024
1 parent 54bb51c commit 4ec0e22
Showing 10 changed files with 319 additions and 146 deletions.
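At the API level, this commit replaces the initial_theta setter with the theta_guess / theta_bounds / theta_tuning builder methods on GpParams. A minimal sketch of how a fit reads after this change (the GaussianProcess::params entry point, module paths, and the toy data follow the crate's existing examples and are assumptions here, not part of this diff):

use egobox_gp::correlation_models::SquaredExponentialCorr;
use egobox_gp::mean_models::ConstantMean;
use egobox_gp::GaussianProcess;
use linfa::prelude::*;
use ndarray::array;

// Toy 1D training data (illustrative only).
let xt = array![[0.0], [1.0], [2.0], [3.0], [4.0]];
let yt = array![[0.0], [1.0], [1.5], [0.9], [1.0]];

let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
    ConstantMean::default(),
    SquaredExponentialCorr::default(),
)
// Replaces the removed `.initial_theta(Some(vec![0.1]))`.
.theta_guess(vec![0.1])
.fit(&Dataset::new(xt, yt))
.expect("GP fit error");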
2 changes: 1 addition & 1 deletion gp/benches/gp.rs
@@ -58,7 +58,7 @@ fn criterion_gp(c: &mut Criterion) {
SquaredExponentialCorr::default(),
)
.kpls_dim(Some(1))
.initial_theta(Some(vec![1.0]))
.theta_guess(vec![1.0])
.fit(&Dataset::new(xt.to_owned(), yt.to_owned()))
.expect("GP fit error"),
)
70 changes: 45 additions & 25 deletions gp/src/algorithm.rs
@@ -15,15 +15,15 @@ use ndarray::{arr1, s, Array, Array1, Array2, ArrayBase, Axis, Data, Ix1, Ix2, Z
use ndarray_einsum_beta::*;
#[cfg(feature = "blas")]
use ndarray_linalg::{cholesky::*, eigh::*, qr::*, svd::*, triangular::*};
use ndarray_rand::rand::SeedableRng;
use ndarray_rand::rand::{Rng, SeedableRng};
use ndarray_stats::QuantileExt;

use rand_xoshiro::Xoshiro256Plus;
#[cfg(feature = "serializable")]
use serde::{Deserialize, Serialize};
use std::fmt;

use ndarray_rand::rand_distr::{Normal, Uniform};
use ndarray_rand::rand_distr::Normal;
use ndarray_rand::RandomExt;

// const LOG10_20: f64 = 1.301_029_995_663_981_3; //f64::log10(20.);
@@ -751,7 +751,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
let y = dataset.targets();
if let Some(d) = self.kpls_dim() {
if *d > x.ncols() {
return Err(GpError::InvalidValue(format!(
return Err(GpError::InvalidValueError(format!(
"Dimension reduction {} should be smaller than actual \
training input dimensions {}",
d,
@@ -786,12 +786,17 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
"Warning: multiple x input features have the same value (at least same row twice)."
);
}
let theta0 = self
.initial_theta()
.clone()
.map_or(Array1::from_elem(w_star.ncols(), F::cast(1e-2)), |v| {
Array::from_vec(v)
});

// Initial guess for theta
let theta0_dim = self.theta_tuning().theta0().len();
let theta0 = if theta0_dim == 1 {
Array1::from_elem(w_star.ncols(), self.theta_tuning().theta0()[0])
} else if theta0_dim == w_star.ncols() {
Array::from_vec(self.theta_tuning().theta0().to_vec())
} else {
panic!("Initial guess for theta should be either 1-dim or dim of xtrain (w_star.ncols()), got {}", theta0_dim)
};

let fx = self.mean().value(&xtrain.data);
let base: f64 = 10.;
let objfn = |x: &[f64], _gradient: Option<&mut [f64]>, _params: &mut ()| -> f64 {
@@ -815,7 +820,20 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem

// Multistart: user theta0 + 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1., 10.
// let bounds = vec![(F::cast(-6.), F::cast(2.)); theta0.len()];
let (theta0s, bounds) = prepare_multistart(self.n_start(), &theta0);
let bounds_dim = self.theta_tuning().bounds().len();
let bounds = if bounds_dim == 1 {
vec![self.theta_tuning().bounds()[0]; w_star.ncols()]
} else if bounds_dim == w_star.ncols() {
self.theta_tuning().bounds().to_vec()
} else {
panic!(
"Bounds for theta should be either 1-dim or dim of xtrain ({}), got {}",
w_star.ncols(),
bounds_dim
)
};

let (theta0s, bounds) = prepare_multistart(self.n_start(), &theta0, &bounds);

let opt_thetas = theta0s.map_axis(Axis(1), |theta| {
optimize_params(objfn, &theta.to_owned(), &bounds)
@@ -840,11 +858,13 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
pub(crate) fn prepare_multistart<F: Float>(
n_start: usize,
theta0: &Array1<F>,
bounds: &[(F, F)],
) -> (Array2<F>, Vec<(F, F)>) {
let limits = (F::cast(-6.), F::cast(2.));
// let mut bounds = vec![(F::cast(1e-16).log10(), F::cast(1.).log10()); params.ncols()];
//let limits = (F::cast(-16), F::cast(0.));
let bounds = vec![limits; theta0.len()];
// Use log10 theta as optimization parameter
let bounds: Vec<(F, F)> = bounds
.iter()
.map(|(lo, up)| (lo.log10(), up.log10()))
.collect();

// Multistart: user theta0 + 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1., 10.
let mut theta0s = Array2::zeros((n_start + 1, theta0.len()));
@@ -854,17 +874,17 @@ pub(crate) fn prepare_multistart<F: Float>(
std::cmp::Ordering::Equal => {
//let mut rng = Xoshiro256Plus::seed_from_u64(42);
let mut rng = Xoshiro256Plus::from_entropy();
theta0s.row_mut(1).assign(&Array::random_using(
theta0.len(),
Uniform::new(limits.0, limits.1),
&mut rng,
))
let vals = bounds.iter().map(|(a, b)| rng.gen_range(*a..*b)).collect();
theta0s.row_mut(1).assign(&Array::from_vec(vals))
}
std::cmp::Ordering::Greater => {
let mut xlimits: Array2<F> = Array2::zeros((theta0.len(), 2));
for mut row in xlimits.rows_mut() {
row.assign(&arr1(&[limits.0, limits.1]));
}
let mut xlimits: Array2<F> = Array2::zeros((bounds.len(), 2));
// for mut row in xlimits.rows_mut() {
// row.assign(&arr1(&[limits.0, limits.1]));
// }
Zip::from(xlimits.rows_mut())
.and(&bounds)
.for_each(|mut row, limits| row.assign(&arr1(&[limits.0, limits.1])));
// Use a seed here for reproducibility. Do we need to make it truly random?
// Probably not, as it is just to get init values spread over
// [1e-6, 20] for multistart thanks to the LHS method.
@@ -1187,7 +1207,7 @@ mod tests {
ConstantMean::default(),
SquaredExponentialCorr::default(),
)
.initial_theta(Some(vec![0.1]))
.theta_guess(vec![0.1])
.kpls_dim(Some(1))
.fit(&Dataset::new(xt, yt))
.expect("GP fit error");
@@ -1210,7 +1230,7 @@
[<$regr Mean>]::default(),
[<$corr Corr>]::default(),
)
.initial_theta(Some(vec![0.1]))
.theta_guess(vec![0.1])
.fit(&Dataset::new(xt, yt))
.expect("GP fit error");
let yvals = gp
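In the train logic above, the user-supplied theta guess and bounds are expanded against the (possibly PLS-reduced) input dimension: a single value is broadcast to every component, a vector of matching length is used as-is, and anything else panics. A standalone sketch of that rule (hypothetical helper, not part of the crate):

// Hypothetical helper mirroring the theta0/bounds expansion in train().
fn expand_per_dim<T: Clone>(vals: &[T], dim: usize, what: &str) -> Vec<T> {
    match vals.len() {
        1 => vec![vals[0].clone(); dim],  // broadcast a single value to all dimensions
        n if n == dim => vals.to_vec(),   // already one value per (reduced) input dimension
        n => panic!("{} should be either 1-dim or dim of xtrain ({}), got {}", what, dim, n),
    }
}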
5 changes: 1 addition & 4 deletions gp/src/errors.rs
@@ -21,9 +21,6 @@ pub enum GpError {
/// When PLS fails
#[error("PLS error: {0}")]
PlsError(#[from] linfa_pls::PlsError),
/// When a value is invalid
#[error("PLS error: {0}")]
InvalidValue(String),
/// When a linfa error occurs
#[error(transparent)]
LinfaError(#[from] linfa::error::Error),
@@ -36,7 +33,7 @@ pub enum GpError {
/// When error during loading
#[error("Load error: {0}")]
LoadError(String),
/// When error during loading
/// When an error is due to a bad value
#[error("InvalidValue error: {0}")]
InvalidValueError(String),
}
130 changes: 107 additions & 23 deletions gp/src/parameters.rs
@@ -2,12 +2,71 @@ use crate::correlation_models::{CorrelationModel, SquaredExponentialCorr}
use crate::errors::{GpError, Result};
use crate::mean_models::{ConstantMean, RegressionModel};
use linfa::{Float, ParamGuard};
use std::convert::TryFrom;

#[cfg(feature = "serializable")]
use serde::{Deserialize, Serialize};

/// A structure to represent a n-dim parameter estimation
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serializable", derive(Serialize, Deserialize))]
pub struct ParamTuning<F: Float> {
pub guess: Vec<F>,
pub bounds: Vec<(F, F)>,
}

impl<F: Float> TryFrom<ParamTuning<F>> for ThetaTuning<F> {
type Error = GpError;
fn try_from(pt: ParamTuning<F>) -> Result<ThetaTuning<F>> {
if pt.guess.len() != pt.bounds.len() && (pt.guess.len() != 1 || pt.bounds.len() != 1) {
return Err(GpError::InvalidValueError(
"Bad theta tuning specification".to_string(),
));
}
// TODO: check if guess in bounds
Ok(ThetaTuning(pt))
}
}

/// A structure for theta hyperparameter guesses
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serializable", derive(Serialize, Deserialize))]

pub struct ThetaTuning<F: Float>(ParamTuning<F>);
impl<F: Float> Default for ThetaTuning<F> {
fn default() -> ThetaTuning<F> {
ThetaTuning(ParamTuning {
guess: vec![F::cast(0.01)],
bounds: vec![(F::cast(1e-6), F::cast(1e2))],
})
}
}

impl<F: Float> From<ThetaTuning<F>> for ParamTuning<F> {
fn from(tt: ThetaTuning<F>) -> ParamTuning<F> {
ParamTuning {
guess: tt.0.guess,
bounds: tt.0.bounds,
}
}
}

impl<F: Float> ThetaTuning<F> {
pub fn theta0(&self) -> &[F] {
&self.0.guess
}
pub fn bounds(&self) -> &[(F, F)] {
&self.0.bounds
}
}

/// A set of validated GP parameters.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct GpValidParams<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> {
/// Parameter of the autocorrelation model
pub(crate) theta: Option<Vec<F>>,
/// Parameter guess of the autocorrelation model
pub(crate) theta_tuning: ThetaTuning<F>,
/// Regression model representing the mean(x)
pub(crate) mean: Mean,
/// Correlation model representing the spatial correlation between errors at e(x) and e(x')
@@ -24,6 +83,7 @@ impl<F: Float> Default for GpValidParams<F, ConstantMean, SquaredExponentialCorr
fn default() -> GpValidParams<F, ConstantMean, SquaredExponentialCorr> {
GpValidParams {
theta: None,
theta_tuning: ThetaTuning::default(),
mean: ConstantMean(),
corr: SquaredExponentialCorr(),
kpls_dim: None,
@@ -34,11 +94,6 @@ impl<F: Float> Default for GpValidParams<F, ConstantMean, SquaredExponentialCorr
}

impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GpValidParams<F, Mean, Corr> {
/// Get starting theta value for optimization
pub fn initial_theta(&self) -> &Option<Vec<F>> {
&self.theta
}

/// Get mean model
pub fn mean(&self) -> &Mean {
&self.mean
@@ -49,6 +104,11 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GpValidParam
&self.corr
}

/// Get starting theta value for optimization
pub fn theta_tuning(&self) -> &ThetaTuning<F> {
&self.theta_tuning
}

/// Get number of components used by PLS
pub fn kpls_dim(&self) -> &Option<usize> {
&self.kpls_dim
@@ -77,6 +137,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GpParams<F,
pub fn new(mean: Mean, corr: Corr) -> GpParams<F, Mean, Corr> {
Self(GpValidParams {
theta: None,
theta_tuning: ThetaTuning::default(),
mean,
corr,
kpls_dim: None,
Expand All @@ -85,14 +146,6 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GpParams<F,
})
}

/// Set initial value for theta hyper parameter.
///
/// During training process, the internal optimization is started from `initial_theta`.
pub fn initial_theta(mut self, theta: Option<Vec<F>>) -> Self {
self.0.theta = theta;
self
}

/// Set mean model.
pub fn mean(mut self, mean: Mean) -> Self {
self.0.mean = mean;
@@ -112,6 +165,36 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GpParams<F,
self
}

/// Set initial value for theta hyper parameter.
///
/// During training process, the internal optimization is started from `theta_guess`.
pub fn theta_guess(mut self, theta_guess: Vec<F>) -> Self {
self.0.theta_tuning = ParamTuning {
guess: theta_guess,
..ThetaTuning::default().into()
}
.try_into()
.unwrap();
self
}

/// Set theta hyper parameter search space.
pub fn theta_bounds(mut self, theta_bounds: Vec<(F, F)>) -> Self {
self.0.theta_tuning = ParamTuning {
bounds: theta_bounds,
..ThetaTuning::default().into()
}
.try_into()
.unwrap();
self
}

/// Set theta hyper parameter tuning
pub fn theta_tuning(mut self, theta_tuning: ThetaTuning<F>) -> Self {
self.0.theta_tuning = theta_tuning;
self
}

/// Set the number of internal GP hyperparameter theta optimization restarts
pub fn n_start(mut self, n_start: usize) -> Self {
self.0.n_start = n_start;
@@ -136,18 +219,19 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> ParamGuard
fn check_ref(&self) -> Result<&Self::Checked> {
if let Some(d) = self.0.kpls_dim {
if d == 0 {
return Err(GpError::InvalidValue("`kpls_dim` canot be 0!".to_string()));
return Err(GpError::InvalidValueError(
"`kpls_dim` canot be 0!".to_string(),
));
}
if let Some(theta) = self.0.initial_theta() {
if theta.len() > 1 && d > theta.len() {
return Err(GpError::InvalidValue(format!(
"Dimension reduction ({}) should be smaller than expected
let theta = self.0.theta_tuning().theta0();
if theta.len() > 1 && d > theta.len() {
return Err(GpError::InvalidValueError(format!(
"Dimension reduction ({}) should be smaller than expected
training input size inferred from given initial theta length ({})",
d,
theta.len()
)));
};
}
d,
theta.len()
)));
};
}
Ok(&self.0)
}
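To set both the guess and the bounds at once, go through theta_tuning with a validated ThetaTuning: theta_guess and theta_bounds above each rebuild the tuning from ThetaTuning::default(), so calling one after the other resets the field set first. A sketch, assuming ParamTuning and ThetaTuning are re-exported at the crate root and reusing the setup from the sketch near the top of this page:

use std::convert::TryFrom;

// The conversion defined above rejects inconsistent guess/bounds lengths
// with GpError::InvalidValueError.
let tuning = ThetaTuning::try_from(ParamTuning {
    guess: vec![0.1],           // starting theta for the internal optimization
    bounds: vec![(1e-3, 10.0)], // search interval, broadcast per input dimension
})
.expect("valid theta tuning");

let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
    ConstantMean::default(),
    SquaredExponentialCorr::default(),
)
.theta_tuning(tuning)
.fit(&Dataset::new(xt, yt))
.expect("GP fit error");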
(Diffs for the remaining 6 changed files are not shown here.)