From 2d1baf30612ea7e13cb5edfe5e83e7b6c57acdfa Mon Sep 17 00:00:00 2001 From: relf Date: Sun, 31 Dec 2023 13:17:41 +0100 Subject: [PATCH] WIP --- gp/src/lib.rs | 5 + gp/src/parameters.rs | 10 +- gp/src/sgp_algorithm.rs | 634 +++++++++++++++++++++++++++++++++++++++ gp/src/sgp_parameters.rs | 185 ++++++++++++ 4 files changed, 829 insertions(+), 5 deletions(-) create mode 100644 gp/src/sgp_algorithm.rs create mode 100644 gp/src/sgp_parameters.rs diff --git a/gp/src/lib.rs b/gp/src/lib.rs index c536a069..4368146e 100644 --- a/gp/src/lib.rs +++ b/gp/src/lib.rs @@ -88,10 +88,15 @@ mod algorithm; pub mod correlation_models; mod errors; pub mod mean_models; +mod sgp_algorithm; + mod parameters; +mod sgp_parameters; mod utils; pub use algorithm::*; pub use errors::*; pub use parameters::*; +pub use sgp_algorithm::*; +pub use sgp_parameters::*; pub use utils::NormalizedMatrix; diff --git a/gp/src/parameters.rs b/gp/src/parameters.rs index b1068bef..64e6578d 100644 --- a/gp/src/parameters.rs +++ b/gp/src/parameters.rs @@ -7,15 +7,15 @@ use linfa::{Float, ParamGuard}; #[derive(Clone, Debug, PartialEq, Eq)] pub struct GpValidParams, Corr: CorrelationModel> { /// Parameter of the autocorrelation model - theta: Option>, + pub(crate) theta: Option>, /// Regression model representing the mean(x) - mean: Mean, + pub(crate) mean: Mean, /// Correlation model representing the spatial correlation between errors at e(x) and e(x') - corr: Corr, + pub(crate) corr: Corr, /// Optionally apply dimension reduction (KPLS) or not - kpls_dim: Option, + pub(crate) kpls_dim: Option, /// Parameter to improve numerical stability - nugget: F, + pub(crate) nugget: F, } impl Default for GpValidParams { diff --git a/gp/src/sgp_algorithm.rs b/gp/src/sgp_algorithm.rs new file mode 100644 index 00000000..4dc36390 --- /dev/null +++ b/gp/src/sgp_algorithm.rs @@ -0,0 +1,634 @@ +use crate::correlation_models::*; +use crate::errors::{GpError, Result}; +use crate::mean_models::*; +use crate::sgp_parameters::{SgpParams, SgpValidParams, SparseMethod}; +use crate::utils::{pairwise_differences, NormalizedMatrix}; +use egobox_doe::{Lhs, SamplingMethod}; +use linfa::prelude::{Dataset, DatasetBase, Fit, Float, PredictInplace}; +use linfa_linalg::{cholesky::*, eigh::*, qr::*, svd::*, triangular::*}; +use linfa_pls::PlsRegression; +use ndarray::{ + arr1, concatenate, s, stack, Array, Array1, Array2, ArrayBase, Axis, Data, Ix2, Zip, +}; +use ndarray_einsum_beta::*; +use ndarray_stats::QuantileExt; + +use rand_xoshiro::rand_core::SeedableRng; +use rand_xoshiro::Xoshiro256Plus; +#[cfg(feature = "serializable")] +use serde::{Deserialize, Serialize}; +use std::fmt; + +// use ndarray_rand::rand_distr::Normal; +// use ndarray_rand::RandomExt; + +// const LOG10_20: f64 = 1.301_029_995_663_981_3; //f64::log10(20.); +const N_START: usize = 10; // number of optimization restart (aka multistart) + +/// Woodbury data computed during training and used for prediction +#[derive(Debug, Serialize, Deserialize)] +struct WoodburyData { + vec: Array1, + inv: Array2, +} +impl Clone for WoodburyData { + fn clone(&self) -> Self { + WoodburyData { + vec: self.vec.to_owned(), + inv: self.inv.clone(), + } + } +} + +/// Structure for trained Gaussian Process model +#[derive(Debug)] +#[cfg_attr( + feature = "serializable", + derive(Serialize, Deserialize), + serde(bound(serialize = "F: Serialize", deserialize = "F: Deserialize<'de>")) +)] +pub struct SparseGaussianProcess, Corr: CorrelationModel> { + /// Regression model + #[cfg_attr( + feature = "serializable", + 
serde(bound(serialize = "Mean: Serialize", deserialize = "Mean: Deserialize<'de>")) + )] + mean: Mean, + /// Correlation kernel + #[cfg_attr( + feature = "serializable", + serde(bound(serialize = "Corr: Serialize", deserialize = "Corr: Deserialize<'de>")) + )] + corr: Corr, + /// Sparse method used + method: SparseMethod, + /// Parameter of the autocorrelation model + theta: Array1, + /// Gaussian process variance + sigma2: F, + /// Gaussian noise variance + noise: F, + /// Weights in case of KPLS dimension reduction coming from PLS regression (orig_dim, kpls_dim) + w_star: Array2, + /// Training inputs + xtrain: Array2, + /// Training outputs + ytrain: Array2, + /// Data used for prediction + w_data: WoodburyData, +} + +/// Kriging as GP special case when using constant mean and squared exponential correlation +pub type SparseKriging = SgpParams; + +impl SparseKriging { + pub fn params() -> SgpParams { + SgpParams::new(ConstantMean(), SquaredExponentialCorr()) + } +} + +impl, Corr: CorrelationModel> Clone + for SparseGaussianProcess +{ + fn clone(&self) -> Self { + Self { + mean: self.mean, + corr: self.corr, + method: self.method.clone(), + theta: self.theta.to_owned(), + sigma2: self.sigma2, + noise: self.noise, + w_star: self.w_star.to_owned(), + xtrain: self.xtrain.clone(), + ytrain: self.xtrain.clone(), + w_data: self.w_data.clone(), + } + } +} + +impl, Corr: CorrelationModel> fmt::Display + for SparseGaussianProcess +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "SGP({}, {})", self.mean, self.corr) + } +} + +impl, Corr: CorrelationModel> + SparseGaussianProcess +{ + /// Gp parameters contructor + pub fn params, NewCorr: CorrelationModel>( + mean: NewMean, + corr: NewCorr, + ) -> SgpParams { + SgpParams::new(mean, corr) + } + + /// Predict output values at n given `x` points of nx components specified as a (n, nx) matrix. + /// Returns n scalar output values as (n, 1) column vector. + pub fn predict_values(&self, x: &ArrayBase, Ix2>) -> Result> { + todo!() + } + + /// Predict variance values at n given `x` points of nx components specified as a (n, nx) matrix. + /// Returns n variance values as (n, 1) column vector. + pub fn predict_variances(&self, x: &ArrayBase, Ix2>) -> Result> { + todo!() + } + + /// Retrieve number of PLS components 1 <= n <= x dimension + pub fn kpls_dim(&self) -> Option { + if self.w_star.ncols() < self.xtrain.ncols() { + Some(self.w_star.ncols()) + } else { + None + } + } + + /// Retrieve input dimension before kpls dimension reduction if any + pub fn input_dim(&self) -> usize { + self.ytrain.ncols() + } + + /// Retrieve output dimension + pub fn output_dim(&self) -> usize { + self.ytrain.ncols() + } +} + +impl PredictInplace, Array2> + for SparseGaussianProcess +where + F: Float, + D: Data, + Mean: RegressionModel, + Corr: CorrelationModel, +{ + fn predict_inplace(&self, x: &ArrayBase, y: &mut Array2) { + assert_eq!( + x.nrows(), + y.nrows(), + "The number of data points must match the number of output targets." + ); + + let values = self.predict_values(x).expect("GP Prediction"); + *y = values; + } + + fn default_target(&self, x: &ArrayBase) -> Array2 { + Array2::zeros((x.nrows(), self.output_dim())) + } +} + +/// Gausssian Process adaptator to implement `linfa::Predict` trait for variance prediction. 
+struct GpVariancePredictor<'a, F, Mean, Corr>(&'a SparseGaussianProcess) +where + F: Float, + Mean: RegressionModel, + Corr: CorrelationModel; + +impl<'a, F, D, Mean, Corr> PredictInplace, Array2> + for GpVariancePredictor<'a, F, Mean, Corr> +where + F: Float, + D: Data, + Mean: RegressionModel, + Corr: CorrelationModel, +{ + fn predict_inplace(&self, x: &ArrayBase, y: &mut Array2) { + assert_eq!( + x.nrows(), + y.nrows(), + "The number of data points must match the number of output targets." + ); + + let values = self.0.predict_variances(x).expect("GP Prediction"); + *y = values; + } + + fn default_target(&self, x: &ArrayBase) -> Array2 { + Array2::zeros((x.nrows(), self.0.output_dim())) + } +} + +impl, Corr: CorrelationModel, D: Data> + Fit, ArrayBase, GpError> for SgpValidParams +{ + type Object = SparseGaussianProcess; + + /// Fit GP parameters using maximum likelihood + fn fit( + &self, + dataset: &DatasetBase, ArrayBase>, + ) -> Result { + let x = dataset.records(); + let y = dataset.targets(); + if let Some(d) = self.kpls_dim() { + if *d > x.ncols() { + return Err(GpError::InvalidValue(format!( + "Dimension reduction {} should be smaller than actual \ + training input dimensions {}", + d, + x.ncols() + ))); + }; + } + + let xtrain = x.to_owned(); + let ytrain = y.to_owned(); + + let mut w_star = Array2::eye(x.ncols()); + if let Some(n_components) = self.kpls_dim() { + let ds = Dataset::new(x.to_owned(), y.to_owned()); + w_star = PlsRegression::params(*n_components).fit(&ds).map_or_else( + |e| match e { + linfa_pls::PlsError::PowerMethodConstantResidualError() => { + Ok(Array2::zeros((x.ncols(), *n_components))) + } + err => Err(err), + }, + |v| Ok(v.rotations().0.to_owned()), + )?; + }; + let theta0: Array1<_> = self + .initial_theta() + .clone() + .map_or(Array1::from_elem(w_star.ncols(), F::cast(1e-2)), |v| { + Array::from_vec(v) + }); + let sigma2_0 = F::cast(1e-2); + let noise_0 = F::cast(1e-2); + + let n = theta0.len() + 2; + let mut params_0 = Array1::zeros(n); + params_0.slice_mut(s![..n - 2]).assign(&theta0); + params_0[n - 2] = sigma2_0; + params_0[n - 1] = noise_0; + + let base: f64 = 10.; + let objfn = |x: &[f64], _gradient: Option<&mut [f64]>, _params: &mut ()| -> f64 { + for v in x.iter() { + // check theta as optimizer may give nan values + if v.is_nan() { + // shortcut return worst value wrt to rlf minimization + return f64::INFINITY; + } + } + let input = Array1::from_shape_vec( + (x.len(),), + x.iter().map(|v| F::cast(base.powf(*v))).collect(), + ) + .unwrap(); + + let theta = input.slice(s![..input.len() - 2]); + let sigma2 = input[input.len() - 2]; + let noise = input[input.len() - 1]; + + let theta = theta.mapv(F::cast); + match self.reduced_likelihood( + &theta, + sigma2, + noise, + &w_star, + &xtrain, + &ytrain, + self.nugget(), + ) { + Ok(r) => unsafe { -(*(&r.0 as *const F as *const f64)) }, + Err(_) => f64::INFINITY, + } + }; + + // Multistart: user theta0 + 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1., 10. + let mut params = Array2::zeros((N_START + 1, params_0.len())); + params.row_mut(0).assign(¶ms_0.mapv(|v| F::log10(v))); + let mut xlimits: Array2 = Array2::zeros((params_0.len(), 2)); + for mut row in xlimits.rows_mut() { + row.assign(&arr1(&[F::cast(-6), F::cast(2)])); + } + // Use a seed here for reproducibility. Do we need to make it truly random + // Probably no, as it is just to get init values spread over + // [1e-6, 20] for multistart thanks to LHS method. 
+ let seeds = Lhs::new(&xlimits) + .kind(egobox_doe::LhsKind::Maximin) + .with_rng(Xoshiro256Plus::seed_from_u64(42)) + .sample(N_START); + Zip::from(params.slice_mut(s![1.., ..]).rows_mut()) + .and(seeds.rows()) + .par_for_each(|mut theta, row| theta.assign(&row)); + + let opt_params = params.map_axis(Axis(1), |p| optimize_theta(objfn, &p.to_owned())); + let opt_index = opt_params.map(|(_, opt_f)| opt_f).argmin().unwrap(); + let opt_params = &(opt_params[opt_index]).0.mapv(F::cast); + // println!("opt_theta={}", opt_theta); + let opt_theta = opt_params.slice(s![..n - 2]).to_owned(); + let opt_sigma2 = opt_params[n - 2]; + let opt_noise = opt_params[n - 2]; + let (_, w_data) = self.reduced_likelihood( + &opt_theta, + opt_sigma2, + opt_noise, + &w_star, + &xtrain, + &ytrain, + self.nugget(), + )?; + Ok(SparseGaussianProcess { + mean: *self.mean(), + corr: *self.corr(), + method: self.method(), + theta: opt_theta, + sigma2: opt_sigma2, + noise: opt_noise, + w_data, + w_star, + xtrain: xtrain.to_owned(), + ytrain: ytrain.to_owned(), + }) + } +} + +impl, Corr: CorrelationModel> SgpValidParams { + /// Constructor of valid params from values + #[doc(hidden)] + pub fn from( + mean: Mean, + corr: Corr, + method: SparseMethod, + theta: Array1, + sigma2: F, + noise: F, + w_star: Array2, + xtrain: Array2, + ytrain: Array2, + w_data: WoodburyData, + ) -> Result> { + // TODO: add some consistency checks + Ok(SparseGaussianProcess { + mean, + corr, + method, + theta, + sigma2, + noise, + w_star, + xtrain, + ytrain, + w_data, + }) + } + + /// Compute reduced likelihood function + /// nugget: factor to improve numerical stability + pub fn reduced_likelihood( + &self, + theta: &Array1, + sigma2: F, + noise: F, + w_star: &Array2, + xtrain: &Array2, + ytrain: &Array2, + nugget: F, + ) -> Result<(F, WoodburyData)> { + let (likelihood, w_data) = match self.method() { + SparseMethod::Fitc => self.fitc(theta, sigma2, noise, w_star, xtrain, ytrain, nugget), + SparseMethod::Vfe => self.vfe(theta, sigma2, noise, nugget), + }; + + Ok((likelihood, w_data)) + } + + /// Compute covariance matrix between a and b matrices + fn compute_k( + &self, + a: &ArrayBase, Ix2>, + b: &ArrayBase, Ix2>, + w_star: &Array2, + theta: &Array1, + sigma2: F, + ) -> Array2 { + // Get pairwise componentwise L1-distances to the input training set + let dx = pairwise_differences(a, b); + // Compute the correlation function + let r = self.corr().value(&dx, &theta, w_star); + r.into_shape((a.nrows(), b.nrows())) + .unwrap() + .mapv(|v| v * sigma2) + } + + /// FITC method + fn fitc( + &self, + theta: &Array1, + sigma2: F, + noise: F, + w_star: &Array2, + xtrain: &Array2, + ytrain: &Array2, + nugget: F, + ) -> (F, WoodburyData) { + let z = self.inducings(); + let nz = z.nrows(); + // Compute: diag(Knn), Kmm and Kmn + let knn = Array1::from_elem(xtrain.nrows(), sigma2); + let kmm = self.compute_k(&z, &z, w_star, theta, sigma2) + Array::eye(nz) * nugget; + let kmn = self.compute_k(&z, xtrain, w_star, theta, sigma2); + + // Compute (lower) Cholesky decomposition: Kmm = U U^T + let u = kmm.cholesky().unwrap(); + //U = linalg.cholesky(Kmm, lower=True) + + // Compute (upper) Cholesky decomposition: Qnn = V^T V + let ui = u + .solve_triangular(&Array::eye(u.nrows()), UPLO::Lower) + .unwrap(); + // Ui = linalg.inv(U) + let v = ui.dot(&kmm); + //V = Ui @ Kmn + + // Assumption on the gaussian noise on training outputs + let eta2 = noise; + + // Compute diagonal correction: nu = Knn_diag - Qnn_diag + \eta^2 + let nu = knn - (v * v).sum_axis(Axis(0)) + eta2; 
+ //let nu = knn - np.sum(np.square(V), 0) + eta2; + // Compute beta, the effective noise precision + let beta = nu.mapv(|v| F::one() / v); + //beta = 1.0 / nu + + // Compute (lower) Cholesky decomposition: A = I + V diag(beta) V^T = L L^T + let a = Array::eye(nz) + &(v * beta.insert_axis(Axis(0))).dot(&v.t()); + // A = np.eye(self.nz) + V * beta @ V.T + + let l = a.cholesky().unwrap(); + // L = linalg.cholesky(A, lower=True) + let li = l + .solve_triangular(&Array::eye(l.nrows()), UPLO::Lower) + .unwrap(); + // Li = linalg.inv(L) + + // Compute a and b + let a = einsum("ij,j->ij", &[ytrain, &beta]) + .unwrap() + .into_dimensionality::() + .unwrap(); + // a = np.einsum("ij,i->ij", Y, beta) # avoid reshape for mat-vec multiplication + let tmp = li.dot(&v); + let b = tmp.dot(&a); + // b = Li @ V @ a + + // Compute marginal log-likelihood + // constant term ignored in reduced likelihood + //let term0 = self.ytrain.nrows() * F::cast(2. * std::f64::consts::PI); + let term1 = nu.mapv(|v| v.ln()).sum(); + let term2 = l.slice(s![.., ..; l.nrows()]).mapv(|v| v.ln()).sum(); + let term3 = (a.t().to_owned()).dot(ytrain)[[0, 0]]; + let term4 = einsum("ij,ij->", &[&b, &b]).unwrap()[[0, 0]]; + let likelihood = term1 + term2 + term3 + term4; + // likelihood = -0.5 * ( + // # num_data * np.log(2.0 * np.pi) # constant term ignored in reduced likelihood + // +np.sum(np.log(nu)) + // + 2.0 * np.sum(np.log(np.diag(L))) + // + a.T @ Y + // - np.einsum("ij,ij->", b, b) + // ) + + // Store Woodbury vectors for prediction step + // LiUi = Li @ Ui + // LiUiT = LiUi.T + // woodbury_vec = LiUiT @ b + // woodbury_inv = Ui.T @ Ui - LiUiT @ LiUi + let li_ui = li.dot(&ui); + let li_ui_t = li_ui.t(); + + let w_data = WoodburyData { + vec: li_ui.dot(&b), + inv: (ui.t()).dot(&ui) - li_ui_t.dot(li_ui), + }; + + (likelihood, w_data) + } + + fn vfe(&self, theta: &Array1, variance: F, noise: F, nugget: F) -> (F, WoodburyData) { + todo!() + } +} + +/// Optimize gp hyper parameter theta given an initial guess `theta0` +#[cfg(not(feature = "nlopt"))] +fn optimize_theta(objfn: ObjF, theta0: &Array1) -> (Array1, f64) +where + ObjF: Fn(&[f64], Option<&mut [f64]>, &mut ()) -> f64, + F: Float, +{ + use cobyla::{minimize, Func, StopTols}; + + let base: f64 = 10.; + let cons: Vec<&dyn Func<()>> = vec![]; + let theta_init = theta0 + .map(|v| unsafe { *(v as *const F as *const f64) }) + .into_raw_vec(); + + let initial_step = 0.5; + let ftol_rel = 1e-4; + let maxeval = 15 * theta0.len(); + let bounds = vec![(-6., 2.); theta0.len()]; + + match minimize( + |x, u| objfn(x, None, u), + &theta_init, + &bounds, + &cons, + (), + maxeval, + cobyla::RhoBeg::All(initial_step), + Some(StopTols { + ftol_rel, + ..StopTols::default() + }), + ) { + Ok((_, x_opt, fval)) => { + let thetas_opt = arr1(&x_opt).mapv(|v| base.powf(v)); + let fval = if f64::is_nan(fval) { + f64::INFINITY + } else { + fval + }; + (thetas_opt, fval) + } + Err((status, x_opt, _)) => { + println!("ERROR Cobyla optimizer in GP status={:?}", status); + (arr1(&x_opt).mapv(|v| base.powf(v)), f64::INFINITY) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use approx::assert_abs_diff_eq; + use ndarray::{arr2, Array}; + use ndarray_npy::write_npy; + use ndarray_rand::rand::SeedableRng; + use ndarray_rand::rand_distr::{Normal, StandardNormal}; + use ndarray_rand::RandomExt; + use rand_xoshiro::Xoshiro256Plus; + + const PI: f64 = std::f64::consts::PI; + + fn f_obj(x: &ArrayBase, Ix2>) -> Array2 { + x.mapv(|v| (3. * PI * v).sin() + 0.3 * (9. * PI * v).cos() + 0.5 * (7. 
* PI * v).sin()) + } + + #[test] + fn test_sgp() { + let mut rng = Xoshiro256Plus::seed_from_u64(42); + // Generate training data + let nt = 200; + // Variance of the gaussian noise on our trainingg data + let eta2: f64 = 0.01; + let normal = Normal::new(0., eta2.sqrt()).unwrap(); + let gaussian_noise = Array::::random_using((nt, 1), normal, &mut rng); + let xt = 2. * Array::::random_using((nt, 1), StandardNormal, &mut rng) - 1.; + let yt = f_obj(&xt) + gaussian_noise; + + let xplot = Array::linspace(0., 4., 100).insert_axis(Axis(1)); + + let gp = SparseGaussianProcess::::params( + ConstantMean::default(), + SquaredExponentialCorr::default(), + ) + .initial_theta(Some(vec![0.1])) + .fit(&Dataset::new(xt, yt)) + .expect("GP fit error"); + + let yvals = gp + .predict_values(&arr2(&[[1.0], [3.5]])) + .expect("prediction error"); + let expected_y = arr2(&[[1.0], [0.9]]); + assert_abs_diff_eq!(expected_y, yvals, epsilon = 0.5); + + let sgpr_vals = gp.predict_values(&xplot).unwrap(); + + let yvars = gp + .predict_variances(&arr2(&[[1.0], [3.5]])) + .expect("prediction error"); + let expected_vars = arr2(&[[0.], [0.1]]); + assert_abs_diff_eq!(expected_vars, yvars, epsilon = 0.5); + + let sgpr_vars = gp.predict_variances(&xplot).unwrap(); + + let test_dir = "target/tests"; + std::fs::create_dir_all(test_dir).ok(); + + let file_path = format!("{}/sgp_x.npy", test_dir); + write_npy(file_path, &xplot).expect("x saved"); + + let file_path = format!("{}/sgp_vals.npy", test_dir); + write_npy(file_path, &sgpr_vals).expect("sgp vals saved"); + + let file_path = format!("{}/sgp_vars.npy", test_dir); + write_npy(file_path, &sgpr_vars).expect("sgp vars saved"); + } +} diff --git a/gp/src/sgp_parameters.rs b/gp/src/sgp_parameters.rs new file mode 100644 index 00000000..6e26fe22 --- /dev/null +++ b/gp/src/sgp_parameters.rs @@ -0,0 +1,185 @@ +use crate::correlation_models::{CorrelationModel, SquaredExponentialCorr}; +use crate::errors::{GpError, Result}; +use crate::mean_models::{ConstantMean, RegressionModel}; +use crate::parameters::GpValidParams; +use linfa::{Float, ParamGuard}; +use ndarray::Array2; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum NoiseVarianceConfig { + Constant(F), + Estimated { + initial_guess: F, + lower_bound: F, + upper_bound: F, + }, +} +impl Default for NoiseVarianceConfig { + fn default() -> NoiseVarianceConfig { + Self::Constant(F::one()) + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub enum SparseMethod { + Fitc, + Vfe, +} +impl Default for SparseMethod { + fn default() -> Self { + SparseMethod::Fitc + } +} + +/// A set of validated SGP parameters. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub struct SgpValidParams, Corr: CorrelationModel> { + /// gp + gp_params: GpValidParams, + /// Gaussian homeoscedastic noise variance + noise: NoiseVarianceConfig, + /// Inducing points + z: Option>, + /// Method + method: SparseMethod, +} + +impl Default for SgpValidParams { + fn default() -> SgpValidParams { + SgpValidParams { + gp_params: GpValidParams::default(), + noise: NoiseVarianceConfig::default(), + z: None, + method: SparseMethod::default(), + } + } +} + +impl, Corr: CorrelationModel> SgpValidParams { + /// Get starting theta value for optimization + pub fn initial_theta(&self) -> &Option> { + &self.gp_params.theta + } + + /// Get mean model + pub fn mean(&self) -> &Mean { + &self.gp_params.mean + } + + /// Get correlation corr k(x, x') + pub fn corr(&self) -> &Corr { + &self.gp_params.corr + } + + /// Get number of components used by PLS + pub fn kpls_dim(&self) -> &Option { + &self.gp_params.kpls_dim + } + + /// Get number of components used by PLS + pub fn nugget(&self) -> F { + self.gp_params.nugget + } + + /// Get used sparse method + pub fn method(&self) -> SparseMethod { + self.method + } + + /// Get inducing points + pub fn inducings(&self) -> Array2 { + self.z + } +} + +#[derive(Clone, Debug)] +/// The set of hyperparameters that can be specified for the execution of +/// the [GP algorithm](struct.GaussianProcess.html). +pub struct SgpParams, Corr: CorrelationModel>( + SgpValidParams, +); + +impl, Corr: CorrelationModel> SgpParams { + /// A constructor for GP parameters given mean and correlation models + pub fn new(mean: Mean, corr: Corr) -> SgpParams { + Self(SgpValidParams { + gp_params: GpValidParams { + theta: None, + mean, + corr, + kpls_dim: None, + nugget: F::cast(100.0) * F::epsilon(), + }, + noise: NoiseVarianceConfig::default(), + z: None, + method: SparseMethod::default(), + }) + } + + /// Set initial value for theta hyper parameter. + /// + /// During training process, the internal optimization is started from `initial_theta`. + pub fn initial_theta(mut self, theta: Option>) -> Self { + self.0.gp_params.theta = theta; + self + } + + /// Set mean model. + pub fn mean(mut self, mean: Mean) -> Self { + self.0.gp_params.mean = mean; + self + } + + /// Set correlation model. + pub fn corr(mut self, corr: Corr) -> Self { + self.0.gp_params.corr = corr; + self + } + + /// Set number of PLS components. + /// Should be 0 < n < pb size (i.e. x dimension) + pub fn kpls_dim(mut self, kpls_dim: Option) -> Self { + self.0.gp_params.kpls_dim = kpls_dim; + self + } + + /// Set nugget. + /// + /// Nugget is used to improve numerical stability + pub fn nugget(mut self, nugget: F) -> Self { + self.0.gp_params.nugget = nugget; + self + } +} + +impl, Corr: CorrelationModel> ParamGuard + for SgpParams +{ + type Checked = SgpValidParams; + type Error = GpError; + + fn check_ref(&self) -> Result<&Self::Checked> { + if let Some(d) = self.0.gp_params.kpls_dim { + if d == 0 { + return Err(GpError::InvalidValue("`kpls_dim` canot be 0!".to_string())); + } + if let Some(theta) = self.0.initial_theta() { + if theta.len() > 1 && d > theta.len() { + return Err(GpError::InvalidValue(format!( + "Dimension reduction ({}) should be smaller than expected + training input size infered from given initial theta length ({})", + d, + theta.len() + ))); + }; + } + } + Ok(&self.0) + } + + fn check(self) -> Result { + self.check_ref()?; + Ok(self.0) + } +}