
Commit ef0c44b

Improve GP/SGP API (#132)
* Add experts getter
* Add experts API
* Add sparse method choice in py API
* Manage traces, add initial_theta and sparse_method
* Make GP n_start configurable
* Add n_start argument to control hyperparams optimization restarts
* Add theta tuning interface for SGP
* Fix theta tuning initialization, refactor cobyla params
* Renaming sparse_algorithm|parameters
* Renaming theta_init
* Rename guess in init
* Add theta_tuning in GP API
* Add theta_tuning to GP
* Fix cobyla maxeval parameter
* Parallelize multistart optimizations for SGP
* Add SparseGpx basic tutorial
* Trained GP model stores reduced likelihood value
* Improve display of trained model infos
* Fix moe display test
* Make GP/SGP computation interruptible
* Fix double import
* Cleanup
* Remove fallible assertion in moe display test
* Avoid ctrlc multiple handlers errors
* Relax sgp noise test tolerance
* Add parallel multistart to GP
1 parent 82c24c8 commit ef0c44b

22 files changed: +1183 −261 lines
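
For orientation, here is a minimal sketch (not part of the commit) of how the reworked GP builder reads after this change. It follows the params/theta_init/fit call pattern visible in the benches and tests below; the n_start setter and the toy xt/yt data are assumptions.

use egobox_gp::{correlation_models::SquaredExponentialCorr, mean_models::ConstantMean, GaussianProcess};
use linfa::prelude::*;
use ndarray::array;

fn main() {
    // Toy 1D training data (made-up values, just to make the sketch self-contained)
    let xt = array![[0.0], [1.0], [2.0], [3.0], [4.0]];
    let yt = array![0.0, 0.8, 1.6, 1.1, 0.9];

    let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
        ConstantMean::default(),
        SquaredExponentialCorr::default(),
    )
    .theta_init(vec![1.0]) // replaces the former initial_theta(Some(vec![1.0]))
    .n_start(10)           // assumed setter for the now-configurable multistart count
    .fit(&Dataset::new(xt, yt))
    .expect("GP fit error");

    // The improved Display reports theta, variance and the stored reduced likelihood
    println!("{gp}");
}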

Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -52,6 +52,7 @@ pyo3 = { version = "0.20.0", features = ["extension-module"] }
 pyo3-log = "0.9.0"
 serde = "1"
 serde_json = "1"
+ctrlc = "3.4"
 
 [dev-dependencies]
 criterion = "0.4"
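
The new ctrlc dependency backs the "Make GP/SGP computation interruptible" item. A standalone sketch (not the crate's actual code) of the usual pattern: register a single Ctrl-C handler that flips a shared flag, and poll that flag from the long-running training loop.

use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

fn main() {
    let interrupted = Arc::new(AtomicBool::new(false));
    let flag = interrupted.clone();
    // Registering a second handler returns an error, hence the single-handler
    // guard mentioned in the commit message ("Avoid ctrlc multiple handlers errors").
    ctrlc::set_handler(move || flag.store(true, Ordering::SeqCst))
        .expect("Error setting Ctrl-C handler");

    for i in 0..1_000_000u64 {
        if interrupted.load(Ordering::SeqCst) {
            eprintln!("interrupted at iteration {i}");
            break;
        }
        // ... one expensive hyperparameter optimization step ...
    }
}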

doc/SparseGpx_Tutorial.ipynb

Lines changed: 329 additions & 0 deletions
Large diffs are not rendered by default.

gp/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -37,6 +37,7 @@ paste = "1.0"
 num-traits = "0.2"
 thiserror = "1"
 log = "0.4"
+rayon = "1"
 
 serde = { version = "1", features = ["derive"], optional = true }
 serde_json = { version = "1", optional = true }

gp/benches/gp.rs

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ fn criterion_gp(c: &mut Criterion) {
             SquaredExponentialCorr::default(),
         )
         .kpls_dim(Some(1))
-        .initial_theta(Some(vec![1.0]))
+        .theta_init(vec![1.0])
         .fit(&Dataset::new(xt.to_owned(), yt.to_owned()))
         .expect("GP fit error"),
     )

gp/src/algorithm.rs

Lines changed: 147 additions & 54 deletions
@@ -15,19 +15,37 @@ use ndarray::{arr1, s, Array, Array1, Array2, ArrayBase, Axis, Data, Ix1, Ix2, Z
 use ndarray_einsum_beta::*;
 #[cfg(feature = "blas")]
 use ndarray_linalg::{cholesky::*, eigh::*, qr::*, svd::*, triangular::*};
-use ndarray_rand::rand::SeedableRng;
+use ndarray_rand::rand::{Rng, SeedableRng};
+use ndarray_rand::rand_distr::Normal;
+use ndarray_rand::RandomExt;
 use ndarray_stats::QuantileExt;
 
+use log::debug;
 use rand_xoshiro::Xoshiro256Plus;
+use rayon::prelude::*;
 #[cfg(feature = "serializable")]
 use serde::{Deserialize, Serialize};
 use std::fmt;
+use std::time::Instant;
 
-use ndarray_rand::rand_distr::Normal;
-use ndarray_rand::RandomExt;
+pub(crate) struct CobylaParams {
+    pub rhobeg: f64,
+    pub ftol_rel: f64,
+    pub maxeval: usize,
+}
+
+impl Default for CobylaParams {
+    fn default() -> Self {
+        CobylaParams {
+            rhobeg: 0.5,
+            ftol_rel: 1e-4,
+            maxeval: 25,
+        }
+    }
+}
 
 // const LOG10_20: f64 = 1.301_029_995_663_981_3; //f64::log10(20.);
-const N_START: usize = 10; // number of optimization restart (aka multistart)
+//const N_START: usize = 0; // number of optimization restart (aka multistart)
 
 /// Internal parameters computed Gp during training
 /// used later on in prediction computations
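
The new CobylaParams struct gathers the Cobyla stop criteria that used to be hard-coded at each call site. A tiny standalone sketch (it duplicates the struct because the real one is pub(crate)) of how a per-call override combines with the defaults through struct-update syntax, as the fit code further down does.

// Local copy for illustration only; inside the crate the CobylaParams above is used.
struct CobylaParams {
    rhobeg: f64,
    ftol_rel: f64,
    maxeval: usize,
}

impl Default for CobylaParams {
    fn default() -> Self {
        CobylaParams { rhobeg: 0.5, ftol_rel: 1e-4, maxeval: 25 }
    }
}

fn main() {
    let dim = 4; // hypothetical number of hyperparameters
    // Keep the default rhobeg/ftol_rel, scale the evaluation budget with the dimension
    let params = CobylaParams {
        maxeval: (10 * dim).max(CobylaParams::default().maxeval),
        ..CobylaParams::default()
    };
    println!("rhobeg = {}, ftol_rel = {}, maxeval = {}", params.rhobeg, params.ftol_rel, params.maxeval);
}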
@@ -160,6 +178,9 @@ impl<F: Float> Clone for GpInnerParams<F> {
 pub struct GaussianProcess<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> {
     /// Parameter of the autocorrelation model
     theta: Array1<F>,
+    /// Reduced likelihood value (result from internal optimization)
+    /// Maybe used to compare different trained models
+    likelihood: F,
     /// Regression model
     #[cfg_attr(
         feature = "serializable",
@@ -202,6 +223,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> Clone
     fn clone(&self) -> Self {
         Self {
             theta: self.theta.to_owned(),
+            likelihood: self.likelihood,
             mean: self.mean,
             corr: self.corr,
             inner_params: self.inner_params.clone(),
@@ -216,7 +238,11 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> fmt::Display
     for GaussianProcess<F, Mean, Corr>
 {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "GP({}, {})", self.mean, self.corr)
+        write!(
+            f,
+            "GP(mean={}, corr={}, theta={}, variance={}, likelihood={})",
+            self.mean, self.corr, self.theta, self.inner_params.sigma2, self.likelihood,
+        )
     }
 }
 
@@ -751,7 +777,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
         let y = dataset.targets();
         if let Some(d) = self.kpls_dim() {
             if *d > x.ncols() {
-                return Err(GpError::InvalidValue(format!(
+                return Err(GpError::InvalidValueError(format!(
                     "Dimension reduction {} should be smaller than actual \
                     training input dimensions {}",
                     d,
@@ -786,14 +812,18 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
             "Warning: multiple x input features have the same value (at least same row twice)."
             );
         }
-        let theta0 = self
-            .initial_theta()
-            .clone()
-            .map_or(Array1::from_elem(w_star.ncols(), F::cast(1e-2)), |v| {
-                Array::from_vec(v)
-            });
+
+        // Initial guess for theta
+        let theta0_dim = self.theta_tuning().theta0().len();
+        let theta0 = if theta0_dim == 1 {
+            Array1::from_elem(w_star.ncols(), self.theta_tuning().theta0()[0])
+        } else if theta0_dim == w_star.ncols() {
+            Array::from_vec(self.theta_tuning().theta0().to_vec())
+        } else {
+            panic!("Initial guess for theta should be either 1-dim or dim of xtrain (w_star.ncols()), got {}", theta0_dim)
+        };
+
         let fx = self.mean().value(&xtrain.data);
-        let y_t = ytrain.clone();
         let base: f64 = 10.;
         let objfn = |x: &[f64], _gradient: Option<&mut [f64]>, _params: &mut ()| -> f64 {
             let theta =
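
The initial-guess handling above broadcasts a scalar theta0 to every (possibly PLS-reduced) input dimension, accepts a full-length vector as-is, and rejects anything else. The same rule in isolation, as a hypothetical helper that is not part of the commit:

use ndarray::{Array, Array1};

// Broadcast rule used for the initial theta guess
fn broadcast_theta0(theta0: &[f64], ncols: usize) -> Array1<f64> {
    match theta0.len() {
        1 => Array1::from_elem(ncols, theta0[0]),
        n if n == ncols => Array::from_vec(theta0.to_vec()),
        n => panic!("theta0 should be 1-dim or {}-dim, got {}", ncols, n),
    }
}

fn main() {
    assert_eq!(broadcast_theta0(&[0.01], 3), Array1::from_elem(3, 0.01));
    assert_eq!(broadcast_theta0(&[0.1, 0.2, 0.3], 3).len(), 3);
    println!("broadcast rule ok");
}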
@@ -808,42 +838,60 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
             }
             let theta = theta.mapv(F::cast);
             let rxx = self.corr().value(&x_distances.d, &theta, &w_star);
-            match reduced_likelihood(&fx, rxx, &x_distances, &y_t, self.nugget()) {
+            match reduced_likelihood(&fx, rxx, &x_distances, &ytrain, self.nugget()) {
                 Ok(r) => unsafe { -(*(&r.0 as *const F as *const f64)) },
                 Err(_) => f64::INFINITY,
             }
         };
 
         // Multistart: user theta0 + 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1., 10.
-        let mut theta0s = Array2::zeros((N_START + 1, theta0.len()));
-        theta0s.row_mut(0).assign(&theta0.mapv(|v| F::log10(v)));
-        let mut xlimits: Array2<F> = Array2::zeros((theta0.len(), 2));
-        for mut row in xlimits.rows_mut() {
-            row.assign(&arr1(&[F::cast(-6), F::cast(2)]));
-        }
-        // Use a seed here for reproducibility. Do we need to make it truly random
-        // Probably no, as it is just to get init values spread over
-        // [1e-6, 20] for multistart thanks to LHS method.
-        let seeds = Lhs::new(&xlimits)
-            .kind(egobox_doe::LhsKind::Maximin)
-            .with_rng(Xoshiro256Plus::seed_from_u64(42))
-            .sample(N_START);
-        Zip::from(theta0s.slice_mut(s![1.., ..]).rows_mut())
-            .and(seeds.rows())
-            .par_for_each(|mut theta, row| theta.assign(&row));
-
-        let bounds = vec![(F::cast(-6.), F::cast(2.)); theta0.len()];
-
-        let opt_thetas = theta0s.map_axis(Axis(1), |theta| {
-            optimize_params(objfn, &theta.to_owned(), &bounds)
-        });
-        let opt_index = opt_thetas.map(|(_, opt_f)| opt_f).argmin().unwrap();
-        let opt_theta = &(opt_thetas[opt_index]).0.mapv(|v| F::cast(base.powf(v)));
-        // println!("opt_theta={}", opt_theta);
-        let rxx = self.corr().value(&x_distances.d, opt_theta, &w_star);
-        let (_, inner_params) = reduced_likelihood(&fx, rxx, &x_distances, &ytrain, self.nugget())?;
+        // let bounds = vec![(F::cast(-6.), F::cast(2.)); theta0.len()];
+        let bounds_dim = self.theta_tuning().bounds().len();
+        let bounds = if bounds_dim == 1 {
+            vec![self.theta_tuning().bounds()[0]; w_star.ncols()]
+        } else if theta0_dim == w_star.ncols() {
+            self.theta_tuning().bounds().to_vec()
+        } else {
+            panic!(
+                "Bounds for theta should be either 1-dim or dim of xtrain ({}), got {}",
+                w_star.ncols(),
+                theta0_dim
+            )
+        };
+
+        let (params, bounds) = prepare_multistart(self.n_start(), &theta0, &bounds);
+        debug!(
+            "Optimize with multistart theta = {:?} and bounds = {:?}",
+            params, bounds
+        );
+        let now = Instant::now();
+        let opt_params = (0..params.nrows())
+            .into_par_iter()
+            .map(|i| {
+                let opt_res = optimize_params(
+                    objfn,
+                    &params.row(i).to_owned(),
+                    &bounds,
+                    CobylaParams {
+                        maxeval: (10 * theta0_dim).max(CobylaParams::default().maxeval),
+                        ..CobylaParams::default()
+                    },
+                );
+
+                opt_res
+            })
+            .reduce(
+                || (Array::ones((params.ncols(),)), f64::INFINITY),
+                |a, b| if b.1 < a.1 { b } else { a },
+            );
+        debug!("elapsed optim = {:?}", now.elapsed().as_millis());
+        let opt_params = opt_params.0.mapv(|v| F::cast(base.powf(v)));
+        let rxx = self.corr().value(&x_distances.d, &opt_params, &w_star);
+        let (lkh, inner_params) =
+            reduced_likelihood(&fx, rxx, &x_distances, &ytrain, self.nugget())?;
         Ok(GaussianProcess {
-            theta: opt_theta.to_owned(),
+            theta: opt_params,
+            likelihood: lkh,
             mean: *self.mean(),
             corr: *self.corr(),
             inner_params,
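
The restarts are now evaluated in parallel with rayon and folded down to the best (lowest objective) result. The same map/reduce shape on a toy objective, as a standalone sketch; in the GP each map call is a full Cobyla run over log10(theta).

use rayon::prelude::*;

fn main() {
    let starts: Vec<f64> = vec![-2.0, -1.0, 0.0, 1.0, 2.0];
    let objective = |x: f64| (x - 0.7_f64).powi(2); // toy objective
    let best = starts
        .par_iter()
        .map(|&x0| (x0, objective(x0)))
        // the identity is a worst-possible value, so any real result replaces it
        .reduce(|| (f64::NAN, f64::INFINITY), |a, b| if b.1 < a.1 { b } else { a });
    println!("best start {} with value {}", best.0, best.1);
}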
@@ -854,12 +902,60 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
     }
 }
 
+pub(crate) fn prepare_multistart<F: Float>(
+    n_start: usize,
+    theta0: &Array1<F>,
+    bounds: &[(F, F)],
+) -> (Array2<F>, Vec<(F, F)>) {
+    // Use log10 theta as optimization parameter
+    let bounds: Vec<(F, F)> = bounds
+        .iter()
+        .map(|(lo, up)| (lo.log10(), up.log10()))
+        .collect();
+
+    // Multistart: user theta0 + 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1., 10.
+    let mut theta0s = Array2::zeros((n_start + 1, theta0.len()));
+    theta0s.row_mut(0).assign(&theta0.mapv(|v| F::log10(v)));
+
+    match n_start.cmp(&1) {
+        std::cmp::Ordering::Equal => {
+            //let mut rng = Xoshiro256Plus::seed_from_u64(42);
+            let mut rng = Xoshiro256Plus::from_entropy();
+            let vals = bounds.iter().map(|(a, b)| rng.gen_range(*a..*b)).collect();
+            theta0s.row_mut(1).assign(&Array::from_vec(vals))
+        }
+        std::cmp::Ordering::Greater => {
+            let mut xlimits: Array2<F> = Array2::zeros((bounds.len(), 2));
+            // for mut row in xlimits.rows_mut() {
+            //     row.assign(&arr1(&[limits.0, limits.1]));
+            // }
+            Zip::from(xlimits.rows_mut())
+                .and(&bounds)
+                .for_each(|mut row, limits| row.assign(&arr1(&[limits.0, limits.1])));
+            // Use a seed here for reproducibility. Do we need to make it truly random
+            // Probably no, as it is just to get init values spread over
+            // [1e-6, 20] for multistart thanks to LHS method.
+
+            let seeds = Lhs::new(&xlimits)
+                .kind(egobox_doe::LhsKind::Maximin)
+                .with_rng(Xoshiro256Plus::seed_from_u64(42))
+                .sample(n_start);
+            Zip::from(theta0s.slice_mut(s![1.., ..]).rows_mut())
+                .and(seeds.rows())
+                .par_for_each(|mut theta, row| theta.assign(&row));
+        }
+        std::cmp::Ordering::Less => (),
+    };
+    (theta0s, bounds)
+}
+
 /// Optimize gp hyper parameters given an initial guess and bounds with NLOPT::Cobyla
 #[cfg(feature = "nlopt")]
 pub(crate) fn optimize_params<ObjF, F>(
     objfn: ObjF,
     param0: &Array1<F>,
     bounds: &[(F, F)],
+    cobyla: CobylaParams,
 ) -> (Array1<f64>, f64)
 where
     ObjF: Fn(&[f64], Option<&mut [f64]>, &mut ()) -> f64,
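
prepare_multistart spreads the extra start points over the log10(theta) box with a seeded maximin LHS (a single random point when n_start == 1, none when it is 0). Just the sampling step, sketched with the egobox_doe calls used above and made-up bounds; the SamplingMethod import for sample() is an assumption about the egobox_doe API.

use egobox_doe::{Lhs, LhsKind, SamplingMethod};
use ndarray::array;
use ndarray_rand::rand::SeedableRng;
use rand_xoshiro::Xoshiro256Plus;

fn main() {
    // Two hyperparameters, each searched in [1e-6, 1e2], i.e. [-6, 2] in log10 space
    let xlimits = array![[-6.0, 2.0], [-6.0, 2.0]];
    let starts = Lhs::new(&xlimits)
        .kind(LhsKind::Maximin)
        .with_rng(Xoshiro256Plus::seed_from_u64(42))
        .sample(10);
    // Each row is one log10(theta) start point; 10^value recovers theta as in fit()
    println!("{starts}");
}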
@@ -879,9 +975,9 @@ where
     let upper_bounds = bounds.iter().map(|b| into_f64(&b.1)).collect::<Vec<_>>();
     optimizer.set_upper_bounds(&upper_bounds).unwrap();
 
-    optimizer.set_initial_step1(0.5).unwrap();
-    optimizer.set_maxeval(15 * param0.len() as u32).unwrap();
-    optimizer.set_ftol_rel(1e-4).unwrap();
+    optimizer.set_initial_step1(cobyla.rhobeg).unwrap();
+    optimizer.set_maxeval(cobyla.maxeval as u32).unwrap();
+    optimizer.set_ftol_rel(cobyla.ftol_rel).unwrap();
 
     match optimizer.optimize(&mut param) {
         Ok((_, fmin)) => {
@@ -906,6 +1002,7 @@ pub(crate) fn optimize_params<ObjF, F>(
     objfn: ObjF,
     param0: &Array1<F>,
     bounds: &[(F, F)],
+    cobyla: CobylaParams,
 ) -> (Array1<f64>, f64)
 where
     ObjF: Fn(&[f64], Option<&mut [f64]>, &mut ()) -> f64,
@@ -917,10 +1014,6 @@ where
     let cons: Vec<&dyn Func<()>> = vec![];
     let param0 = param0.map(|v| into_f64(v)).into_raw_vec();
 
-    let initial_step = 0.5;
-    let ftol_rel = 1e-4;
-    let maxeval = 15 * param0.len();
-
     let bounds: Vec<_> = bounds
         .iter()
         .map(|(lo, up)| (into_f64(lo), into_f64(up)))
@@ -932,10 +1025,10 @@ where
         &bounds,
         &cons,
         (),
-        maxeval,
-        cobyla::RhoBeg::All(initial_step),
+        cobyla.maxeval,
+        cobyla::RhoBeg::All(cobyla.rhobeg),
         Some(StopTols {
-            ftol_rel,
+            ftol_rel: cobyla.ftol_rel,
             ..StopTols::default()
         }),
    ) {
@@ -1158,7 +1251,7 @@ mod tests {
             ConstantMean::default(),
             SquaredExponentialCorr::default(),
         )
-        .initial_theta(Some(vec![0.1]))
+        .theta_init(vec![0.1])
        .kpls_dim(Some(1))
        .fit(&Dataset::new(xt, yt))
        .expect("GP fit error");
@@ -1181,7 +1274,7 @@ mod tests {
            [<$regr Mean>]::default(),
            [<$corr Corr>]::default(),
        )
-        .initial_theta(Some(vec![0.1]))
+        .theta_init(vec![0.1])
        .fit(&Dataset::new(xt, yt))
        .expect("GP fit error");
        let yvals = gp

gp/src/correlation_models.rs

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ use std::convert::TryFrom;
 use std::fmt;
 
 /// A trait for using a correlation model in GP regression
-pub trait CorrelationModel<F: Float>: Clone + Copy + Default + fmt::Display {
+pub trait CorrelationModel<F: Float>: Clone + Copy + Default + fmt::Display + Sync {
     /// Compute correlation function matrix r(x, x') given distances `d` between x and x',
     /// `theta` parameters, and PLS `weights`, where:
     /// `theta` : hyperparameters (1xd)
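
The added Sync bound is what allows a correlation model to be referenced from the rayon worker threads spawned by the parallel multistart. A minimal illustration with a stand-in type rather than the real trait:

use rayon::prelude::*;

// Stand-in for a CorrelationModel implementor; Copy and (auto-derived) Sync like the real ones
#[derive(Clone, Copy, Default)]
struct ToyCorr {
    scale: f64,
}

fn main() {
    let corr = ToyCorr { scale: 2.0 };
    let corr_ref = &corr; // a shared reference may cross threads only because ToyCorr: Sync
    let values: Vec<f64> = (0..8).into_par_iter().map(|i| corr_ref.scale * i as f64).collect();
    println!("{values:?}");
}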

gp/src/errors.rs

Lines changed: 1 addition & 4 deletions
@@ -21,9 +21,6 @@ pub enum GpError {
     /// When PLS fails
     #[error("PLS error: {0}")]
     PlsError(#[from] linfa_pls::PlsError),
-    /// When a value is invalid
-    #[error("PLS error: {0}")]
-    InvalidValue(String),
     /// When a linfa error occurs
     #[error(transparent)]
     LinfaError(#[from] linfa::error::Error),
@@ -36,7 +33,7 @@ pub enum GpError {
     /// When error during loading
     #[error("Load error: {0}")]
     LoadError(String),
-    /// When error during loading
+    /// When error dur to a bad value
     #[error("InvalidValue error: {0}")]
     InvalidValueError(String),
 }
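
With the duplicate variant removed, bad values surface through the single InvalidValueError variant. A sketch of consumer-side handling (the caller is hypothetical; GpError itself is re-exported from the crate root, as lib.rs below shows):

use egobox_gp::GpError;

fn report(err: GpError) {
    match err {
        GpError::InvalidValueError(msg) => eprintln!("invalid value: {msg}"),
        other => eprintln!("GP failure: {other}"),
    }
}

fn main() {
    report(GpError::InvalidValueError(
        "kpls_dim larger than input dimension".to_string(),
    ));
}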

gp/src/lib.rs

Lines changed: 4 additions & 4 deletions
@@ -14,14 +14,14 @@ mod algorithm;
 pub mod correlation_models;
 mod errors;
 pub mod mean_models;
-mod sgp_algorithm;
+mod sparse_algorithm;
 
 mod parameters;
-mod sgp_parameters;
+mod sparse_parameters;
 mod utils;
 
 pub use algorithm::*;
 pub use errors::*;
 pub use parameters::*;
-pub use sgp_algorithm::*;
-pub use sgp_parameters::*;
+pub use sparse_algorithm::*;
+pub use sparse_parameters::*;
