@@ -15,19 +15,37 @@ use ndarray::{arr1, s, Array, Array1, Array2, ArrayBase, Axis, Data, Ix1, Ix2, Z
use ndarray_einsum_beta::*;
#[cfg(feature = "blas")]
use ndarray_linalg::{cholesky::*, eigh::*, qr::*, svd::*, triangular::*};
- use ndarray_rand::rand::SeedableRng;
+ use ndarray_rand::rand::{Rng, SeedableRng};
+ use ndarray_rand::rand_distr::Normal;
+ use ndarray_rand::RandomExt;
use ndarray_stats::QuantileExt;
+ use log::debug;
use rand_xoshiro::Xoshiro256Plus;
+ use rayon::prelude::*;
#[cfg(feature = "serializable")]
use serde::{Deserialize, Serialize};
use std::fmt;
+ use std::time::Instant;
- use ndarray_rand::rand_distr::Normal;
- use ndarray_rand::RandomExt;
+ pub(crate) struct CobylaParams {
+     pub rhobeg: f64,
+     pub ftol_rel: f64,
+     pub maxeval: usize,
+ }
+
+ impl Default for CobylaParams {
+     fn default() -> Self {
+         CobylaParams {
+             rhobeg: 0.5,
+             ftol_rel: 1e-4,
+             maxeval: 25,
+         }
+     }
+ }

// const LOG10_20: f64 = 1.301_029_995_663_981_3; //f64::log10(20.);
- const N_START: usize = 10; // number of optimization restarts (aka multistart)
+ // const N_START: usize = 0; // number of optimization restarts (aka multistart)

/// Internal parameters computed by Gp during training
/// used later on in prediction computations
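For context on the new type above: `CobylaParams` gathers the COBYLA settings that were previously hard-coded in `optimize_params`, and `Default` plus struct-update syntax lets `fit()` override only the evaluation budget. A minimal standalone sketch of that pattern (illustrative only; the values mirror this diff):

struct CobylaParams {
    rhobeg: f64,    // initial step size (COBYLA trust-region radius)
    ftol_rel: f64,  // relative tolerance on objective improvement
    maxeval: usize, // maximum number of objective evaluations
}

impl Default for CobylaParams {
    fn default() -> Self {
        CobylaParams { rhobeg: 0.5, ftol_rel: 1e-4, maxeval: 25 }
    }
}

fn main() {
    // Keep the default step and tolerance, but scale the evaluation
    // budget with the problem dimension, as fit() does below.
    let dim = 4;
    let params = CobylaParams {
        maxeval: (10 * dim).max(CobylaParams::default().maxeval),
        ..CobylaParams::default()
    };
    assert_eq!(params.maxeval, 40);
    assert_eq!(params.rhobeg, 0.5);
}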
@@ -160,6 +178,9 @@ impl<F: Float> Clone for GpInnerParams<F> {
pub struct GaussianProcess<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> {
    /// Parameter of the autocorrelation model
    theta: Array1<F>,
+     /// Reduced likelihood value (result from internal optimization)
+     /// May be used to compare different trained models
+     likelihood: F,
    /// Regression model
    #[cfg_attr(
        feature = "serializable",
@@ -202,6 +223,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> Clone
    fn clone(&self) -> Self {
        Self {
            theta: self.theta.to_owned(),
+             likelihood: self.likelihood,
            mean: self.mean,
            corr: self.corr,
            inner_params: self.inner_params.clone(),
@@ -216,7 +238,11 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> fmt::Display
for GaussianProcess<F, Mean, Corr>
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-         write!(f, "GP({}, {})", self.mean, self.corr)
+         write!(
+             f,
+             "GP(mean={}, corr={}, theta={}, variance={}, likelihood={})",
+             self.mean, self.corr, self.theta, self.inner_params.sigma2, self.likelihood,
+         )
    }
}
@@ -751,7 +777,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
        let y = dataset.targets();
        if let Some(d) = self.kpls_dim() {
            if *d > x.ncols() {
-                 return Err(GpError::InvalidValue(format!(
+                 return Err(GpError::InvalidValueError(format!(
                    "Dimension reduction {} should be smaller than actual \
                     training input dimensions {}",
                    d,
@@ -786,14 +812,18 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
                "Warning: multiple x input features have the same value (at least same row twice)."
            );
        }
-         let theta0 = self
-             .initial_theta()
-             .clone()
-             .map_or(Array1::from_elem(w_star.ncols(), F::cast(1e-2)), |v| {
-                 Array::from_vec(v)
-             });
+
+         // Initial guess for theta
+         let theta0_dim = self.theta_tuning().theta0().len();
+         let theta0 = if theta0_dim == 1 {
+             Array1::from_elem(w_star.ncols(), self.theta_tuning().theta0()[0])
+         } else if theta0_dim == w_star.ncols() {
+             Array::from_vec(self.theta_tuning().theta0().to_vec())
+         } else {
+             panic!("Initial guess for theta should be either 1-dim or dim of xtrain (w_star.ncols()), got {}", theta0_dim)
+         };
+
        let fx = self.mean().value(&xtrain.data);
-         let y_t = ytrain.clone();
        let base: f64 = 10.;
        let objfn = |x: &[f64], _gradient: Option<&mut [f64]>, _params: &mut ()| -> f64 {
            let theta =
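The new `theta_tuning` handling above follows a simple broadcast rule: one value is replicated across all (possibly PLS-reduced) input dimensions, a full-length vector is used as-is, and any other length is a programming error. The same rule in isolation, as a sketch depending only on `ndarray` (the function name is hypothetical):

use ndarray::Array1;

// Broadcast rule used above: 1 value -> replicated, ncols values -> as-is.
fn broadcast_theta0(theta0: &[f64], ncols: usize) -> Array1<f64> {
    match theta0.len() {
        1 => Array1::from_elem(ncols, theta0[0]),
        n if n == ncols => Array1::from_vec(theta0.to_vec()),
        n => panic!("theta0 should be 1-dim or {}-dim, got {}", ncols, n),
    }
}

fn main() {
    assert_eq!(broadcast_theta0(&[0.1], 3), Array1::from_elem(3, 0.1));
    assert_eq!(broadcast_theta0(&[0.1, 0.2, 0.3], 3).len(), 3);
}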
@@ -808,42 +838,60 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
            }
            let theta = theta.mapv(F::cast);
            let rxx = self.corr().value(&x_distances.d, &theta, &w_star);
-             match reduced_likelihood(&fx, rxx, &x_distances, &y_t, self.nugget()) {
+             match reduced_likelihood(&fx, rxx, &x_distances, &ytrain, self.nugget()) {
                Ok(r) => unsafe { -(*(&r.0 as *const F as *const f64)) },
                Err(_) => f64::INFINITY,
            }
        };

        // Multistart: user theta0 + 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1., 10.
-         let mut theta0s = Array2::zeros((N_START + 1, theta0.len()));
-         theta0s.row_mut(0).assign(&theta0.mapv(|v| F::log10(v)));
-         let mut xlimits: Array2<F> = Array2::zeros((theta0.len(), 2));
-         for mut row in xlimits.rows_mut() {
-             row.assign(&arr1(&[F::cast(-6), F::cast(2)]));
-         }
-         // Use a seed here for reproducibility. Do we need to make it truly random
-         // Probably no, as it is just to get init values spread over
-         // [1e-6, 20] for multistart thanks to LHS method.
-         let seeds = Lhs::new(&xlimits)
-             .kind(egobox_doe::LhsKind::Maximin)
-             .with_rng(Xoshiro256Plus::seed_from_u64(42))
-             .sample(N_START);
-         Zip::from(theta0s.slice_mut(s![1.., ..]).rows_mut())
-             .and(seeds.rows())
-             .par_for_each(|mut theta, row| theta.assign(&row));
-
-         let bounds = vec![(F::cast(-6.), F::cast(2.)); theta0.len()];
-
-         let opt_thetas = theta0s.map_axis(Axis(1), |theta| {
-             optimize_params(objfn, &theta.to_owned(), &bounds)
-         });
-         let opt_index = opt_thetas.map(|(_, opt_f)| opt_f).argmin().unwrap();
-         let opt_theta = &(opt_thetas[opt_index]).0.mapv(|v| F::cast(base.powf(v)));
-         // println!("opt_theta={}", opt_theta);
-         let rxx = self.corr().value(&x_distances.d, opt_theta, &w_star);
-         let (_, inner_params) = reduced_likelihood(&fx, rxx, &x_distances, &ytrain, self.nugget())?;
+         // let bounds = vec![(F::cast(-6.), F::cast(2.)); theta0.len()];
+         let bounds_dim = self.theta_tuning().bounds().len();
+         let bounds = if bounds_dim == 1 {
+             vec![self.theta_tuning().bounds()[0]; w_star.ncols()]
+         } else if bounds_dim == w_star.ncols() {
+             self.theta_tuning().bounds().to_vec()
+         } else {
+             panic!(
+                 "Bounds for theta should be either 1-dim or dim of xtrain ({}), got {}",
+                 w_star.ncols(),
+                 bounds_dim
+             )
+         };
+
+         let (params, bounds) = prepare_multistart(self.n_start(), &theta0, &bounds);
+         debug!(
+             "Optimize with multistart theta = {:?} and bounds = {:?}",
+             params, bounds
+         );
+         let now = Instant::now();
+         let opt_params = (0..params.nrows())
+             .into_par_iter()
+             .map(|i| {
+                 let opt_res = optimize_params(
+                     objfn,
+                     &params.row(i).to_owned(),
+                     &bounds,
+                     CobylaParams {
+                         maxeval: (10 * theta0_dim).max(CobylaParams::default().maxeval),
+                         ..CobylaParams::default()
+                     },
+                 );
+
+                 opt_res
+             })
+             .reduce(
+                 || (Array::ones((params.ncols(),)), f64::INFINITY),
+                 |a, b| if b.1 < a.1 { b } else { a },
+             );
+         debug!("elapsed optim = {:?}", now.elapsed().as_millis());
+         let opt_params = opt_params.0.mapv(|v| F::cast(base.powf(v)));
+         let rxx = self.corr().value(&x_distances.d, &opt_params, &w_star);
+         let (lkh, inner_params) =
+             reduced_likelihood(&fx, rxx, &x_distances, &ytrain, self.nugget())?;
        Ok(GaussianProcess {
-             theta: opt_theta.to_owned(),
+             theta: opt_params,
+             likelihood: lkh,
            mean: *self.mean(),
            corr: *self.corr(),
            inner_params,
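The multistart loop above runs each start point through COBYLA in parallel and keeps the best result with rayon's `reduce`, whose identity element carries an infinite objective value so any finished run beats it. The selection pattern in isolation (depends only on `rayon`; the toy objective is made up):

use rayon::prelude::*;

fn main() {
    // One objective evaluation per start point, in parallel.
    let starts = vec![2.0_f64, -1.0, 0.5, 3.0];
    let objfn = |x: f64| (x - 0.4).powi(2);

    let best = starts
        .par_iter()
        .map(|&x0| (x0, objfn(x0)))
        // Identity element has infinite cost, so any real result wins.
        .reduce(
            || (f64::NAN, f64::INFINITY),
            |a, b| if b.1 < a.1 { b } else { a },
        );
    assert_eq!(best.0, 0.5);
}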
@@ -854,12 +902,60 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
        }
    }
}

+ pub(crate) fn prepare_multistart<F: Float>(
+     n_start: usize,
+     theta0: &Array1<F>,
+     bounds: &[(F, F)],
+ ) -> (Array2<F>, Vec<(F, F)>) {
+     // Use log10 theta as optimization parameter
+     let bounds: Vec<(F, F)> = bounds
+         .iter()
+         .map(|(lo, up)| (lo.log10(), up.log10()))
+         .collect();
+
+     // Multistart: user theta0 + 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1., 10.
+     let mut theta0s = Array2::zeros((n_start + 1, theta0.len()));
+     theta0s.row_mut(0).assign(&theta0.mapv(|v| F::log10(v)));
+
+     match n_start.cmp(&1) {
+         std::cmp::Ordering::Equal => {
+             // let mut rng = Xoshiro256Plus::seed_from_u64(42);
+             let mut rng = Xoshiro256Plus::from_entropy();
+             let vals = bounds.iter().map(|(a, b)| rng.gen_range(*a..*b)).collect();
+             theta0s.row_mut(1).assign(&Array::from_vec(vals))
+         }
+         std::cmp::Ordering::Greater => {
+             let mut xlimits: Array2<F> = Array2::zeros((bounds.len(), 2));
+             // for mut row in xlimits.rows_mut() {
+             //     row.assign(&arr1(&[limits.0, limits.1]));
+             // }
+             Zip::from(xlimits.rows_mut())
+                 .and(&bounds)
+                 .for_each(|mut row, limits| row.assign(&arr1(&[limits.0, limits.1])));
+             // Use a seed here for reproducibility. Do we need to make it truly random?
+             // Probably not, as it is just to get init values spread over
+             // [1e-6, 20] for multistart thanks to the LHS method.
+
+             let seeds = Lhs::new(&xlimits)
+                 .kind(egobox_doe::LhsKind::Maximin)
+                 .with_rng(Xoshiro256Plus::seed_from_u64(42))
+                 .sample(n_start);
+             Zip::from(theta0s.slice_mut(s![1.., ..]).rows_mut())
+                 .and(seeds.rows())
+                 .par_for_each(|mut theta, row| theta.assign(&row));
+         }
+         std::cmp::Ordering::Less => (),
+     };
+     (theta0s, bounds)
+ }
+
/// Optimize GP hyperparameters given an initial guess and bounds with NLOPT::Cobyla
#[cfg(feature = "nlopt")]
pub(crate) fn optimize_params<ObjF, F>(
    objfn: ObjF,
    param0: &Array1<F>,
    bounds: &[(F, F)],
+     cobyla: CobylaParams,
) -> (Array1<f64>, f64)
where
    ObjF: Fn(&[f64], Option<&mut [f64]>, &mut ()) -> f64,
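`prepare_multistart` works in log10(theta) space: the user-facing bounds and start values are transformed once here, and `fit()` maps the optimum back with `base.powf`. A quick worked check of the transform (plain Rust, no external crates; the bound values are just an example):

fn main() {
    // Theta bounds of (1e-6, 1e2) become (-6., 2.) in log10 space,
    // matching the hard-coded [-6, 2] limits the old fit() code used.
    let bounds = [(1e-6_f64, 1e2_f64)];
    let log_bounds: Vec<(f64, f64)> = bounds
        .iter()
        .map(|(lo, up)| (lo.log10(), up.log10()))
        .collect();
    assert!((log_bounds[0].0 + 6.0).abs() < 1e-12);
    assert!((log_bounds[0].1 - 2.0).abs() < 1e-12);

    // And a tuned log-parameter of 0.3 maps back to theta = 10^0.3 ~ 2.0.
    let base: f64 = 10.;
    assert!((base.powf(0.3) - 1.9953).abs() < 1e-3);
}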
@@ -879,9 +975,9 @@ where
    let upper_bounds = bounds.iter().map(|b| into_f64(&b.1)).collect::<Vec<_>>();
    optimizer.set_upper_bounds(&upper_bounds).unwrap();

-     optimizer.set_initial_step1(0.5).unwrap();
-     optimizer.set_maxeval(15 * param0.len() as u32).unwrap();
-     optimizer.set_ftol_rel(1e-4).unwrap();
+     optimizer.set_initial_step1(cobyla.rhobeg).unwrap();
+     optimizer.set_maxeval(cobyla.maxeval as u32).unwrap();
+     optimizer.set_ftol_rel(cobyla.ftol_rel).unwrap();

    match optimizer.optimize(&mut param) {
        Ok((_, fmin)) => {
@@ -906,6 +1002,7 @@ pub(crate) fn optimize_params<ObjF, F>(
    objfn: ObjF,
    param0: &Array1<F>,
    bounds: &[(F, F)],
+     cobyla: CobylaParams,
) -> (Array1<f64>, f64)
where
    ObjF: Fn(&[f64], Option<&mut [f64]>, &mut ()) -> f64,
@@ -917,10 +1014,6 @@ where
    let cons: Vec<&dyn Func<()>> = vec![];
    let param0 = param0.map(|v| into_f64(v)).into_raw_vec();

-     let initial_step = 0.5;
-     let ftol_rel = 1e-4;
-     let maxeval = 15 * param0.len();
-
    let bounds: Vec<_> = bounds
        .iter()
        .map(|(lo, up)| (into_f64(lo), into_f64(up)))
@@ -932,10 +1025,10 @@ where
        &bounds,
        &cons,
        (),
-         maxeval,
-         cobyla::RhoBeg::All(initial_step),
+         cobyla.maxeval,
+         cobyla::RhoBeg::All(cobyla.rhobeg),
        Some(StopTols {
-             ftol_rel,
+             ftol_rel: cobyla.ftol_rel,
            ..StopTols::default()
        }),
    ) {
@@ -1158,7 +1251,7 @@ mod tests {
            ConstantMean::default(),
            SquaredExponentialCorr::default(),
        )
-         .initial_theta(Some(vec![0.1]))
+         .theta_init(vec![0.1])
        .kpls_dim(Some(1))
        .fit(&Dataset::new(xt, yt))
        .expect("GP fit error");
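These test updates track the builder rename from `initial_theta(Some(vec![...]))` to `theta_init(vec![...])`. An end-to-end sketch of the new call, assuming the same builder entry point and types as the surrounding tests, with small made-up training arrays:

use ndarray::array;

// Hypothetical data; mirrors the builder chain used in the test above.
let xt = array![[0.0], [1.0], [2.0], [3.0]];
let yt = array![[0.0], [1.0], [4.0], [9.0]];
let gp = GaussianProcess::params(
    ConstantMean::default(),
    SquaredExponentialCorr::default(),
)
.theta_init(vec![0.1])
.fit(&Dataset::new(xt, yt))
.expect("GP fit error");
println!("{gp}");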
@@ -1181,7 +1274,7 @@ mod tests {
                [<$regr Mean>]::default(),
                [<$corr Corr>]::default(),
            )
-             .initial_theta(Some(vec![0.1]))
+             .theta_init(vec![0.1])
            .fit(&Dataset::new(xt, yt))
            .expect("GP fit error");
            let yvals = gp