@@ -121,8 +121,9 @@ plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in%
121121method <- " dplyr" # base/dplyr
122122# base R - a bit slow
123123# dplyr - much faster
124- split_data <- spectratrait :: create_data_split(dataset = plsr_data , approach = method , split_seed = 7529075 ,
125- prop = 0.8 , group_variables = " Species_Code" )
124+ split_data <- spectratrait :: create_data_split(dataset = plsr_data , approach = method ,
125+ split_seed = 7529075 ,prop = 0.8 ,
126+ group_variables = " Species_Code" )
126127names(split_data )
127128cal.plsr.data <- split_data $ cal_data
128129head(cal.plsr.data )[1 : 8 ]
@@ -138,11 +139,13 @@ text_loc <- c(max(hist(cal.plsr.data[,paste0(inVar)])$counts),
138139 max(hist(cal.plsr.data [,paste0(inVar )])$ mids ))
139140cal_hist_plot <- qplot(cal.plsr.data [,paste0(inVar )],geom = " histogram" ,
140141 main = paste0(" Calibration Histogram for " ,inVar ),
141- xlab = paste0(inVar ),ylab = " Count" ,fill = I(" grey50" ),col = I(" black" ),alpha = I(.7 )) +
142+ xlab = paste0(inVar ),ylab = " Count" ,fill = I(" grey50" ),col = I(" black" ),
143+ alpha = I(.7 )) +
142144 annotate(" text" , x = text_loc [2 ], y = text_loc [1 ], label = " 1." ,size = 10 )
143145val_hist_plot <- qplot(val.plsr.data [,paste0(inVar )],geom = " histogram" ,
144146 main = paste0(" Validation Histogram for " ,inVar ),
145- xlab = paste0(inVar ),ylab = " Count" ,fill = I(" grey50" ),col = I(" black" ),alpha = I(.7 ))
147+ xlab = paste0(inVar ),ylab = " Count" ,fill = I(" grey50" ),col = I(" black" ),
148+ alpha = I(.7 ))
146149histograms <- grid.arrange(cal_hist_plot , val_hist_plot , ncol = 2 )
147150ggsave(filename = file.path(outdir ,paste0(inVar ," _Cal_Val_Histograms.png" )), plot = histograms ,
148151 device = " png" , width = 30 , height = 12 , units = " cm" , dpi = 300 )
@@ -152,6 +155,14 @@ write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.cs
152155# --------------------------------------------------------------------------------------------------#
153156
154157
158+ # --------------------------------------------------------------------------------------------------#
159+ # Figure S1. The resulting leaf mass area (LMA, g/m2) distribution (histogram) for the
160+ # calibration (i.e. model training) and validation datasets. The data was split using the
161+ # spectratrait::create_data_split() function using "Species_Code" as the group_variable and
162+ # using a data split proportion per group of 80% to calibration and 20% to validation
163+ # --------------------------------------------------------------------------------------------------#
164+
165+
155166# --------------------------------------------------------------------------------------------------#
156167# ## Step 8.
157168# ## Format PLSR data for model fitting
@@ -181,6 +192,13 @@ par(mfrow=c(1,1))
181192# --------------------------------------------------------------------------------------------------#
182193
183194
195+ # --------------------------------------------------------------------------------------------------#
196+ # Figure S2. The resulting calibration and validation spectral reflectance distribution by
197+ # wavelength. The spectra split was done at the same time as LMA, as described in
198+ # Supplemental Figure S1.
199+ # --------------------------------------------------------------------------------------------------#
200+
201+
184202# --------------------------------------------------------------------------------------------------#
185203# ## Step 10.
186204# ## Use permutation to determine the optimal number of components
@@ -206,13 +224,26 @@ if (method=="pls") {
206224 maxComps = maxComps , iterations = iterations ,
207225 seg = seg , prop = prop , random_seed = random_seed )
208226}
209- print(" *** Figure 3 . Optimal PLSR component selection ***" )
227+ print(" *** Figure S3 . Optimal PLSR component selection ***" )
210228dev.copy(png ,file.path(outdir ,paste0(paste0(" Figure_3_" ,inVar ," _PLSR_Component_Selection.png" ))),
211229 height = 2800 , width = 3400 , res = 340 )
212230dev.off();
213231# --------------------------------------------------------------------------------------------------#
214232
215233
234+ # --------------------------------------------------------------------------------------------------#
235+ # Figure S3. A key challenge in building robust and parsimonious PLSR models is determining the
236+ # optimal number of PLSR components. A good definition is the minimum number of components that
237+ # minimizes the PRESS statistic and where the next higher component doesn't produce a meaningful
238+ # increase in model performance (i.e. lower PRESS). We provide three methods in the
239+ # find_optimal_components() function to determine the optimal number of components statistically
240+ # using the internal pls package jackknife method or our custom methods that are better in some
241+ # conditions, including for large datasets. In this example we show "firstMin" option that
242+ # selects the number of components corresponding to the first statistical minimum PRESS value
243+ # (vertical broken blue line).
244+ # --------------------------------------------------------------------------------------------------#
245+
246+
216247# --------------------------------------------------------------------------------------------------#
217248# ## Step 11.
218249# ## Fit final model - using leave-one-out cross validation
@@ -242,6 +273,12 @@ par(opar)
242273# --------------------------------------------------------------------------------------------------#
243274
244275
276+ # --------------------------------------------------------------------------------------------------#
277+ # Figure S4. A plot of the validation root mean square error of prediction (RMSEP, left) and
278+ # coefficient of determination (right) for the 0 to optimal number of components
279+ # --------------------------------------------------------------------------------------------------#
280+
281+
245282# --------------------------------------------------------------------------------------------------#
246283# ## Step 12.
247284# ## PLSR fit observed vs. predicted plot data
@@ -319,6 +356,14 @@ ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")),
319356# --------------------------------------------------------------------------------------------------#
320357
321358
359+ # --------------------------------------------------------------------------------------------------#
360+ # Figure S5. The calibration model and independent validation scatter plot results for the example
361+ # LMA PLSR model (top row). Also shown are the calibration model and validation PLSR
362+ # residuals, where the calibration results are based on the internal model cross-validation
363+ # and the validation residuals are the predicted minus observed values of LMA.
364+ # --------------------------------------------------------------------------------------------------#
365+
366+
322367# --------------------------------------------------------------------------------------------------#
323368# ## Step 13.
324369# ## Generate Coefficient and VIP plots
@@ -341,6 +386,12 @@ par(opar)
341386# --------------------------------------------------------------------------------------------------#
342387
343388
389+ # --------------------------------------------------------------------------------------------------#
390+ # Figure S6. The calibration model PLSR regression coefficient (top) and variable importance of
391+ # projection (bottom) plots
392+ # --------------------------------------------------------------------------------------------------#
393+
394+
344395# --------------------------------------------------------------------------------------------------#
345396# ## Step 14.
346397# ## Permutation analysis to derive uncertainty estimates
@@ -386,6 +437,10 @@ dev.copy(png,file.path(outdir,paste0(inVar,'_Jackknife_Regression_Coefficients.p
386437 height = 2100 , width = 3800 , res = 340 )
387438dev.off();
388439
440+ # --------------------------------------------------------------------------------------------------#
441+ # Figure S7. The calibration model jackknife PLSR regression coefficients
442+ # --------------------------------------------------------------------------------------------------#
443+
389444# JK validation plot
390445RMSEP <- sqrt(mean(val.plsr.output $ PLSR_Residuals ^ 2 ))
391446pecr_RMSEP <- RMSEP / mean(val.plsr.output [,inVar ])* 100
@@ -413,9 +468,17 @@ dev.off();
413468# --------------------------------------------------------------------------------------------------#
414469
415470
471+ # --------------------------------------------------------------------------------------------------#
472+ # Figure S8. Independent validation results for the LMA PLSR model with associated jackknife
473+ # uncertainty estimate 95% prediction intervals for each estimate LMA value. The %RMSEP is the
474+ # model prediction performance standardized to the percentage of the response range, in this case
475+ # the range of LMA values
476+ # --------------------------------------------------------------------------------------------------#
477+
478+
416479# ---------------- Output jackknife results --------------------------------------------------------#
417- # ## Step 15.
418- # JK Coefficents
480+ # ## Step 15. Outputs the final PLSR model jackknife coefficients
481+ # JK Coefficients
419482out.jk.coefs <- data.frame (Iteration = seq(1 ,length(Jackknife_intercept ),1 ),
420483 Intercept = Jackknife_intercept ,t(Jackknife_coef ))
421484head(out.jk.coefs )[1 : 6 ]
@@ -425,7 +488,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,'_Jackkife_PLSR_Coeffi
425488
426489
427490# ---------------- Export Model Output -------------------------------------------------------------#
428- # ## Step 16.
491+ # ## Step 16. Create and write all relevant PLSR model output to disk in .csv format
429492print(paste(" Output directory: " , getwd()))
430493
431494# Observed versus predicted
0 commit comments