Skip to content

Commit 005a08f

Browse files
author
Shawn P. Serbin
authored
Merge pull request #76 from TESTgroup-BNL/updating_manuscript_example
Adding figure captions
2 parents 1e2216a + a6fc608 commit 005a08f

23 files changed

Lines changed: 1462 additions & 80 deletions

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/721908
8888
EcoSIS URL: https://ecosis.org/package/leaf-spectra--structural-and-biochemical-leaf-traits-of-eight-crop-species <br>
8989
EcoSIS ID: 25770ad9-d47c-428b-bf99-d1543a4b0ec9 <br>
9090
DOI: https://doi.org/doi:10.21232/C2GM2Z <br>
91+
Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/736689 <br>
9192

9293
5) Canopy spectra to map foliar functional traits over NEON domains in eastern United States <br>
9394
Target variable: leaf nitrogen <br>

inst/scripts/spectra-trait_reseco_lma_plsr_example.R

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,9 @@ plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in%
121121
method <- "dplyr" #base/dplyr
122122
# base R - a bit slow
123123
# dplyr - much faster
124-
split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, split_seed=7529075,
125-
prop=0.8, group_variables="Species_Code")
124+
split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method,
125+
split_seed=7529075,prop=0.8,
126+
group_variables="Species_Code")
126127
names(split_data)
127128
cal.plsr.data <- split_data$cal_data
128129
head(cal.plsr.data)[1:8]
@@ -138,11 +139,13 @@ text_loc <- c(max(hist(cal.plsr.data[,paste0(inVar)])$counts),
138139
max(hist(cal.plsr.data[,paste0(inVar)])$mids))
139140
cal_hist_plot <- qplot(cal.plsr.data[,paste0(inVar)],geom="histogram",
140141
main = paste0("Calibration Histogram for ",inVar),
141-
xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),alpha=I(.7)) +
142+
xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
143+
alpha=I(.7)) +
142144
annotate("text", x=text_loc[2], y=text_loc[1], label= "1.",size=10)
143145
val_hist_plot <- qplot(val.plsr.data[,paste0(inVar)],geom="histogram",
144146
main = paste0("Validation Histogram for ",inVar),
145-
xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),alpha=I(.7))
147+
xlab = paste0(inVar),ylab = "Count",fill=I("grey50"),col=I("black"),
148+
alpha=I(.7))
146149
histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
147150
ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), plot = histograms,
148151
device="png", width = 30, height = 12, units = "cm", dpi = 300)
@@ -152,6 +155,14 @@ write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.cs
152155
#--------------------------------------------------------------------------------------------------#
153156

154157

158+
#--------------------------------------------------------------------------------------------------#
159+
# Figure S1. The resulting leaf mass area (LMA, g/m2) distribution (histogram) for the
160+
# calibration (i.e. model training) and validation datasets. The data was split using the
161+
# spectratrait::create_data_split() function using "Species_Code" as the group_variable and
162+
# using a data split proportion per group of 80% to calibration and 20% to validation
163+
#--------------------------------------------------------------------------------------------------#
164+
165+
155166
#--------------------------------------------------------------------------------------------------#
156167
### Step 8.
157168
### Format PLSR data for model fitting
@@ -181,6 +192,13 @@ par(mfrow=c(1,1))
181192
#--------------------------------------------------------------------------------------------------#
182193

183194

195+
#--------------------------------------------------------------------------------------------------#
196+
# Figure S2. The resulting calibration and validation spectral reflectance distribution by
197+
# wavelength. The spectra split was done at the same time as LMA, as described in
198+
# Supplemental Figure S1.
199+
#--------------------------------------------------------------------------------------------------#
200+
201+
184202
#--------------------------------------------------------------------------------------------------#
185203
### Step 10.
186204
### Use permutation to determine the optimal number of components
@@ -206,13 +224,26 @@ if (method=="pls") {
206224
maxComps=maxComps, iterations=iterations,
207225
seg=seg, prop=prop, random_seed=random_seed)
208226
}
209-
print("*** Figure 3. Optimal PLSR component selection ***")
227+
print("*** Figure S3. Optimal PLSR component selection ***")
210228
dev.copy(png,file.path(outdir,paste0(paste0("Figure_3_",inVar,"_PLSR_Component_Selection.png"))),
211229
height=2800, width=3400, res=340)
212230
dev.off();
213231
#--------------------------------------------------------------------------------------------------#
214232

215233

234+
#--------------------------------------------------------------------------------------------------#
235+
# Figure S3. A key challenge in building robust and parsimonious PLSR models is determining the
236+
# optimal number of PLSR components. A good definition is the minimum number of components that
237+
# minimizes the PRESS statistic and where the next higher component doesn't produce a meaningful
238+
# increase in model performance (i.e. lower PRESS). We provide three methods in the
239+
# find_optimal_components() function to determine the optimal number of components statistically
240+
# using the internal pls package jackknife method or our custom methods that are better in some
241+
# conditions, including for large datasets. In this example we show "firstMin" option that
242+
# selects the number of components corresponding to the first statistical minimum PRESS value
243+
# (vertical broken blue line).
244+
#--------------------------------------------------------------------------------------------------#
245+
246+
216247
#--------------------------------------------------------------------------------------------------#
217248
### Step 11.
218249
### Fit final model - using leave-one-out cross validation
@@ -242,6 +273,12 @@ par(opar)
242273
#--------------------------------------------------------------------------------------------------#
243274

244275

276+
#--------------------------------------------------------------------------------------------------#
277+
# Figure S4. A plot of the validation root mean square error of prediction (RMSEP, left) and
278+
# coefficient of determination (right) for the 0 to optimal number of components
279+
#--------------------------------------------------------------------------------------------------#
280+
281+
245282
#--------------------------------------------------------------------------------------------------#
246283
### Step 12.
247284
### PLSR fit observed vs. predicted plot data
@@ -319,6 +356,14 @@ ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")),
319356
#--------------------------------------------------------------------------------------------------#
320357

321358

359+
#--------------------------------------------------------------------------------------------------#
360+
# Figure S5. The calibration model and independent validation scatter plot results for the example
361+
# LMA PLSR model (top row). Also shown are the calibration model and validation PLSR
362+
# residuals, where the calibration results are based on the internal model cross-validation
363+
# and the validation residuals are the predicted minus observed values of LMA.
364+
#--------------------------------------------------------------------------------------------------#
365+
366+
322367
#--------------------------------------------------------------------------------------------------#
323368
### Step 13.
324369
### Generate Coefficient and VIP plots
@@ -341,6 +386,12 @@ par(opar)
341386
#--------------------------------------------------------------------------------------------------#
342387

343388

389+
#--------------------------------------------------------------------------------------------------#
390+
# Figure S6. The calibration model PLSR regression coefficient (top) and variable importance of
391+
# projection (bottom) plots
392+
#--------------------------------------------------------------------------------------------------#
393+
394+
344395
#--------------------------------------------------------------------------------------------------#
345396
### Step 14.
346397
### Permutation analysis to derive uncertainty estimates
@@ -386,6 +437,10 @@ dev.copy(png,file.path(outdir,paste0(inVar,'_Jackknife_Regression_Coefficients.p
386437
height=2100, width=3800, res=340)
387438
dev.off();
388439

440+
#--------------------------------------------------------------------------------------------------#
441+
# Figure S7. The calibration model jackknife PLSR regression coefficients
442+
#--------------------------------------------------------------------------------------------------#
443+
389444
# JK validation plot
390445
RMSEP <- sqrt(mean(val.plsr.output$PLSR_Residuals^2))
391446
pecr_RMSEP <- RMSEP/mean(val.plsr.output[,inVar])*100
@@ -413,9 +468,17 @@ dev.off();
413468
#--------------------------------------------------------------------------------------------------#
414469

415470

471+
#--------------------------------------------------------------------------------------------------#
472+
# Figure S8. Independent validation results for the LMA PLSR model with associated jackknife
473+
# uncertainty estimate 95% prediction intervals for each estimate LMA value. The %RMSEP is the
474+
# model prediction performance standardized to the percentage of the response range, in this case
475+
# the range of LMA values
476+
#--------------------------------------------------------------------------------------------------#
477+
478+
416479
#---------------- Output jackknife results --------------------------------------------------------#
417-
### Step 15.
418-
# JK Coefficents
480+
### Step 15. Outputs the final PLSR model jackknife coefficients
481+
# JK Coefficients
419482
out.jk.coefs <- data.frame(Iteration=seq(1,length(Jackknife_intercept),1),
420483
Intercept=Jackknife_intercept,t(Jackknife_coef))
421484
head(out.jk.coefs)[1:6]
@@ -425,7 +488,7 @@ write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,'_Jackkife_PLSR_Coeffi
425488

426489

427490
#---------------- Export Model Output -------------------------------------------------------------#
428-
### Step 16.
491+
### Step 16. Create and write all relevant PLSR model output to disk in .csv format
429492
print(paste("Output directory: ", getwd()))
430493

431494
# Observed versus predicted

0 commit comments

Comments
 (0)