zoonproject · ali-johnston · Jun 17, 2016
diff --git a/R/StandardiseCov.R b/R/StandardiseCov.R
@@ -0,0 +1,62 @@
+#' @name StandardiseCov
+#'
+#' @title Scale numeric covariates
+#'
+#' @description The module scales numeric covariates. By default it applies a regular standardisation to all numeric covariates: subtract the mean and divide by the standard deviation. If \code{Gelman = TRUE} it divides by two standard deviations instead, as suggested by Gelman. Excluded covariates are not standardised.
+#'
+#' @details Categorical (factor) raster layers and layers named in \code{exclude} are left unchanged. A layer whose standard deviation is zero or undefined cannot be scaled; it is left unchanged and a warning is issued. After scaling, the covariate values in the data frame are re-extracted from the processed raster at the \code{longitude}/\code{latitude} of each record, so the data frame and the raster stay consistent.
+#'
+#' @param .data \strong{Internal parameter, do not use in the workflow function}. \code{.data} is a list of a data frame and a raster object returned from occurrence modules and covariate modules respectively. \code{.data} is passed automatically in workflow from the occurrence and covariate modules to the process module(s) and should not be passed by the user.
+#'
+#' @param Gelman A single \code{TRUE} or \code{FALSE}. If \code{TRUE} divides by two rather than one standard deviation.
+#'
+#' @param exclude Variable names of numeric covariates to exclude from standardisation
+#'
+#' @return A list with two elements: \code{df}, the first five columns of the input data frame followed by the covariate values extracted from the processed raster, and \code{ras}, the raster with the selected covariates standardised.
+#'
+#' @family process
+#'
+#' @author Alison Johnston & Carsten F. Dormann, \email{alison.johnston@@bto.org}
+#'
+#' @section Data type: presence-only, presence/absence, abundance, proportion
+#'
+#' @section Version: 0
+#'
+#' @section Date submitted:  2016-06-16
+StandardiseCov <- function (.data, Gelman = FALSE, exclude = NULL) 
+{
+    df <- .data$df
+    ras <- .data$ras
+
+    #### step 1. error checking
+    # is.logical() alone also lets NA and vectors of length > 1 through,
+    # so require exactly one non-missing logical value
+    if (!is.logical(Gelman) || length(Gelman) != 1 || is.na(Gelman)) {
+        stop("StandardiseCov: Gelman parameter can only be TRUE or FALSE")
+    }
+    if (!all(exclude %in% names(ras))) {
+        warning("StandardiseCov: Excluded terms are not all covariate names")
+    }
+
+    #### step 2. identify numeric covariates and exclude those specified
+    # ras[[i]] selects layer i (ras[i] would select cell i instead);
+    # a layer counts as numeric when raster does not flag it as a factor
+    isFactorLayer <- vapply(seq_len(raster::nlayers(ras)), function(i) {
+        isTRUE(raster::is.factor(ras[[i]]))
+    }, logical(1))
+    numericLayer <- !isFactorLayer & !(names(ras) %in% exclude)
+    if (!any(numericLayer)) 
+        stop("StandardiseCov not used, as no numeric covariates")
+    numericNames <- names(ras)[numericLayer]
+
+    #### step 3. standardise all numeric covariates
+
+    # specify whether to divide by 1 or 2 standard deviations
+    # Gelman's suggestion to scale by 2 SDs (http://andrewgelman.com/2006/06/21/standardizing_r/)
+    sd_mult <- if (Gelman) 2 else 1
+
+    for (cov in numericNames) {
+        m <- raster::cellStats(ras[[cov]], "mean", na.rm = TRUE)
+        s <- raster::cellStats(ras[[cov]], "sd", na.rm = TRUE) * sd_mult
+
+        # a constant (or all-NA) layer would turn into NaN/Inf when divided
+        # by its standard deviation, so leave it as it is and tell the user
+        if (is.na(s) || s == 0) {
+            warning("StandardiseCov: covariate '", cov,
+                    "' has zero or undefined standard deviation and was not standardised")
+            next
+        }
+
+        if (raster::nlayers(ras) == 1) {
+            ras <- (ras - m)/s
+            # make sure the covariate keeps its name after the arithmetic
+            names(ras) <- cov
+        }
+        else {
+            scaled <- (ras[[cov]] - m)/s
+            names(scaled) <- cov
+            ras[[cov]] <- scaled
+        }
+    }
+
+    #### step 4. put new values into df as well
+    # extract() gives a bare vector for a single layer; converting to a data
+    # frame and naming the columns after the layers keeps the covariate names
+    covValues <- as.data.frame(raster::extract(ras, df[, c("longitude", "latitude")]))
+    names(covValues) <- names(ras)
+    # NOTE(review): assumes the first five columns of df are the occurrence
+    # columns and everything after them is covariate data - confirm
+    df <- cbind(df[, 1:5], covValues)
+    list(df = df, ras = ras)
+}