Skip to content

Commit

Permalink
updated all necessary
Browse files Browse the repository at this point in the history
  • Loading branch information
stineb committed Mar 28, 2024
1 parent 1ac7065 commit bad9741
Show file tree
Hide file tree
Showing 13 changed files with 1,807 additions and 272 deletions.
55 changes: 41 additions & 14 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,16 +1,43 @@
Package: RTemplate
Title: Your project
Version: 0.1
Authors@R:
person(
given = "John",
family = "Doe",
email = "[email protected]",
role = c("aut", "cre"),
comment = c(ORCID = "0000-0000-0000-0000")
)
Description: What the package does (one paragraph).
Package: rgeco
Version: 0.9
Title: GECO group tools
Authors@R: c(
person(
family = "Hufkens",
given = "Koen",
email = "[email protected]",
comment = c(ORCID = "0000-0002-5070-8109"),
role = c("aut", "cre")),
person(
family = "Benjamin",
given = "Stocker",
email = "[email protected]",
comment = c(ORCID = "0000-0003-2697-9096"),
role = c("aut")),
person(given = "Geocomputation and Earth Observation, University of Bern",
role = c("cph", "fnd"))
)
Description: A collection of diverse functions to serve data wrangling and visualisations for research by the research group for Geocomputation and Earth Observation (GECO).
URL: https://github.com/geco-bern/GECOr
BugReports: https://github.com/geco-bern/GECOr/issues
Depends:
R (>= 4.2)
Imports:
terra,
dplyr,
tidyr,
raster,
fs,
cowplot,
scico,
ncmeta
Suggests:
knitr,
rmarkdown,
covr,
testthat
License: AGPL-3
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
LazyData: true
ByteCompile: true
RoxygenNote: 7.2.3
VignetteBuilder: knitr
5 changes: 5 additions & 0 deletions R-proj-template.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,8 @@ LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
216 changes: 216 additions & 0 deletions R/collect_drivers_rsofun.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
#' Collect all drivers
#'
#' Collect all drivers for site-level simulations
#' into a nested data frame with one row for each site.
#'
#' @param site_info A data frame containing site meta info (rows for sites).
#' Required columns are: \code{"sitename", "year_start",
#' "year_end", "lon", "lat", "elv"}. See \code{\link{prepare_setup_sofun}} for
#' details.
#' @param params_siml A nested data frame with rows for each site containing
#' simulation parameters for SOFUN. See \code{\link{run_pmodel_f_bysite}} or
#' \code{\link{run_biomee_f_bysite}}.
#' @param meteo A nested data frame with rows for each site and meteorological
#' forcing data time series nested inside a column named \code{"data"}.
#' @param fapar A nested data frame with rows for each site and fAPAR
#' forcing data time series nested inside a column named \code{"data"}.
#' @param co2 A nested data frame with rows for each site and CO2
#' forcing data time series nested inside a column named \code{"data"}.
#' @param params_soil Soil texture data descriptor, a data frame with columns
#' \code{"layer", "fsand", "fclay", "forg" } and \code{"fgravel"}.
#'
#' @return A \code{rsofun} input data frame (see \link{p_model_drivers} for a detailed
#' description of its structure and contents).
#' @export

collect_drivers_sofun <- function(
site_info,
params_siml,
meteo,
fapar,
co2,
params_soil
){

# declare variable bindings for CRAN compliance
# these are internal variables created in mutate
# statements mostly which need to be defined or
# they will raise a NOTE in CRAN checks
data <- sitename <- df_count <- n_not_missing <-
ppfd <- rain <- rain_doy <- ppfd_doy <-
snow <- snow_doy <- prec <- prec_doy <-
temp <- temp_doy <- patm <- patm_doy <-
vpd <- vpd_doy <- ccov <- ccov_doy <-
fapar_doy <- co2_doy <- tmin <- tmin_doy <-
tmax <- tmax_doy <- doy <- forcing <- . <- NULL

# complement the setup settings
site_info <- prepare_setup_sofun(
site_info = site_info,
params_siml = params_siml)

# check if all required variables are available
if (!("snow" %in% names(meteo$data[[1]]))) {
warning("Variable 'snow' missing in meteo data frame.
Assuming zero for all dates. \n")
meteo <- meteo %>% mutate(data = purrr::map(data,
~dplyr::mutate(., snow = 0)))
}

if (!("rain" %in% names(meteo$data[[1]]))) {
warning("Variable 'rain' missing in meteo data frame.
Assuming equal to 'prec' for all dates. \n")
meteo <- meteo %>%
dplyr::mutate(data = purrr::map(data,
~dplyr::mutate(., rain = prec)))
}

if (!("tmin" %in% names(meteo$data[[1]]))) {
warning("Variable 'tmin' missing in meteo data frame.
Assuming equal to 'temp' for all dates.
(same goes for tmax as assumed paired)\n")
meteo <- meteo %>%
dplyr::mutate(data = purrr::map(data,
~dplyr::mutate(., tmin = temp)),
data = purrr::map(data,
~dplyr::mutate(., tmax = temp)))
}

vars_req <- c("ppfd", "rain", "snow", "prec",
"temp", "patm", "vpd", "ccov", "tmin", "tmax")

vars_missing <- vars_req[
!(vars_req %in% names(meteo %>% tidyr::unnest(data)))
]

if (length(vars_missing)) {
stop(paste("Aborting. Variables missing in meteo data frame:",
paste(vars_missing, collapse = ", ")))
}

# create mega-df containing all forcing data and parameters that
# vary by site (not model parameters!)
names_metainfo <- names(site_info)[-which(names(site_info) %in%
c("sitename", "params_siml"))]
df_mega <- site_info %>%
tidyr::nest(site_info = names_metainfo) %>%
dplyr::left_join(
meteo %>%
dplyr::rename(meteo = data),
by = "sitename"
) %>%
dplyr::left_join(
fapar %>%
dplyr::rename(fapar = data),
by = "sitename"
) %>%
dplyr::left_join(
co2 %>%
dplyr::rename(co2 = data),
by = "sitename"
) %>%
dplyr::mutate(
params_soil = purrr::map(as.list(seq(nrow(.))),
~return(params_soil)))

# use only interpolated fapar and combine meteo data and fapar
# into a single nested column 'forcing'
df_mega <- df_mega %>%
dplyr::mutate(fapar = purrr::map(fapar, ~dplyr::select(., date, fapar))) %>%
dplyr::mutate(co2 = purrr::map(co2 , ~dplyr::select(., date, co2))) %>%
dplyr::mutate(forcing = purrr::map2(meteo, fapar, ~dplyr::left_join( .x, .y, by = "date"))) %>%
dplyr::mutate(forcing = purrr::map2(forcing, co2, ~dplyr::left_join( .x, .y, by = "date"))) %>%
dplyr::select(-meteo, -fapar, -co2)

# drop sites for which forcing data is missing for all dates
count_notna <- function(df) {
df %>%
dplyr::ungroup() %>%
dplyr::summarise(dplyr::across(
c("ppfd", "rain", "snow", "prec", "temp",
"patm", "vpd", "ccov", "fapar", "co2",
"tmin","tmax"), ~sum(!is.na(.)))) %>%
tidyr::pivot_longer(cols = 1:12, names_to = "var",
values_to = "n_not_missing")
}

df_missing <- df_mega %>%
dplyr::mutate(df_count = purrr::map(forcing, ~count_notna(.))) %>%
dplyr::select(sitename, df_count) %>%
tidyr::unnest(df_count) %>%
dplyr::filter(n_not_missing < 365)

if (nrow(df_missing) > 0) {
warning("Missing values found in forcing data frame:")
print(df_missing)
warning("Respective sites are dropped from all drivers data frame.")
df_mega <- df_mega %>%
dplyr::filter(!(sitename %in% pull(df_missing, sitename)))
}

## interpolate to fill gaps in forcing time series
myapprox <- function(vec){
if(all(is.na(vec))){
return(vec)
} else {
stats::approx(vec, xout = 1:length(vec))$y
}
}

fill_na_forcing <- function(df) {

# dummy variable for CRAN compliance
ppdf_doy <- NULL

vars <- names(df)[-which(names(df) == "date")]
df <- df %>%
dplyr::mutate_at(vars, myapprox)

## fill remaining gaps with mean seasonal cycle
add_doy <- function(string){paste0(string, "_doy")}

df_meandoy <- df %>%
dplyr::mutate(doy = lubridate::yday(date)) %>%
dplyr::group_by(doy) %>%
dplyr::summarise(
dplyr::across(
tidyselect::vars_select_helpers$where(is.double),
~mean(.x, na.rm = TRUE)
)
) %>%
dplyr::rename_with(.fn = add_doy, .cols = dplyr::one_of(
"ppfd", "rain", "snow", "prec", "temp", "patm",
"vpd", "ccov", "fapar", "co2", "tmin", "tmax")) %>%
dplyr::select(
doy,
dplyr::one_of(
"ppfd_doy", "rain_doy", "snow_doy", "prec_doy", "temp_doy",
"patm_doy", "vpd_doy", "ccov_doy", "fapar_doy", "co2_doy",
"tmin_doy", "tmax_doy"))

df <- df %>%
dplyr::mutate(doy = lubridate::yday(date)) %>%
dplyr::left_join(df_meandoy, by = "doy") %>%
dplyr::mutate(ppfd = ifelse(is.na(ppfd), ppfd_doy, ppfd),
rain = ifelse(is.na(rain), rain_doy, rain),
snow = ifelse(is.na(snow), snow_doy, snow),
prec = ifelse(is.na(prec), prec_doy, prec),
temp = ifelse(is.na(temp), temp_doy, temp),
patm = ifelse(is.na(patm), patm_doy, patm),
vpd = ifelse(is.na(vpd), vpd_doy, vpd),
ccov = ifelse(is.na(ccov), ccov_doy, ccov),
fapar = ifelse(is.na(fapar), fapar_doy, fapar),
co2 = ifelse(is.na(co2), co2_doy, co2),
tmin = ifelse(is.na(tmin), tmin_doy, tmin),
tmax = ifelse(is.na(tmax), tmax_doy, tmax)) %>%
dplyr::select(-ends_with("_doy"))

return(df)
}

df_mega <- df_mega %>%
mutate(forcing = purrr::map(forcing, ~fill_na_forcing(.))) %>%
dplyr::select(sitename, forcing, params_siml, site_info, params_soil)

return(df_mega)
}
42 changes: 42 additions & 0 deletions R/extract_nc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#' Extracts point data from a raster file
#'
#' Extracts point data from a raster file (e.g. a NetCDF file)
#'
#' @param df A data frame containing columns \code{lon} and \code{lat}
#' specifying longitude and latitude values of points for which data is
#' to be extracted (points organised by rows).
#' @param filn A character string specifying the path to a raster file that can be read with
#' \code{raster::brick} or \code{raster::raster}.
#' @return A data frame of the same number of rows as \code{df} with extracted data
#' nested in column \code{data}.
#' @export
#'
extract_nc <- function(df, filn, get_time = FALSE){

rasta <- raster::brick(filn)

df <- raster::extract(
rasta,
sp::SpatialPoints(dplyr::select(df, lon, lat)), # , proj4string = rasta@crs
sp = TRUE
) %>%
as_tibble() %>%
tidyr::nest(data = c(-lon, -lat)) %>%
right_join(df, by = c("lon", "lat")) %>%
mutate( data = purrr::map(data, ~dplyr::slice(., 1)) ) %>%
dplyr::mutate(data = purrr::map(data, ~t(.))) %>%
dplyr::mutate(data = purrr::map(data, ~as_tibble(.)))

## xxx todo: use argument df = TRUE in the extract() function call in order to
## return a data frame directly (and not having to rearrange the data afterwards)
## xxx todo: implement the GWR method for interpolating using elevation as a
## covariate here.

if (get_time){
timevals <- raster::getZ(rasta)
df <- df %>%
mutate( data = purrr::map(data, ~bind_cols(., tibble(date = timevals))))
}

return(df)
}
Loading

0 comments on commit bad9741

Please sign in to comment.