From 9bda33c6d72a1d74d580ebba5391992a58f6afa5 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 26 Apr 2023 12:41:13 -0400 Subject: [PATCH 1/3] pass ref_time_value to function f in epix_slide --- R/grouped_epi_archive.R | 6 +++--- R/methods-epi_archive.R | 11 ++++++----- R/utils.R | 5 +++-- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R index ac093e6f..ccac7c6d 100644 --- a/R/grouped_epi_archive.R +++ b/R/grouped_epi_archive.R @@ -222,7 +222,7 @@ grouped_epi_archive = # Check that `f` takes enough args if (!missing(f) && is.function(f)) { - check_sufficient_f_args(f) + check_sufficient_f_args(f, 3L) } # Validate and pre-process `before`: @@ -278,7 +278,7 @@ grouped_epi_archive = comp_effective_key_vars, new_col) { # Carry out the specified computation - comp_value = f(.data_group, .group_key, ...) + comp_value = f(.data_group, .group_key, ref_time_value, ...) if (all_versions) { # Extract data from archive so we can do length checks below. When @@ -294,7 +294,7 @@ grouped_epi_archive = # Note: this mirrors how `epi_slide` does things if we're using # unique keys, but can diverge if using nonunique keys. The # `epi_slide` approach of counting occurrences of the - # `ref_time_value` in the `time_value` column, which helps lines + # `ref_time_value` in the `time_value` column, which helps line # up the computation results with corresponding rows of the # input data, wouldn't quite apply here: we'd want to line up # with rows (from the same group) with `version` matching the diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 348c5c01..41ba585c 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -658,11 +658,12 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr #' determined by the `before` parameter described below. One time step is #' typically one day or one week; see [`epi_slide`] details for more #' explanation. If a function, `f` must take `x`, an `epi_df` with the same -#' column names as the archive's `DT`, minus the `version` column; followed by -#' `g`, a one-row tibble containing the values of the grouping variables for -#' the associated group; followed by any number of named arguments. If a -#' formula, `f` can operate directly on columns accessed via `.x$var`, as in -#' `~ mean(.x$var)` to compute a mean of a column `var` for each +#' column names as the archive's `DT`, minus the `version` column; followed +#' by `g`, a one-row tibble containing the values of the grouping variables +#' for the associated group; followed by `t`, a Date containing the +#' reference time value to use; followed by any number of named arguments. +#' If a formula, `f` can operate directly on columns accessed via `.x$var`, +#' as in `~ mean(.x$var)` to compute a mean of a column `var` for each #' `ref_time_value`-group combination. If `f` is missing, then `...` will #' specify the computation. #' @param ... Additional arguments to pass to the function or formula specified diff --git a/R/utils.R b/R/utils.R index c953b385..7316b7dc 100644 --- a/R/utils.R +++ b/R/utils.R @@ -104,10 +104,11 @@ Warn = function(msg, ...) rlang::warn(break_str(msg, init = "Warning: "), ...) #' #' @param f Function; specifies a computation to slide over an `epi_df` or #' `epi_archive` in `epi_slide` or `epix_slide`. +#' @param n_mandatory_f_args Integer; specifies the number of arguments `f` +#' is required to take before any `...` arg. Defaults to 2. #' #' @noRd -check_sufficient_f_args <- function(f) { - n_mandatory_f_args <- 2 +check_sufficient_f_args <- function(f, n_mandatory_f_args = 2L) { arg_names = names(formals(args(f))) if ("..." %in% arg_names) { # Keep all arg names before `...` From c6329b1e62197a008d7f0a42db38ecfd952a3be2 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 27 Apr 2023 15:40:34 -0400 Subject: [PATCH 2/3] bind ref_time_value to copy of execution env for f as formula or dots --- NAMESPACE | 1 + R/grouped_epi_archive.R | 8 ++++++-- man/epix_slide.Rd | 11 ++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 10847e6c..a44561cf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -89,6 +89,7 @@ importFrom(rlang,"!!") importFrom(rlang,.data) importFrom(rlang,.env) importFrom(rlang,arg_match) +importFrom(rlang,child_env) importFrom(rlang,enquo) importFrom(rlang,enquos) importFrom(rlang,is_quosure) diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R index ccac7c6d..aa456910 100644 --- a/R/grouped_epi_archive.R +++ b/R/grouped_epi_archive.R @@ -186,7 +186,7 @@ grouped_epi_archive = #' object. See the documentation for the wrapper function [`epix_slide()`] for #' details. #' @importFrom data.table key address -#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms +#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms child_env slide = function(f, ..., before, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", @@ -364,7 +364,9 @@ grouped_epi_archive = # If f is not missing, then just go ahead, slide by group if (!missing(f)) { if (rlang::is_formula(f)) f = rlang::as_function(f) + f_init_env <- environment(f) x = purrr::map_dfr(ref_time_values, function(ref_time_value) { + environment(f) <- child_env(.parent = f_init_env, ref_time_value = ref_time_value) # Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`, # `epi_archive` if `all_versions` is `TRUE`: as_of_raw = private$ungrouped$as_of(ref_time_value, min_time_value = ref_time_value - before, all_versions = all_versions) @@ -437,10 +439,12 @@ grouped_epi_archive = } quo = quos[[1]] - f = function(x, quo, ...) rlang::eval_tidy(quo, x) + quo_init_env <- environment(quo) new_col = sym(names(rlang::quos_auto_name(quos))) x = purrr::map_dfr(ref_time_values, function(ref_time_value) { + environment(quo) <- child_env(.parent = quo_init_env, ref_time_value = ref_time_value) + f = function(x, quo, ...) rlang::eval_tidy(quo, x) # Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`, # `epi_archive` if `all_versions` is `TRUE`: as_of_raw = private$ungrouped$as_of(ref_time_value, min_time_value = ref_time_value - before, all_versions = all_versions) diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index 54d16e86..f8d0e2da 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -28,11 +28,12 @@ sliding (a.k.a. "rolling") time window for each data group. The window is determined by the \code{before} parameter described below. One time step is typically one day or one week; see \code{\link{epi_slide}} details for more explanation. If a function, \code{f} must take \code{x}, an \code{epi_df} with the same -column names as the archive's \code{DT}, minus the \code{version} column; followed by -\code{g}, a one-row tibble containing the values of the grouping variables for -the associated group; followed by any number of named arguments. If a -formula, \code{f} can operate directly on columns accessed via \code{.x$var}, as in -\code{~ mean(.x$var)} to compute a mean of a column \code{var} for each +column names as the archive's \code{DT}, minus the \code{version} column; followed +by \code{g}, a one-row tibble containing the values of the grouping variables +for the associated group; followed by \code{t}, a Date containing the +reference time value to use; followed by any number of named arguments. +If a formula, \code{f} can operate directly on columns accessed via \code{.x$var}, +as in \code{~ mean(.x$var)} to compute a mean of a column \code{var} for each \code{ref_time_value}-group combination. If \code{f} is missing, then \code{...} will specify the computation.} From 51bc3f09212044ea2b0039506d2608ce0c3de0e1 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 27 Apr 2023 17:03:28 -0400 Subject: [PATCH 3/3] describe how to use ref_time_value in non-func computations --- R/methods-epi_archive.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 41ba585c..53ba8801 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -665,7 +665,8 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr #' If a formula, `f` can operate directly on columns accessed via `.x$var`, #' as in `~ mean(.x$var)` to compute a mean of a column `var` for each #' `ref_time_value`-group combination. If `f` is missing, then `...` will -#' specify the computation. +#' specify the computation. The reference time value can be accessed via +#' `ref_time_value` in a formula or in a computation defined in `...`. #' @param ... Additional arguments to pass to the function or formula specified #' via `f`. Alternatively, if `f` is missing, then `...` is interpreted as an #' expression for tidy evaluation. See details of [`epi_slide`].