Skip to content

Commit d8a95c7

Browse files
authored
Merge pull request #313 from cmu-delphi/ndefries/epix-slide-pass-reftimevalue-without-env
Pass `ref_time_value` to `epix_slide` for functions and formulas
2 parents 5436aa3 + 91d270a commit d8a95c7

10 files changed

+273
-51
lines changed

NAMESPACE

+12
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,22 @@ importFrom(rlang,"!!")
9292
importFrom(rlang,.data)
9393
importFrom(rlang,.env)
9494
importFrom(rlang,arg_match)
95+
importFrom(rlang,caller_arg)
96+
importFrom(rlang,caller_env)
97+
importFrom(rlang,check_dots_empty0)
9598
importFrom(rlang,enquo)
9699
importFrom(rlang,enquos)
100+
importFrom(rlang,f_env)
101+
importFrom(rlang,f_rhs)
102+
importFrom(rlang,global_env)
103+
importFrom(rlang,is_environment)
104+
importFrom(rlang,is_formula)
105+
importFrom(rlang,is_function)
97106
importFrom(rlang,is_missing)
98107
importFrom(rlang,is_quosure)
108+
importFrom(rlang,is_string)
109+
importFrom(rlang,missing_arg)
110+
importFrom(rlang,new_function)
99111
importFrom(rlang,quo_is_missing)
100112
importFrom(rlang,sym)
101113
importFrom(rlang,syms)

NEWS.md

+15
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,16 @@ inter-release development versions will include an additional ".9999" suffix.
66

77
## Breaking changes:
88

9+
* Changes to `epix_slide`:
10+
* The `f` computation is now required to take at least three arguments. `f`
11+
must take an `epi_df` with the same column names as the archive's `DT`,
12+
minus the `version` column; followed by a one-row tibble containing the
13+
values of the grouping variables for the associated group; followed by a
14+
reference time value, usually as a `Date` object; followed by any number
15+
of named arguments.
16+
17+
## New features:
18+
919
* `epix_slide` has been made more like `dplyr::group_modify`. It will no longer
1020
perform element/row recycling for size stability, accepts slide computation
1121
outputs containing any number of rows, and no longer supports `all_rows`.
@@ -19,6 +29,11 @@ inter-release development versions will include an additional ".9999" suffix.
1929
more closely whether/when/how to output an `epi_df`.
2030
* To keep the old behavior, convert the output of `epix_slide()` to `epi_df`
2131
when desired and set the metadata appropriately.
32+
* `epix_slide` `f` computations passed as functions or formulas now have
33+
access to the reference time value. If `f` is a function, it is passed a
34+
Date containing the reference time value as the third argument. If a
35+
formula, `f` can access the reference time value via `.z` or
36+
`.ref_time_value`.
2237

2338
## Improvements:
2439

R/grouped_epi_archive.R

+3-3
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ grouped_epi_archive =
232232

233233
# Check that `f` takes enough args
234234
if (!missing(f) && is.function(f)) {
235-
assert_sufficient_f_args(f, ...)
235+
assert_sufficient_f_args(f, ..., n_mandatory_f_args = 3L)
236236
}
237237

238238
# Validate and pre-process `before`:
@@ -272,7 +272,7 @@ grouped_epi_archive =
272272
ref_time_value,
273273
new_col) {
274274
# Carry out the specified computation
275-
comp_value = f(.data_group, .group_key, ...)
275+
comp_value = f(.data_group, .group_key, ref_time_value, ...)
276276

277277
if (all_versions) {
278278
# Extract data from archive so we can do length checks below. When
@@ -298,7 +298,7 @@ grouped_epi_archive =
298298

299299
# If f is not missing, then just go ahead, slide by group
300300
if (!missing(f)) {
301-
if (rlang::is_formula(f)) f = rlang::as_function(f)
301+
if (rlang::is_formula(f)) f = as_slide_computation(f)
302302
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
303303
# Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`,
304304
# `epi_archive` if `all_versions` is `TRUE`:

R/methods-epi_archive.R

+13-10
Original file line numberDiff line numberDiff line change
@@ -665,14 +665,17 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
665665
#' sliding (a.k.a. "rolling") time window for each data group. The window is
666666
#' determined by the `before` parameter described below. One time step is
667667
#' typically one day or one week; see [`epi_slide`] details for more
668-
#' explanation. If a function, `f` must take `x`, an `epi_df` with the same
669-
#' column names as the archive's `DT`, minus the `version` column; followed by
670-
#' `g`, a one-row tibble containing the values of the grouping variables for
671-
#' the associated group; followed by any number of named arguments. If a
672-
#' formula, `f` can operate directly on columns accessed via `.x$var`, as in
673-
#' `~ mean(.x$var)` to compute a mean of a column `var` for each
674-
#' `ref_time_value`-group combination. If `f` is missing, then `...` will
675-
#' specify the computation.
668+
#' explanation. If a function, `f` must take an `epi_df` with the same
669+
#' column names as the archive's `DT`, minus the `version` column; followed
670+
#' by a one-row tibble containing the values of the grouping variables for
671+
#' the associated group; followed by a reference time value, usually as a
672+
#' `Date` object; followed by any number of named arguments. If a formula,
673+
#' `f` can operate directly on columns accessed via `.x$var` or `.$var`, as
674+
#' in `~ mean (.x$var)` to compute a mean of a column `var` for each
675+
#' group-`ref_time_value` combination. The group key can be accessed via
676+
#' `.y` or `.group_key`, and the reference time value can be accessed via
677+
#' `.z` or `.ref_time_value`. If `f` is missing, then `...` will specify the
678+
#' computation.
676679
#' @param ... Additional arguments to pass to the function or formula specified
677680
#' via `f`. Alternatively, if `f` is missing, then `...` is interpreted as an
678681
#' expression for tidy evaluation. See details of [`epi_slide`].
@@ -827,7 +830,7 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
827830
#' archive_cases_dv_subset %>%
828831
#' group_by(geo_value) %>%
829832
#' epix_slide(
830-
#' function(x, g) {
833+
#' function(x, gk, rtv) {
831834
#' tibble(
832835
#' time_range = if(nrow(x) == 0L) {
833836
#' "0 `time_value`s"
@@ -855,7 +858,7 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
855858
#' archive_cases_dv_subset %>%
856859
#' group_by(geo_value) %>%
857860
#' epix_slide(
858-
#' function(x, g) {
861+
#' function(x, gk, rtv) {
859862
#' tibble(
860863
#' versions_start = if (nrow(x$DT) == 0L) {
861864
#' "NA (0 rows)"

R/slide.R

+6-6
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@
1212
#' sliding (a.k.a. "rolling") time window for each data group. The window is
1313
#' determined by the `before` and `after` parameters described below. One time
1414
#' step is typically one day or one week; see details for more explanation. If
15-
#' a function, `f` must take `x`, a data frame with the same column names as
15+
#' a function, `f` must take a data frame with the same column names as
1616
#' the original object, minus any grouping variables, containing the time
17-
#' window data for one `ref_time_value`-group combination; followed by `g`, a
17+
#' window data for one group-`ref_time_value` combination; followed by a
1818
#' one-row tibble containing the values of the grouping variables for the
1919
#' associated group; followed by any number of named arguments. If a formula,
20-
#' `f` can operate directly on columns accessed via `.x$var`, as in `~
21-
#' mean(.x$var)` to compute a mean of a column `var` for each
22-
#' `ref_time_value`-group combination. If `f` is missing, then `...` will
23-
#' specify the computation.
20+
#' `f` can operate directly on columns accessed via `.x$var` or `.$var`, as
21+
#' in `~mean(.x$var)` to compute a mean of a column `var` for each
22+
#' `ref_time_value`-group combination. The group key can be accessed via `.y`.
23+
#' If `f` is missing, then `...` will specify the computation.
2424
#' @param ... Additional arguments to pass to the function or formula specified
2525
#' via `f`. Alternatively, if `f` is missing, then the `...` is interpreted as
2626
#' an expression for tidy evaluation. See details.

R/utils.R

+108-3
Original file line numberDiff line numberDiff line change
@@ -103,17 +103,19 @@ Warn = function(msg, ...) rlang::warn(break_str(msg, init = "Warning: "), ...)
103103
#' Assert that a sliding computation function takes enough args
104104
#'
105105
#' @param f Function; specifies a computation to slide over an `epi_df` or
106-
#' `epi_archive` in `epi_slide` or `epix_slide`.
106+
#' `epi_archive` in `epi_slide` or `epix_slide`.
107107
#' @param ... Dots that will be forwarded to `f` from the dots of `epi_slide` or
108108
#' `epix_slide`.
109+
#' @param n_mandatory_f_args Integer; specifies the number of arguments `f`
110+
#' is required to take before any `...` arg. Defaults to 2.
109111
#'
110112
#' @importFrom rlang is_missing
111113
#' @importFrom purrr map_lgl
112114
#' @importFrom utils tail
113115
#'
114116
#' @noRd
115-
assert_sufficient_f_args <- function(f, ...) {
116-
mandatory_f_args_labels <- c("window data", "group key")
117+
assert_sufficient_f_args <- function(f, ..., n_mandatory_f_args = 2L) {
118+
mandatory_f_args_labels <- c("window data", "group key", "reference time value")[seq(n_mandatory_f_args)]
117119
n_mandatory_f_args <- length(mandatory_f_args_labels)
118120
args = formals(args(f))
119121
args_names = names(args)
@@ -181,6 +183,109 @@ assert_sufficient_f_args <- function(f, ...) {
181183
}
182184
}
183185

186+
#' Convert to function
187+
#'
188+
#' @description
189+
#' `as_slide_computation()` transforms a one-sided formula into a function.
190+
#' This powers the lambda syntax in packages like purrr.
191+
#'
192+
#' This code and documentation borrows heavily from [`rlang::as_function`]
193+
#' (https://github.com/r-lib/rlang/blob/c55f6027928d3104ed449e591e8a225fcaf55e13/R/fn.R#L343-L427).
194+
#'
195+
#' This code extends `rlang::as_function` to create functions that take three
196+
#' arguments. The arguments can be accessed via the idiomatic `.x`, `.y`,
197+
#' etc, positional references (`..1`, `..2`, etc), and also by `epi
198+
#' [x]_slide`-specific names.
199+
#'
200+
#' @source https://github.com/r-lib/rlang/blob/c55f6027928d3104ed449e591e8a225fcaf55e13/R/fn.R#L343-L427
201+
#'
202+
#' @param x A function or formula.
203+
#'
204+
#' If a **function**, it is used as is.
205+
#'
206+
#' If a **formula**, e.g. `~ mean(.x$cases)`, it is converted to a function with up
207+
#' to three arguments: `.x` (single argument), or `.x` and `.y`
208+
#' (two arguments), or `.x`, `.y`, and `.z` (three arguments). The `.`
209+
#' placeholder can be used instead of `.x`, `.group_key` can be used in
210+
#' place of `.y`, and `.ref_time_value` can be used in place of `.z`. This
211+
#' allows you to create very compact anonymous functions (lambdas) with up
212+
#' to three inputs. Functions created from formulas have a special class. Use
213+
#' `rlang::is_lambda()` to test for it.
214+
#'
215+
#' If a **string**, the function is looked up in `env`. Note that
216+
#' this interface is strictly for user convenience because of the
217+
#' scoping issues involved. Package developers should avoid
218+
#' supplying functions by name and instead supply them by value.
219+
#'
220+
#' @param env Environment in which to fetch the function in case `x`
221+
#' is a string.
222+
#' @inheritParams rlang::args_dots_empty
223+
#' @inheritParams rlang::args_error_context
224+
#' @examples
225+
#' f <- as_slide_computation(~ .x + 1)
226+
#' f(10)
227+
#'
228+
#' g <- as_slide_computation(~ -1 * .)
229+
#' g(4)
230+
#'
231+
#' h <- as_slide_computation(~ .x - .group_key)
232+
#' h(6, 3)
233+
#'
234+
#' @importFrom rlang check_dots_empty0 is_function new_function f_env
235+
#' is_environment missing_arg f_rhs is_string is_formula caller_arg
236+
#' caller_env global_env
237+
#'
238+
#' @noRd
239+
as_slide_computation <- function(x,
240+
env = global_env(),
241+
...,
242+
arg = caller_arg(x),
243+
call = caller_env()) {
244+
check_dots_empty0(...)
245+
246+
if (is_function(x)) {
247+
return(x)
248+
}
249+
250+
if (is_formula(x)) {
251+
if (length(x) > 2) {
252+
Abort(sprintf("%s must be a one-sided formula", arg),
253+
class = "epiprocess__as_slide_computation__formula_is_twosided",
254+
epiprocess__x = x,
255+
call = call)
256+
}
257+
258+
env <- f_env(x)
259+
if (!is_environment(env)) {
260+
Abort("Formula must carry an environment.",
261+
class = "epiprocess__as_slide_computation__formula_has_no_env",
262+
epiprocess__x = x,
263+
epiprocess__x_env = env,
264+
arg = arg, call = call)
265+
}
266+
267+
args <- list(
268+
... = missing_arg(),
269+
.x = quote(..1), .y = quote(..2), .z = quote(..3),
270+
. = quote(..1), .group_key = quote(..2), .ref_time_value = quote(..3)
271+
)
272+
fn <- new_function(args, f_rhs(x), env)
273+
fn <- structure(fn, class = c("epiprocess_slide_computation", "function"))
274+
return(fn)
275+
}
276+
277+
if (is_string(x)) {
278+
return(get(x, envir = env, mode = "function"))
279+
}
280+
281+
Abort(sprintf("Can't convert a %s to a slide computation", class(x)),
282+
class = "epiprocess__as_slide_computation__cant_convert_catchall",
283+
epiprocess__x = x,
284+
epiprocess__x_class = class(x),
285+
arg = arg,
286+
call = call)
287+
}
288+
184289
##########
185290

186291
in_range = function(x, rng) pmin(pmax(x, rng[1]), rng[2])

man/epi_slide.Rd

+6-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/epix_slide.Rd

+13-10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)