Skip to content

Replace n with before (no after) in epix_slide #216

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Nov 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions R/archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ epi_archive =
#' details.
#' @importFrom data.table key
#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms
slide = function(f, ..., n, group_by, ref_time_values,
slide = function(f, ..., before, group_by, ref_time_values,
time_step, new_col_name = "slide_value",
as_list_col = FALSE, names_sep = "_",
all_rows = FALSE) {
Expand All @@ -597,10 +597,22 @@ epi_archive =
ref_time_values = ref_time_values[ref_time_values %in%
unique(self$DT$time_value)]
}


# Validate and pre-process `before`:
if (missing(before)) {
Abort("`before` is required (and must be passed by name);
if you did not want to apply a sliding window but rather
to map `as_of` and `f` across various `ref_time_values`,
pass a large `before` value (e.g., if time steps are days,
`before=365000`).")
}
before <- vctrs::vec_cast(before, integer())
if (length(before) != 1L || is.na(before) || before < 0L) {
Abort("`before` must be length-1, non-NA, non-negative")
}

# If a custom time step is specified, then redefine units
before_num = n-1
if (!missing(time_step)) before_num = time_step(n-1)
if (!missing(time_step)) before <- time_step(before)

# What to group by? If missing, set according to internal keys;
# otherwise, tidyselect.
Expand Down Expand Up @@ -673,12 +685,13 @@ epi_archive =
if (!missing(f)) {
if (rlang::is_formula(f)) f = rlang::as_function(f)

x = purrr::map_dfr(ref_time_values, function(t) {
self$as_of(t, min_time_value = t - before_num) %>%
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
self$as_of(ref_time_value,
min_time_value = ref_time_value - before) %>%
dplyr::group_by(!!!group_by) %>%
dplyr::group_modify(comp_one_grp,
f = f, ...,
time_value = t,
time_value = ref_time_value,
key_vars = key_vars,
new_col = new_col,
.keep = TRUE) %>%
Expand All @@ -700,12 +713,13 @@ epi_archive =
f = function(x, quo, ...) rlang::eval_tidy(quo, x)
new_col = sym(names(rlang::quos_auto_name(quos)))

x = purrr::map_dfr(ref_time_values, function(t) {
self$as_of(t, min_time_value = t - before_num) %>%
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
self$as_of(ref_time_value,
min_time_value = ref_time_value - before) %>%
dplyr::group_by(!!!group_by) %>%
dplyr::group_modify(comp_one_grp,
f = f, quo = quo,
time_value = t,
time_value = ref_time_value,
key_vars = key_vars,
new_col = new_col,
.keep = TRUE) %>%
Expand Down
41 changes: 28 additions & 13 deletions R/methods-epi_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -358,10 +358,21 @@ epix_merge = function(x, y,
#' @param ... Additional arguments to pass to the function or formula specified
#' via `f`. Alternatively, if `f` is missing, then the current argument is
#' interpreted as an expression for tidy evaluation.
#' @param n Number of time steps to use in the running window. For example, if
#' `n = 7`, and one time step is one day, then to produce a value on January 7
#' we apply the given function or formula to data in between January 1 and
#' 7.
#' @param before How far `before` each `ref_time_value` should the sliding
#' window extend? If provided, should be a single, non-NA,
#' [integer-compatible][vctrs::vec_cast] number of time steps. This window
#' endpoint is inclusive. For example, if `before = 7`, and one time step is
#' one day, then to produce a value for a `ref_time_value` of January 8, we
#' apply the given function or formula to data (for each group present) with
#' `time_value`s from January 1 onward, as they were reported on January 8.
#' For typical disease surveillance sources, this will not include any data
#' with a `time_value` of January 8, and, depending on the amount of reporting
#' latency, may not include January 7 or even earlier `time_value`s. (If
#' instead the archive were to hold nowcasts instead of regular surveillance
#' data, then we would indeed expect data for `time_value` January 8. If it
#' were to hold forecasts, then we would expect data for `time_value`s after
#' January 8, and the sliding window would extend as far after each
#' `ref_time_value` as needed to include all such `time_value`s.)
#' @param group_by The variable(s) to group by before slide computation. If
#' missing, then the keys in the underlying data table, excluding `time_value`
#' and `version`, will be used for grouping. To omit a grouping entirely, use
Expand Down Expand Up @@ -396,10 +407,14 @@ epix_merge = function(x, y,
#' values.
#'
#' @details Two key distinctions between inputs to the current function and
#' `epi_slide()`:
#' 1. `epix_slide()` uses windows that are **always right-aligned** (in
#' `epi_slide()`, custom alignments could be specified using the `align` or
#' `before` arguments).
#' [`epi_slide()`]:
#' 1. `epix_slide()` doesn't accept an `after` argument; its windows extend
#' from `before` time steps before a given `ref_time_value` through the last
#' `time_value` available as of version `ref_time_value` (typically, this
#' won't include `ref_time_value` itself, as observations about a particular
#' time interval (e.g., day) are only published after that time interval ends);
#' `epi_slide` windows extend from `before` time steps before a
#' `ref_time_value` through `after` time steps after `ref_time_value`.
#' 2. `epix_slide()` uses a `group_by` to specify the grouping upfront (in
#' `epi_slide()`, this would be accomplished by a preceding function call to
#' `dplyr::group_by()`).
Expand All @@ -422,11 +437,11 @@ epix_merge = function(x, y,
#' Finally, this is simply a wrapper around the `slide()` method of the
#' `epi_archive` class, so if `x` is an `epi_archive` object, then:
#' ```
#' epix_slide(x, new_var = comp(old_var), n = 120)
#' epix_slide(x, new_var = comp(old_var), before = 119)
#' ```
#' is equivalent to:
#' ```
#' x$slide(x, new_var = comp(old_var), n = 120)
#' x$slide(new_var = comp(old_var), before = 119)
#' ```
#'
#' @importFrom rlang enquo
Expand All @@ -444,15 +459,15 @@ epix_merge = function(x, y,
#' by = "1 day")
#' epix_slide(x = archive_cases_dv_subset,
#' f = ~ mean(.x$case_rate_7d_av),
#' n = 3,
#' before = 2,
#' group_by = geo_value,
#' ref_time_values = time_values,
#' new_col_name = 'case_rate_3d_av')
epix_slide = function(x, f, ..., n, group_by, ref_time_values,
epix_slide = function(x, f, ..., before, group_by, ref_time_values,
time_step, new_col_name = "slide_value",
as_list_col = FALSE, names_sep = "_", all_rows = FALSE) {
if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.")
return(x$slide(f, ..., n = n,
return(x$slide(f, ..., before = before,
group_by = {{group_by}},
ref_time_values = ref_time_values,
time_step = time_step,
Expand Down
2 changes: 1 addition & 1 deletion man/epi_archive.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 27 additions & 12 deletions man/epix_slide.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 7 additions & 6 deletions tests/testthat/test-epix_fill_through_version.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ test_that("epix_fill_through_version mirrors input when it is sufficiently up to
# edition 3, which is based on `waldo::compare` rather than `base::identical`;
# `waldo::compare` in waldo >=0.3.1 appears (as of 0.4.0) to compare R6
# objects by contents rather than address (in a way that is tested but maybe
# not guaranteed via user docs). Use `local_edition` to ensure we use edition
# 3 here.
local_edition(3)
# not guaranteed via user docs). Use `testthat::local_edition` to ensure we
# use testthat edition 3 here (use `testthat::` to prevent ambiguity with
# `readr`).
testthat::local_edition(3)
expect_identical(ea_orig, ea_trivial_fill_na1)
expect_identical(ea_orig, ea_trivial_fill_na2)
expect_identical(ea_orig, ea_trivial_fill_locf)
Expand All @@ -30,9 +31,9 @@ test_that("epix_fill_through_version can extend observed versions, gives expecte
ea_fill_na = epix_fill_through_version(ea_orig, later_unobserved_version, "na")
ea_fill_locf = epix_fill_through_version(ea_orig, later_unobserved_version, "locf")

# We use edition 3 features here, passing `ignore_attr` to `waldo::compare`.
# Ensure we are using edition 3:
local_edition(3)
# We use testthat edition 3 features here, passing `ignore_attr` to
# `waldo::compare`. Ensure we are using edition 3:
testthat::local_edition(3)
withCallingHandlers({
expect_identical(ea_fill_na$versions_end, later_unobserved_version)
expect_identical(tibble::as_tibble(ea_fill_na$as_of(first_unobserved_version)),
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-epix_merge.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ test_that("epix_merge merges and carries forward updates properly", {
)
# We rely on testthat edition 3 expect_identical using waldo, not identical. See
# test-epix_fill_through_version.R comments for details.
local_edition(3)
testthat::local_edition(3)
expect_identical(xy, xy_expected)
})

Expand Down
65 changes: 65 additions & 0 deletions tests/testthat/test-epix_slide.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
library(dplyr)

test_that("epix_slide only works on an epi_archive",{
expect_error(epix_slide(data.frame(x=1)))
})

x <- tibble::tribble(~version, ~time_value, ~binary,
4, c(1:3), 2^(1:3),
5, c(1:2,4), 2^(4:6),
6, c(1:2,4:5), 2^(7:10),
7, 2:6, 2^(11:15)) %>%
tidyr::unnest(c(time_value,binary))

xx <- bind_cols(geo_value = rep("x",15), x) %>%
as_epi_archive()

test_that("epix_slide works as intended",{
xx1 <- epix_slide(x = xx,
f = ~ sum(.x$binary),
before = 2,
group_by = geo_value,
new_col_name = "sum_binary")

xx2 <- tibble(geo_value = rep("x",3),
# 7 should also be there below; this is a bug on issue #153
time_value = c(4,5,6),
sum_binary = c(2^3+2^2,
2^6+2^3,
2^10+2^9)) %>%
as_epi_df(as_of = 1) # Also a bug (issue #213)

expect_identical(xx1,xx2) # *

xx3 <- xx$slide(f = ~ sum(.x$binary),
before = 2,
group_by = "geo_value",
new_col_name = 'sum_binary')

expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical
})

test_that("epix_slide `before` validation works", {
expect_error(xx$slide(f = ~ sum(.x$binary)),
"`before` is required")
expect_error(xx$slide(f = ~ sum(.x$binary), before=NA),
"`before`.*NA")
expect_error(xx$slide(f = ~ sum(.x$binary), before=-1),
"`before`.*negative")
expect_error(xx$slide(f = ~ sum(.x$binary), before=1.5),
regexp="before",
class="vctrs_error_incompatible_type")
# We might want to allow this at some point (issue #219):
expect_error(xx$slide(f = ~ sum(.x$binary), before=Inf),
regexp="before",
class="vctrs_error_incompatible_type")
# (wrapper shouldn't introduce a value:)
expect_error(epix_slide(xx, f = ~ sum(.x$binary)), "`before` is required")
# These `before` values should be accepted:
expect_error(xx$slide(f = ~ sum(.x$binary), before=0),
NA)
expect_error(xx$slide(f = ~ sum(.x$binary), before=2L),
NA)
expect_error(xx$slide(f = ~ sum(.x$binary), before=365000),
NA)
})
Loading