Skip to content

Replace n with before (no after) in epix_slide #216

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Nov 13, 2022
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions R/archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ epi_archive =
#' details.
#' @importFrom data.table key
#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms
slide = function(f, ..., n, group_by, ref_time_values,
slide = function(f, ..., before, group_by, ref_time_values,
time_step, new_col_name = "slide_value",
as_list_col = FALSE, names_sep = "_",
all_rows = FALSE) {
Expand All @@ -599,8 +599,8 @@ epi_archive =
}

# If a custom time step is specified, then redefine units
before_num = n-1
if (!missing(time_step)) before_num = time_step(n-1)
before_num = before-1
if (!missing(time_step)) before_num = time_step(before-1)

# What to group by? If missing, set according to internal keys;
# otherwise, tidyselect.
Expand Down Expand Up @@ -673,12 +673,13 @@ epi_archive =
if (!missing(f)) {
if (rlang::is_formula(f)) f = rlang::as_function(f)

x = purrr::map_dfr(ref_time_values, function(t) {
self$as_of(t, min_time_value = t - before_num) %>%
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
self$as_of(ref_time_value,
min_time_value = ref_time_value - before_num) %>%
dplyr::group_by(!!!group_by) %>%
dplyr::group_modify(comp_one_grp,
f = f, ...,
time_value = t,
time_value = ref_time_value,
key_vars = key_vars,
new_col = new_col,
.keep = TRUE) %>%
Expand All @@ -700,12 +701,13 @@ epi_archive =
f = function(x, quo, ...) rlang::eval_tidy(quo, x)
new_col = sym(names(rlang::quos_auto_name(quos)))

x = purrr::map_dfr(ref_time_values, function(t) {
self$as_of(t, min_time_value = t - before_num) %>%
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
self$as_of(ref_time_value,
min_time_value = ref_time_value - before_num) %>%
dplyr::group_by(!!!group_by) %>%
dplyr::group_modify(comp_one_grp,
f = f, quo = quo,
time_value = t,
time_value = ref_time_value,
key_vars = key_vars,
new_col = new_col,
.keep = TRUE) %>%
Expand Down
18 changes: 9 additions & 9 deletions R/methods-epi_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -358,10 +358,10 @@ epix_merge = function(x, y,
#' @param ... Additional arguments to pass to the function or formula specified
#' via `f`. Alternatively, if `f` is missing, then the current argument is
#' interpreted as an expression for tidy evaluation.
#' @param n Number of time steps to use in the running window. For example, if
#' `n = 7`, and one time step is one day, then to produce a value on January 7
#' we apply the given function or formula to data in between January 1 and
#' 7.
#' @param before Number of time steps to use in the running window. For example,
#' if `before = 7`, and one time step is one day, then to produce a value on
#' January 7 we apply the given function or formula to data in between January
#' 1 and 7.
#' @param group_by The variable(s) to group by before slide computation. If
#' missing, then the keys in the underlying data table, excluding `time_value`
#' and `version`, will be used for grouping. To omit a grouping entirely, use
Expand Down Expand Up @@ -422,11 +422,11 @@ epix_merge = function(x, y,
#' Finally, this is simply a wrapper around the `slide()` method of the
#' `epi_archive` class, so if `x` is an `epi_archive` object, then:
#' ```
#' epix_slide(x, new_var = comp(old_var), n = 120)
#' epix_slide(x, new_var = comp(old_var), before = 120)
#' ```
#' is equivalent to:
#' ```
#' x$slide(x, new_var = comp(old_var), n = 120)
#' x$slide(new_var = comp(old_var), before = 120)
#' ```
#'
#' @importFrom rlang enquo
Expand All @@ -444,15 +444,15 @@ epix_merge = function(x, y,
#' by = "1 day")
#' epix_slide(x = archive_cases_dv_subset,
#' f = ~ mean(.x$case_rate_7d_av),
#' n = 3,
#' before = 3,
#' group_by = geo_value,
#' ref_time_values = time_values,
#' new_col_name = 'case_rate_3d_av')
epix_slide = function(x, f, ..., n, group_by, ref_time_values,
epix_slide = function(x, f, ..., before, group_by, ref_time_values,
time_step, new_col_name = "slide_value",
as_list_col = FALSE, names_sep = "_", all_rows = FALSE) {
if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.")
return(x$slide(f, ..., n = n,
return(x$slide(f, ..., before = before,
group_by = {{group_by}},
ref_time_values = ref_time_values,
time_step = time_step,
Expand Down
2 changes: 1 addition & 1 deletion man/epi_archive.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 8 additions & 8 deletions man/epix_slide.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 40 additions & 0 deletions tests/testthat/test-epix_slide.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
library(dplyr)
library(rlang)

test_that("epix_slide only works on an epi_archive",{
expect_error(epix_slide(data.frame(x=1)))
})

test_that("epix_slide works as intended",{
x <- tibble::tribble(~version, ~time_value,
5, c(1:2,4),
6, c(1:2,4:5),
7, 2:6) %>%
tidyr::unnest(time_value)

xx <- bind_cols(geo_value = rep("x",12),
arrange(x,time_value,version),
binary = 2^(1:12)) %>%
as_epi_archive()

xx1 <- epix_slide(x = xx,
f = ~ sum(.x$binary),
before = 3,
group_by = geo_value,
new_col_name = "sum_binary")

xx2 <- tibble(geo_value = rep("x",2),
time_value = c(5,6),
sum_binary = c(2^7,
2^10+2^8)) %>%
as_epi_df(as_of = 1)

expect_identical(xx1,xx2) # *

xx3 <- xx$slide(f = ~ sum(.x$binary),
before = 3,
group_by = "geo_value",
new_col_name = 'sum_binary')

expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical
})
18 changes: 9 additions & 9 deletions tests/testthat/test-methods-epi_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,21 +71,21 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss
compactify = TRUE)
reference_by_modulus = epix_slide(x = ea,
f = ~ mean(.x$case_rate_7d_av),
n = 3,
before = 3,
group_by = modulus,
ref_time_values = time_values,
new_col_name = 'case_rate_3d_av')
reference_by_both = epix_slide(x = ea,
f = ~ mean(.x$case_rate_7d_av),
n = 3,
before = 3,
group_by = c(geo_value, modulus),
ref_time_values = time_values,
new_col_name = 'case_rate_3d_av')
# test the passing-something-that-must-be-enquosed behavior:
expect_identical(
ea$slide(
f = ~ mean(.x$case_rate_7d_av),
n = 3,
before = 3,
group_by = modulus,
ref_time_values = time_values,
new_col_name = 'case_rate_3d_av'
Expand All @@ -96,7 +96,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss
expect_identical(
epix_slide(x = ea,
f = ~ mean(.x$case_rate_7d_av),
n = 3,
before = 3,
group_by = "modulus",
ref_time_values = time_values,
new_col_name = 'case_rate_3d_av'),
Expand All @@ -105,7 +105,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss
expect_identical(
ea$slide(
f = ~ mean(.x$case_rate_7d_av),
n = 3,
before = 3,
group_by = "modulus",
ref_time_values = time_values,
new_col_name = 'case_rate_3d_av'
Expand All @@ -121,7 +121,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss
expect_identical(
epix_slide(x = ea,
f = ~ mean(.x$case_rate_7d_av),
n = 3,
before = 3,
group_by = tidyselect::all_of(my_group_by),
ref_time_values = time_values,
new_col_name = 'case_rate_3d_av'),
Expand All @@ -130,7 +130,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss
expect_identical(
ea$slide(
f = ~ mean(.x$case_rate_7d_av),
n = 3,
before = 3,
group_by = tidyselect::all_of(my_group_by),
ref_time_values = time_values,
new_col_name = 'case_rate_3d_av'
Expand All @@ -141,15 +141,15 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss
expect_identical(
epix_slide(x = ea,
f = ~ mean(.x$case_rate_7d_av),
n = 3,
before = 3,
ref_time_values = time_values,
new_col_name = 'case_rate_3d_av'),
reference_by_both
)
expect_identical(
ea$slide(
f = ~ mean(.x$case_rate_7d_av),
n = 3,
before = 3,
ref_time_values = time_values,
new_col_name = 'case_rate_3d_av'
),
Expand Down
8 changes: 4 additions & 4 deletions vignettes/advanced.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ df %>%
df %>%
mutate(version = time_value) %>%
as_epi_archive() %>%
epix_slide(x_2dav = mean(x), n = 2, ref_time_values = as.Date("2020-06-02"))
epix_slide(x_2dav = mean(x), before = 2, ref_time_values = as.Date("2020-06-02"))

df %>%
mutate(version = time_value) %>%
as_epi_archive() %>%
epix_slide(~ mean(.x$x), n = 2, ref_time_values = as.Date("2020-06-02"))
epix_slide(~ mean(.x$x), before = 2, ref_time_values = as.Date("2020-06-02"))
```

When the slide computation returns an atomic vector (rather than a single value)
Expand Down Expand Up @@ -153,7 +153,7 @@ df %>%
as_epi_archive() %>%
epix_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)),
ref_time_values = as.Date("2020-06-02"),
n = 2, as_list_col = FALSE, names_sep = NULL)
before = 2, as_list_col = FALSE, names_sep = NULL)
```

## Multi-row outputs
Expand Down Expand Up @@ -354,7 +354,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) {
x %>%
epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value,
args = prob_arx_args(ahead = ahead)),
n = 120, ref_time_values = fc_time_values) %>%
before = 120, ref_time_values = fc_time_values) %>%
mutate(target_date = time_value + ahead, as_of = TRUE,
geo_value = fc_geo_value)
}
Expand Down
7 changes: 4 additions & 3 deletions vignettes/archive.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,9 @@ fc_time_values <- seq(as.Date("2020-08-01"),
as.Date("2021-12-01"),
by = "1 month")

z <- epix_slide(x, fc = prob_arx(x = percent_cli, y = case_rate_7d_av), n = 120,
ref_time_values = fc_time_values, group_by = geo_value)
z <- epix_slide(x, fc = prob_arx(x = percent_cli, y = case_rate_7d_av),
before = 120, ref_time_values = fc_time_values,
group_by = geo_value)

head(z, 10)
```
Expand Down Expand Up @@ -387,7 +388,7 @@ x_latest <- epix_as_of(x, max_version = max(x$DT$version))
k_week_ahead <- function(x, ahead = 7, as_of = TRUE) {
if (as_of) {
x %>%
epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), n = 120,
epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), before = 120,
ref_time_values = fc_time_values, group_by = geo_value) %>%
mutate(target_date = time_value + ahead, as_of = TRUE)
}
Expand Down
2 changes: 1 addition & 1 deletion vignettes/compactify.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ speeds <- rbind(speeds, speed_test(iterate_as_of,"as_of_1000x"))

# Performance of slide
slide_median <- function(my_ea) {
my_ea$slide(median = median(case_rate_7d_av), n = 7)
my_ea$slide(median = median(case_rate_7d_av), before = 7)
}

speeds <- rbind(speeds, speed_test(slide_median,"slide_median"))
Expand Down