Skip to content

Commit 9ba843c

Browse files
committed
Fix all_rows = TRUE to work with move to vctrs
Document behavior when `all_rows = TRUE` and `as_list_col = TRUE`; this behavior might be different from before move to use `vctrs` though. Add tests to cover this case + others, which was caught only via a vignette build failing.
1 parent 2014016 commit 9ba843c

File tree

3 files changed

+156
-17
lines changed

3 files changed

+156
-17
lines changed

R/slide.R

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,15 @@
6262
#' when `as_list_col = FALSE`. Default is "_". Using `NULL` drops the prefix
6363
#' from `new_col_name` entirely.
6464
#' @param all_rows If `all_rows = TRUE`, then all rows of `x` will be kept in
65-
#' the output; otherwise, there will be one row for each time value in `x`
66-
#' that acts as a reference time value. Default is `FALSE`.
65+
#' the output even with `ref_time_values` provided, with some type of missing
66+
#' value marker for the slide computation output column(s) for `time_value`s
67+
#' outside `ref_time_values`; otherwise, there will be one row for each row in
68+
#' `x` that had a `time_value` in `ref_time_values`. Default is `FALSE`. The
69+
#' missing value marker is the result of `vctrs::vec_cast`ing `NA` to the type
70+
#' of the slide computation output. If using `as_list_col = TRUE`, note that
71+
#' the missing marker is a `NULL` entry in the list column; for certain
72+
#' operations, you might want to replace these `NULL` entries with a different
73+
#' `NA` marker.
6774
#' @return An `epi_df` object given by appending a new column to `x`, named
6875
#' according to the `new_col_name` argument.
6976
#'
@@ -306,10 +313,13 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
306313
# If all rows, then pad slide values with NAs, else filter down data group
307314
if (all_rows) {
308315
orig_values = slide_values
309-
slide_values = rep(NA, nrow(.data_group))
310-
slide_values[o] = orig_values
316+
slide_values = vctrs::vec_rep(vctrs::vec_cast(NA, orig_values), nrow(.data_group))
317+
# ^ using vctrs::vec_init would be shorter but docs don't guarantee it
318+
# fills with NA equivalent.
319+
vctrs::vec_slice(slide_values, o) = orig_values
320+
} else {
321+
.data_group = filter(.data_group, o)
311322
}
312-
else .data_group = filter(.data_group, o)
313323
return(mutate(.data_group, !!new_col := slide_values))
314324
}
315325

man/epi_slide.Rd

Lines changed: 9 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-epi_slide.R

Lines changed: 132 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,23 @@
22

33
d <- as.Date("2020-01-01")
44

5-
grouped = dplyr::bind_rows(
5+
ungrouped = dplyr::bind_rows(
66
dplyr::tibble(geo_value = "ak", time_value = d + 1:200, value=1:200),
77
dplyr::tibble(geo_value = "al", time_value = d + 1:5, value=-(1:5))
88
) %>%
9-
as_epi_df() %>%
9+
as_epi_df()
10+
grouped = ungrouped %>%
1011
group_by(geo_value)
11-
1212
f = function(x, g) dplyr::tibble(value=mean(x$value), count=length(x$value))
1313

14+
toy_edf = tibble::tribble(
15+
~geo_value, ~time_value, ~value ,
16+
"a" , 1:10 , 2L^( 1:10),
17+
"b" , 1:10 , 2L^(11:20),
18+
) %>%
19+
tidyr::unchop(c(time_value, value)) %>%
20+
as_epi_df(as_of = 100)
21+
1422
## --- These cases generate errors (or not): ---
1523
test_that("`before` and `after` are both vectors of length 1", {
1624
expect_error(epi_slide(grouped, f, before = c(0,1), after = 0, ref_time_values = d+3),
@@ -88,13 +96,7 @@ test_that("these doesn't produce an error; the error appears only if the ref tim
8896
})
8997

9098
test_that("computation output formats x as_list_col", {
91-
toy_edf = tibble::tribble(
92-
~geo_value, ~time_value, ~value ,
93-
"a" , 1:10 , 2L^( 1:10),
94-
"b" , 1:10 , 2L^(11:20),
95-
) %>%
96-
tidyr::unchop(c(time_value, value)) %>%
97-
as_epi_df(as_of = 100)
99+
# See `toy_edf` definition at top of file.
98100
# We'll try 7d sum with a few formats.
99101
basic_result_from_size1 = tibble::tribble(
100102
~geo_value, ~time_value, ~value , ~slide_value ,
@@ -170,3 +172,123 @@ test_that("epi_slide alerts if the provided f doesn't take enough args", {
170172
expect_warning(epi_slide(grouped, f_x_dots, before = 1L, ref_time_values = d+1),
171173
class = "epiprocess__assert_sufficient_f_args__mandatory_f_args_passed_to_f_dots")
172174
})
175+
176+
test_that("`ref_time_values` + `all_rows = TRUE` works", {
177+
# See `toy_edf` definition at top of file. We'll do variants of a slide
178+
# returning the following:
179+
basic_full_result = tibble::tribble(
180+
~geo_value, ~time_value, ~value , ~slide_value ,
181+
"a" , 1:10 , 2L^( 1:10), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7,rep(7L, 3L)), adaptive=TRUE, na.rm=TRUE),
182+
"b" , 1:10 , 2L^(11:20), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7,rep(7L, 3L)), adaptive=TRUE, na.rm=TRUE),
183+
) %>%
184+
tidyr::unchop(c(time_value, value, slide_value)) %>%
185+
dplyr::arrange(time_value) %>%
186+
as_epi_df(as_of = 100)
187+
# slide computations returning atomic vecs:
188+
expect_identical(
189+
toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value)),
190+
basic_full_result
191+
)
192+
expect_identical(
193+
toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value),
194+
ref_time_values = c(2L, 8L)),
195+
basic_full_result %>% dplyr::filter(time_value %in% c(2L, 8L))
196+
)
197+
expect_identical(
198+
toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value),
199+
ref_time_values = c(2L, 8L), all_rows = TRUE),
200+
basic_full_result %>%
201+
dplyr::mutate(slide_value = dplyr::if_else(time_value %in% c(2L, 8L),
202+
slide_value, NA_integer_))
203+
)
204+
# slide computations returning data frames:
205+
expect_identical(
206+
toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value))),
207+
basic_full_result %>% dplyr::rename(slide_value_value = slide_value)
208+
)
209+
expect_identical(
210+
toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
211+
ref_time_values = c(2L, 8L)),
212+
basic_full_result %>%
213+
dplyr::filter(time_value %in% c(2L, 8L)) %>%
214+
dplyr::rename(slide_value_value = slide_value)
215+
)
216+
expect_identical(
217+
toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
218+
ref_time_values = c(2L, 8L), all_rows = TRUE),
219+
basic_full_result %>%
220+
dplyr::mutate(slide_value = dplyr::if_else(time_value %in% c(2L, 8L),
221+
slide_value, NA_integer_)) %>%
222+
dplyr::rename(slide_value_value = slide_value)
223+
)
224+
# slide computations returning data frames with `as_list_col=TRUE`:
225+
expect_identical(
226+
toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
227+
as_list_col = TRUE),
228+
basic_full_result %>%
229+
dplyr::mutate(slide_value = purrr::map(slide_value, ~ data.frame(value = .x)))
230+
)
231+
expect_identical(
232+
toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
233+
ref_time_values = c(2L, 8L),
234+
as_list_col = TRUE),
235+
basic_full_result %>%
236+
dplyr::mutate(slide_value = purrr::map(slide_value, ~ data.frame(value = .x))) %>%
237+
dplyr::filter(time_value %in% c(2L, 8L))
238+
)
239+
expect_identical(
240+
toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
241+
ref_time_values = c(2L, 8L), all_rows = TRUE,
242+
as_list_col = TRUE),
243+
basic_full_result %>%
244+
dplyr::mutate(slide_value = purrr::map(slide_value, ~ data.frame(value = .x))) %>%
245+
dplyr::mutate(slide_value = dplyr::if_else(time_value %in% c(2L, 8L),
246+
slide_value, list(NULL)))
247+
)
248+
# slide computations returning data frames, `as_list_col = TRUE`, `unnest`:
249+
expect_identical(
250+
toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
251+
as_list_col = TRUE) %>%
252+
unnest(slide_value, names_sep = "_"),
253+
basic_full_result %>% dplyr::rename(slide_value_value = slide_value)
254+
)
255+
expect_identical(
256+
toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
257+
ref_time_values = c(2L, 8L),
258+
as_list_col = TRUE) %>%
259+
unnest(slide_value, names_sep = "_"),
260+
basic_full_result %>%
261+
dplyr::filter(time_value %in% c(2L, 8L)) %>%
262+
dplyr::rename(slide_value_value = slide_value)
263+
)
264+
expect_identical(
265+
toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
266+
ref_time_values = c(2L, 8L), all_rows = TRUE,
267+
as_list_col = TRUE) %>%
268+
unnest(slide_value, names_sep = "_"),
269+
basic_full_result %>%
270+
# XXX unclear exactly what we want in this case. Current approach is
271+
# compatible with `vctrs::vec_detect_missing` but breaks `tidyr::unnest`
272+
# compatibility
273+
dplyr::filter(time_value %in% c(2L, 8L)) %>%
274+
dplyr::rename(slide_value_value = slide_value)
275+
)
276+
rework_nulls = function(slide_values_list) {
277+
vctrs::vec_assign(
278+
slide_values_list,
279+
vctrs::vec_detect_missing(slide_values_list),
280+
list(vctrs::vec_cast(NA, vctrs::vec_ptype_common(!!!slide_values_list)))
281+
)
282+
}
283+
expect_identical(
284+
toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
285+
ref_time_values = c(2L, 8L), all_rows = TRUE,
286+
as_list_col = TRUE) %>%
287+
mutate(slide_value = rework_nulls(slide_value)) %>%
288+
unnest(slide_value, names_sep = "_"),
289+
basic_full_result %>%
290+
dplyr::mutate(slide_value = dplyr::if_else(time_value %in% c(2L, 8L),
291+
slide_value, NA_integer_)) %>%
292+
dplyr::rename(slide_value_value = slide_value)
293+
)
294+
})

0 commit comments

Comments
 (0)