Skip to content

Commit 8dff011

Browse files
authored
Merge pull request #511 from cmu-delphi/arrange-cannonical
Arrange cannonical
2 parents 271fa69 + 4c830ef commit 8dff011

8 files changed

+119
-31
lines changed

NAMESPACE

+3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
S3method("[",epi_df)
44
S3method("names<-",epi_df)
5+
S3method(arrange_canonical,default)
6+
S3method(arrange_canonical,epi_df)
57
S3method(as_epi_df,data.frame)
68
S3method(as_epi_df,epi_df)
79
S3method(as_epi_df,tbl_df)
@@ -45,6 +47,7 @@ S3method(unnest,epi_df)
4547
export("%>%")
4648
export(archive_cases_dv_subset)
4749
export(arrange)
50+
export(arrange_canonical)
4851
export(as_epi_archive)
4952
export(as_epi_df)
5053
export(as_tsibble)

R/key_colnames.R

+3-3
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,20 @@ key_colnames.default <- function(x, ...) {
1818
#' @export
1919
key_colnames.data.frame <- function(x, other_keys = character(0L), ...) {
2020
assert_character(other_keys)
21-
nm <- c("time_value", "geo_value", other_keys)
21+
nm <- c("geo_value", "time_value", other_keys)
2222
intersect(nm, colnames(x))
2323
}
2424

2525
#' @export
2626
key_colnames.epi_df <- function(x, ...) {
2727
other_keys <- attr(x, "metadata")$other_keys
28-
c("time_value", "geo_value", other_keys)
28+
c("geo_value", "time_value", other_keys)
2929
}
3030

3131
#' @export
3232
key_colnames.epi_archive <- function(x, ...) {
3333
other_keys <- attr(x, "metadata")$other_keys
34-
c("time_value", "geo_value", other_keys)
34+
c("geo_value", "time_value", other_keys)
3535
}
3636

3737
kill_time_value <- function(v) {

R/methods-epi_df.R

+44-4
Original file line numberDiff line numberDiff line change
@@ -274,8 +274,8 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
274274
#' daily_edf %>%
275275
#' group_by(geo_value) %>%
276276
#' complete(time_value = full_seq(time_value, period = 1))
277-
#' # Complete has explicit=TRUE by default, but if it's FALSE, then complete only fills the implicit gaps
278-
#' # not those that are explicitly NA
277+
#' # Complete has explicit=TRUE by default, but if it's FALSE, then complete
278+
#' # only fills the implicit gaps, not those that are explicitly NA
279279
#' daily_edf <- tibble::tribble(
280280
#' ~geo_value, ~time_value, ~value,
281281
#' 1, start_date + 1, 1,
@@ -303,11 +303,18 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
303303
#' ) %>%
304304
#' as_epi_df(as_of = start_date + 3)
305305
#' weekly_edf %>%
306-
#' complete(geo_value, time_value = full_seq(time_value, period = 7), fill = list(value = 0))
306+
#' complete(
307+
#' geo_value,
308+
#' time_value = full_seq(time_value, period = 7),
309+
#' fill = list(value = 0)
310+
#' )
307311
#' # With grouping
308312
#' weekly_edf %>%
309313
#' group_by(geo_value) %>%
310-
#' complete(time_value = full_seq(time_value, period = 7), fill = list(value = 0))
314+
#' complete(
315+
#' time_value = full_seq(time_value, period = 7),
316+
#' fill = list(value = 0)
317+
#' )
311318
#' @export
312319
complete.epi_df <- function(data, ..., fill = list(), explicit = TRUE) {
313320
result <- dplyr::dplyr_reconstruct(NextMethod(), data)
@@ -331,3 +338,36 @@ reclass <- function(x, metadata) {
331338
attributes(x)$metadata <- metadata
332339
return(x)
333340
}
341+
342+
#' Arrange an epi_df into a standard order
343+
#'
344+
#' Moves [key_colnames()] to the left, then arranges rows based on that
345+
#' ordering. This function is mainly for use in tests and so that
346+
#' other function output will be in predictable order, where necessary.
347+
#'
348+
#' @param x an `epi_df`. Other objects will produce a warning and return as is.
349+
#' @param ... not used
350+
#'
351+
#' @keywords internal
352+
#' @export
353+
arrange_canonical <- function(x, ...) {
354+
UseMethod("arrange_canonical")
355+
}
356+
357+
#' @export
358+
arrange_canonical.default <- function(x, ...) {
359+
rlang::check_dots_empty()
360+
cli::cli_abort(c(
361+
"`arrange_canonical()` is only meaningful for an {.cls epi_df}."
362+
))
363+
return(x)
364+
}
365+
366+
#' @export
367+
arrange_canonical.epi_df <- function(x, ...) {
368+
rlang::check_dots_empty()
369+
keys <- key_colnames(x)
370+
x %>%
371+
dplyr::relocate(dplyr::all_of(keys), .before = 1) %>%
372+
dplyr::arrange(dplyr::across(dplyr::all_of(keys)))
373+
}

man/arrange_canonical.Rd

+19
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/complete.epi_df.Rd

+11-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/_snaps/revision-latency-functions.md

+16-16
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,19 @@
2828
time_value geo_value n_revisions min_lag max_lag time_near_latest spread
2929
<date> <chr> <dbl> <drtn> <drtn> <drtn> <dbl>
3030
1 2020-01-01 ak 4 2 days 19 days 19 days 101
31-
2 2020-01-01 al 1 0 days 19 days 19 days 99
32-
3 2020-01-02 ak 1 4 days 5 days 4 days 9
33-
4 2020-01-02 al 0 0 days 0 days 0 days 0
34-
5 2020-01-03 ak 0 3 days 3 days 3 days 0
31+
2 2020-01-02 ak 1 4 days 5 days 4 days 9
32+
3 2020-01-03 ak 0 3 days 3 days 3 days 0
33+
4 2020-01-01 al 1 0 days 19 days 19 days 99
34+
5 2020-01-02 al 0 0 days 0 days 0 days 0
3535
6 2020-01-03 al 1 1 days 2 days 2 days 3
3636
7 2020-01-04 al 0 1 days 1 days 1 days 0
3737
rel_spread min_value max_value median_value
3838
<dbl> <dbl> <dbl> <dbl>
3939
1 0.990 1 102 6
40-
2 0.99 1 100 50.5
41-
3 0.09 91 100 95.5
42-
4 0 1 1 1
43-
5 NaN 0 0 0
40+
2 0.09 91 100 95.5
41+
3 NaN 0 0 0
42+
4 0.99 1 100 50.5
43+
5 0 1 1 1
4444
6 0.75 1 4 2.5
4545
7 0 9 9 9
4646

@@ -76,19 +76,19 @@
7676
time_value geo_value n_revisions min_lag max_lag time_near_latest spread
7777
<date> <chr> <dbl> <drtn> <drtn> <drtn> <dbl>
7878
1 2020-01-01 ak 6 2 days 19 days 19 days 101
79-
2 2020-01-01 al 1 0 days 19 days 19 days 99
80-
3 2020-01-02 ak 1 4 days 5 days 4 days 9
81-
4 2020-01-02 al 0 0 days 0 days 0 days 0
82-
5 2020-01-03 ak 0 3 days 3 days 3 days 0
79+
2 2020-01-02 ak 1 4 days 5 days 4 days 9
80+
3 2020-01-03 ak 0 3 days 3 days 3 days 0
81+
4 2020-01-01 al 1 0 days 19 days 19 days 99
82+
5 2020-01-02 al 0 0 days 0 days 0 days 0
8383
6 2020-01-03 al 1 1 days 2 days 2 days 3
8484
7 2020-01-04 al 1 0 days 1 days 1 days 0
8585
rel_spread min_value max_value median_value
8686
<dbl> <dbl> <dbl> <dbl>
8787
1 0.990 1 102 5.5
88-
2 0.99 1 100 50.5
89-
3 0.09 91 100 95.5
90-
4 0 1 1 1
91-
5 NaN 0 0 0
88+
2 0.09 91 100 95.5
89+
3 NaN 0 0 0
90+
4 0.99 1 100 50.5
91+
5 0 1 1 1
9292
6 0.75 1 4 2.5
9393
7 0 9 9 9
9494

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
test_that("canonical arrangement works", {
2+
tib <- tibble(
3+
x = 1:8,
4+
demo_grp = rep(c("b", "b", "a", "a"), times = 2),
5+
geo_value = rep(c("ga", "ca"), each = 4),
6+
time_value = rep(2:1, times = 4)
7+
)
8+
expect_error(arrange_canonical(tib))
9+
10+
tib <- tib %>% as_epi_df(additional_metadata = list(other_keys = "demo_grp"))
11+
expect_equal(names(tib), c("geo_value", "time_value", "x", "demo_grp"))
12+
13+
tib_sorted <- arrange_canonical(tib)
14+
expect_equal(names(tib_sorted), c("geo_value", "time_value", "demo_grp", "x"))
15+
expect_equal(tib_sorted$geo_value, rep(c("ca", "ga"), each = 4))
16+
expect_equal(tib_sorted$time_value, c(1, 1, 2, 2, 1, 1, 2, 2))
17+
expect_equal(tib_sorted$demo_grp, rep(letters[1:2], times = 4))
18+
expect_equal(tib_sorted$x, c(8, 6, 7, 5, 4, 2, 3, 1))
19+
})

tests/testthat/test-utils.R

+4-4
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,8 @@ test_that("guess_period works", {
240240
weekly_dates
241241
)
242242
# On POSIXcts:
243-
daily_posixcts <- as.POSIXct(daily_dates, tz = "ET") + 3600
244-
weekly_posixcts <- as.POSIXct(weekly_dates, tz = "ET") + 3600
243+
daily_posixcts <- as.POSIXct(daily_dates, tz = "US/Aleutian") + 3600
244+
weekly_posixcts <- as.POSIXct(weekly_dates, tz = "US/Aleutian") + 3600
245245
expect_identical(
246246
daily_posixcts[[1L]] + guess_period(daily_posixcts) * (seq_along(daily_posixcts) - 1L),
247247
daily_posixcts
@@ -251,8 +251,8 @@ test_that("guess_period works", {
251251
weekly_posixcts
252252
)
253253
# On POSIXlts:
254-
daily_posixlts <- as.POSIXlt(daily_dates, tz = "ET") + 3600
255-
weekly_posixlts <- as.POSIXlt(weekly_dates, tz = "ET") + 3600
254+
daily_posixlts <- as.POSIXlt(daily_dates, tz = "US/Aleutian") + 3600
255+
weekly_posixlts <- as.POSIXlt(weekly_dates, tz = "US/Aleutian") + 3600
256256
expect_identical(
257257
daily_posixlts[[1L]] + guess_period(daily_posixlts) * (seq_along(daily_posixlts) - 1L),
258258
daily_posixlts

0 commit comments

Comments
 (0)