Merge pull request #511 from cmu-delphi/arrange-cannonical

brookslogan · web-flow · commit 8dff01144dc0 · 2024-08-21T10:34:40.000-07:00
Arrange cannonical
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,8 @@
 
 S3method("[",epi_df)
 S3method("names<-",epi_df)
+S3method(arrange_canonical,default)
+S3method(arrange_canonical,epi_df)
 S3method(as_epi_df,data.frame)
 S3method(as_epi_df,epi_df)
 S3method(as_epi_df,tbl_df)
@@ -45,6 +47,7 @@ S3method(unnest,epi_df)
 export("%>%")
 export(archive_cases_dv_subset)
 export(arrange)
+export(arrange_canonical)
 export(as_epi_archive)
 export(as_epi_df)
 export(as_tsibble)
diff --git a/R/key_colnames.R b/R/key_colnames.R
@@ -18,20 +18,20 @@ key_colnames.default <- function(x, ...) {
 #' @export
 key_colnames.data.frame <- function(x, other_keys = character(0L), ...) {
   assert_character(other_keys)
-  nm <- c("time_value", "geo_value", other_keys)
+  nm <- c("geo_value", "time_value", other_keys)
   intersect(nm, colnames(x))
 }
 
 #' @export
 key_colnames.epi_df <- function(x, ...) {
   other_keys <- attr(x, "metadata")$other_keys
-  c("time_value", "geo_value", other_keys)
+  c("geo_value", "time_value", other_keys)
 }
 
 #' @export
 key_colnames.epi_archive <- function(x, ...) {
   other_keys <- attr(x, "metadata")$other_keys
-  c("time_value", "geo_value", other_keys)
+  c("geo_value", "time_value", other_keys)
 }
 
 kill_time_value <- function(v) {
diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
@@ -274,8 +274,8 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
 #' daily_edf %>%
 #'   group_by(geo_value) %>%
 #'   complete(time_value = full_seq(time_value, period = 1))
-#' # Complete has explicit=TRUE by default, but if it's FALSE, then complete only fills the implicit gaps
-#' # not those that are explicitly NA
+#' # Complete has explicit=TRUE by default, but if it's FALSE, then complete
+#' # only fills the implicit gaps, not those that are explicitly NA
 #' daily_edf <- tibble::tribble(
 #'   ~geo_value, ~time_value, ~value,
 #'   1, start_date + 1, 1,
@@ -303,11 +303,18 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
 #' ) %>%
 #'   as_epi_df(as_of = start_date + 3)
 #' weekly_edf %>%
-#'   complete(geo_value, time_value = full_seq(time_value, period = 7), fill = list(value = 0))
+#'   complete(
+#'     geo_value,
+#'     time_value = full_seq(time_value, period = 7),
+#'     fill = list(value = 0)
+#'   )
 #' # With grouping
 #' weekly_edf %>%
 #'   group_by(geo_value) %>%
-#'   complete(time_value = full_seq(time_value, period = 7), fill = list(value = 0))
+#'   complete(
+#'     time_value = full_seq(time_value, period = 7),
+#'     fill = list(value = 0)
+#'   )
 #' @export
 complete.epi_df <- function(data, ..., fill = list(), explicit = TRUE) {
   result <- dplyr::dplyr_reconstruct(NextMethod(), data)
@@ -331,3 +338,36 @@ reclass <- function(x, metadata) {
   attributes(x)$metadata <- metadata
   return(x)
 }
+
+#' Arrange an epi_df into a standard order
+#'
+#' Moves [key_colnames()] to the left, then arranges rows based on that
+#' ordering. This function is mainly for use in tests and so that
+#' other function output will be in predictable order, where necessary.
+#'
+#' @param x an `epi_df`. Other objects will produce a warning and return as is.
+#' @param ... not used
+#'
+#' @keywords internal
+#' @export
+arrange_canonical <- function(x, ...) {
+  UseMethod("arrange_canonical")
+}
+
+#' @export
+arrange_canonical.default <- function(x, ...) {
+  rlang::check_dots_empty()
+  cli::cli_abort(c(
+    "`arrange_canonical()` is only meaningful for an {.cls epi_df}."
+  ))
+  return(x)
+}
+
+#' @export
+arrange_canonical.epi_df <- function(x, ...) {
+  rlang::check_dots_empty()
+  keys <- key_colnames(x)
+  x %>%
+    dplyr::relocate(dplyr::all_of(keys), .before = 1) %>%
+    dplyr::arrange(dplyr::across(dplyr::all_of(keys)))
+}
diff --git a/man/arrange_canonical.Rd b/man/arrange_canonical.Rd
diff --git a/man/complete.epi_df.Rd b/man/complete.epi_df.Rd
diff --git a/tests/testthat/_snaps/revision-latency-functions.md b/tests/testthat/_snaps/revision-latency-functions.md
@@ -28,19 +28,19 @@
         time_value geo_value n_revisions min_lag max_lag time_near_latest spread
         <date>     <chr>           <dbl> <drtn>  <drtn>  <drtn>            <dbl>
       1 2020-01-01 ak                  4 2 days  19 days 19 days             101
-      2 2020-01-01 al                  1 0 days  19 days 19 days              99
-      3 2020-01-02 ak                  1 4 days   5 days  4 days               9
-      4 2020-01-02 al                  0 0 days   0 days  0 days               0
-      5 2020-01-03 ak                  0 3 days   3 days  3 days               0
+      2 2020-01-02 ak                  1 4 days   5 days  4 days               9
+      3 2020-01-03 ak                  0 3 days   3 days  3 days               0
+      4 2020-01-01 al                  1 0 days  19 days 19 days              99
+      5 2020-01-02 al                  0 0 days   0 days  0 days               0
       6 2020-01-03 al                  1 1 days   2 days  2 days               3
       7 2020-01-04 al                  0 1 days   1 days  1 days               0
         rel_spread min_value max_value median_value
              <dbl>     <dbl>     <dbl>        <dbl>
       1      0.990         1       102          6  
-      2      0.99          1       100         50.5
-      3      0.09         91       100         95.5
-      4      0             1         1          1  
-      5    NaN             0         0          0  
+      2      0.09         91       100         95.5
+      3    NaN             0         0          0  
+      4      0.99          1       100         50.5
+      5      0             1         1          1  
       6      0.75          1         4          2.5
       7      0             9         9          9  
 
@@ -76,19 +76,19 @@
         time_value geo_value n_revisions min_lag max_lag time_near_latest spread
         <date>     <chr>           <dbl> <drtn>  <drtn>  <drtn>            <dbl>
       1 2020-01-01 ak                  6 2 days  19 days 19 days             101
-      2 2020-01-01 al                  1 0 days  19 days 19 days              99
-      3 2020-01-02 ak                  1 4 days   5 days  4 days               9
-      4 2020-01-02 al                  0 0 days   0 days  0 days               0
-      5 2020-01-03 ak                  0 3 days   3 days  3 days               0
+      2 2020-01-02 ak                  1 4 days   5 days  4 days               9
+      3 2020-01-03 ak                  0 3 days   3 days  3 days               0
+      4 2020-01-01 al                  1 0 days  19 days 19 days              99
+      5 2020-01-02 al                  0 0 days   0 days  0 days               0
       6 2020-01-03 al                  1 1 days   2 days  2 days               3
       7 2020-01-04 al                  1 0 days   1 days  1 days               0
         rel_spread min_value max_value median_value
              <dbl>     <dbl>     <dbl>        <dbl>
       1      0.990         1       102          5.5
-      2      0.99          1       100         50.5
-      3      0.09         91       100         95.5
-      4      0             1         1          1  
-      5    NaN             0         0          0  
+      2      0.09         91       100         95.5
+      3    NaN             0         0          0  
+      4      0.99          1       100         50.5
+      5      0             1         1          1  
       6      0.75          1         4          2.5
       7      0             9         9          9  
 
diff --git a/tests/testthat/test-arrange-canonical.R b/tests/testthat/test-arrange-canonical.R
@@ -0,0 +1,19 @@
+test_that("canonical arrangement works", {
+  tib <- tibble(
+    x = 1:8,
+    demo_grp = rep(c("b", "b", "a", "a"), times = 2),
+    geo_value = rep(c("ga", "ca"), each = 4),
+    time_value = rep(2:1, times = 4)
+  )
+  expect_error(arrange_canonical(tib))
+
+  tib <- tib %>% as_epi_df(additional_metadata = list(other_keys = "demo_grp"))
+  expect_equal(names(tib), c("geo_value", "time_value", "x", "demo_grp"))
+
+  tib_sorted <- arrange_canonical(tib)
+  expect_equal(names(tib_sorted), c("geo_value", "time_value", "demo_grp", "x"))
+  expect_equal(tib_sorted$geo_value, rep(c("ca", "ga"), each = 4))
+  expect_equal(tib_sorted$time_value, c(1, 1, 2, 2, 1, 1, 2, 2))
+  expect_equal(tib_sorted$demo_grp, rep(letters[1:2], times = 4))
+  expect_equal(tib_sorted$x, c(8, 6, 7, 5, 4, 2, 3, 1))
+})
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
@@ -240,8 +240,8 @@ test_that("guess_period works", {
     weekly_dates
   )
   # On POSIXcts:
-  daily_posixcts <- as.POSIXct(daily_dates, tz = "ET") + 3600
-  weekly_posixcts <- as.POSIXct(weekly_dates, tz = "ET") + 3600
+  daily_posixcts <- as.POSIXct(daily_dates, tz = "US/Aleutian") + 3600
+  weekly_posixcts <- as.POSIXct(weekly_dates, tz = "US/Aleutian") + 3600
   expect_identical(
     daily_posixcts[[1L]] + guess_period(daily_posixcts) * (seq_along(daily_posixcts) - 1L),
     daily_posixcts
@@ -251,8 +251,8 @@ test_that("guess_period works", {
     weekly_posixcts
   )
   # On POSIXlts:
-  daily_posixlts <- as.POSIXlt(daily_dates, tz = "ET") + 3600
-  weekly_posixlts <- as.POSIXlt(weekly_dates, tz = "ET") + 3600
+  daily_posixlts <- as.POSIXlt(daily_dates, tz = "US/Aleutian") + 3600
+  weekly_posixlts <- as.POSIXlt(weekly_dates, tz = "US/Aleutian") + 3600
   expect_identical(
     daily_posixlts[[1L]] + guess_period(daily_posixlts) * (seq_along(daily_posixlts) - 1L),
     daily_posixlts