Skip to content

Commit f7309f2

Browse files
authored
Merge pull request #183 from cmu-delphi/ml-182-epi_df-additional-metadata
[Issue 182] Update `epi_df` examples, check `additional_metadata` type at construction
2 parents 4703684 + 0c2d263 commit f7309f2

File tree

6 files changed

+58
-21
lines changed

6 files changed

+58
-21
lines changed

R/epi_df.R

+19-8
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,9 @@ NULL
105105
#' then the current day-time will be used.
106106
#' @param additional_metadata List of additional metadata to attach to the
107107
#' `epi_df` object. The metadata will have `geo_type`, `time_type`, and
108-
#' `as_of` fields; named entries from the passed list or will be included as
109-
#' well.
108+
#' `as_of` fields; named entries from the passed list will be included as
109+
#' well. If your tibble has additional keys, be sure to specify them as a
110+
#' character vector in the `other_keys` component of `additional_metadata`.
110111
#' @param ... Additional arguments passed to methods.
111112
#' @return An `epi_df` object.
112113
#'
@@ -117,7 +118,11 @@ new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of,
117118
if (!is.data.frame(x)) {
118119
Abort("`x` must be a data frame.")
119120
}
120-
121+
122+
if (!is.list(additional_metadata)) {
123+
Abort("`additional_metadata` must be a list type.")
124+
}
125+
121126
# If geo type is missing, then try to guess it
122127
if (missing(geo_type)) {
123128
geo_type = guess_geo_type(x$geo_value)
@@ -184,8 +189,9 @@ new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of,
184189
#' then the current day-time will be used.
185190
#' @param additional_metadata List of additional metadata to attach to the
186191
#' `epi_df` object. The metadata will have `geo_type`, `time_type`, and
187-
#' `as_of` fields; named entries from the passed list or will be included as
188-
#' well.
192+
#' `as_of` fields; named entries from the passed list will be included as
193+
#' well. If your tibble has additional keys, be sure to specify them as a
194+
#' character vector in the `other_keys` component of `additional_metadata`.
189195
#' @param ... Additional arguments passed to methods.
190196
#' @return An `epi_df` object.
191197
#'
@@ -230,7 +236,7 @@ new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of,
230236
#'
231237
#' ex2 <- ex2_input %>% dplyr::rename(geo_value = state, time_value = reported_date) %>%
232238
#' as_epi_df(geo_type = "state", as_of = "2020-06-03",
233-
#' additional_metadata = c(other_keys = "pol"))
239+
#' additional_metadata = list(other_keys = "pol"))
234240
#'
235241
#' attr(ex2,"metadata")
236242
#'
@@ -244,8 +250,13 @@ new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of,
244250
#'
245251
#' ex3 <- ex3_input %>%
246252
#' tsibble::as_tsibble() %>% # needed to add the additional metadata
247-
#' dplyr::mutate(state = rep("MA",6)) %>%
248-
#' as_epi_df(additional_metadata = c(other_keys = "state"))
253+
#' # add 2 extra keys
254+
#' dplyr::mutate(
255+
#' state = rep("MA",6),
256+
#' pol = rep(c("blue", "swing", "swing"), each = 2)) %>%
257+
#' # the 2 extra keys we added have to be specified in the other_keys
258+
#' # component of additional_metadata.
259+
#' as_epi_df(additional_metadata = list(other_keys = c("state", "pol")))
249260
#'
250261
#' attr(ex3,"metadata")
251262
as_epi_df = function(x, ...) {

man/as_epi_df.Rd

+11-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/new_epi_df.Rd

+3-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-epi_df.R

+15
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,18 @@ test_that("new_epi_df works as intended", {
2424
expect_identical(attributes(epi_tib)$metadata$time_type, "day")
2525
expect_true(lubridate::is.POSIXt(attributes(epi_tib)$metadata$as_of))
2626
})
27+
28+
test_that("as_epi_df errors when additional_metadata is not a list", {
29+
# This is the 3rd example from as_epi_df
30+
ex_input <- jhu_csse_county_level_subset %>%
31+
dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
32+
dplyr::slice_tail(n = 6) %>%
33+
tsibble::as_tsibble() %>%
34+
dplyr::mutate(
35+
state = rep("MA",6),
36+
pol = rep(c("blue", "swing", "swing"), each = 2))
37+
38+
expect_error(
39+
as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")),
40+
"`additional_metadata` must be a list type.")
41+
})

tests/testthat/test-methods-epi_df.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ toy_epi_df <- tibble::tibble(
88
), times = 2),
99
geo_value = rep(c("ca", "hi"), each = 5),
1010
indicator_var = as.factor(rep(1:2, times = 5)),
11-
) %>% as_epi_df(additional_metadata = c(other_keys = "indicator_var"))
11+
) %>% as_epi_df(additional_metadata = list(other_keys = "indicator_var"))
1212

1313
att_toy = attr(toy_epi_df, "metadata")
1414

vignettes/epiprocess.Rmd

+9-5
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ head(ex2)
180180
181181
ex2 <- ex2 %>% rename(geo_value = state, time_value = reported_date) %>%
182182
as_epi_df(geo_type = "state", as_of = "2020-06-03",
183-
additional_metadata = c(other_keys = "pol"))
183+
additional_metadata = list(other_keys = "pol"))
184184
185185
attr(ex2,"metadata")
186186
```
@@ -200,17 +200,21 @@ ex3 <- jhu_csse_county_level_subset %>%
200200
attr(ex3,"metadata") # geo_type is county currently
201201
```
202202

203-
Now we add state (MA) as a new column and a key to the metadata. Reminder that lower case state name abbreviations are what we would expect if this were a `geo_value` column.
204-
```{r}
203+
Now we add `state` (MA) and `pol` as new columns to the data and as new keys to the metadata. As a reminder, lowercase state abbreviations are what we would expect if this were a `geo_value` column.
205204

205+
```{r}
206206
ex3 <- ex3 %>%
207207
as_tibble() %>% # needed to add the additional metadata
208-
mutate(state = rep(tolower("MA"),6)) %>%
209-
as_epi_df(additional_metadata = c(other_keys = "state"))
208+
mutate(
209+
state = rep(tolower("MA"),6),
210+
pol = rep(c("blue", "swing", "swing"), each = 2)) %>%
211+
as_epi_df(additional_metadata = list(other_keys = c("state", "pol")))
210212
211213
attr(ex3,"metadata")
212214
```
213215

216+
Note that the two additional keys we added, `state` and `pol`, are specified as a character vector in the `other_keys` component of the `additional_metadata` list. They must be specified in this manner so that downstream actions on the `epi_df`, like model fitting and prediction, can recognize and use these keys.
217+
214218
Currently, the `other_keys` metadata in an `epi_df` does not affect `epi_slide()`, in contrast to `other_keys` in `as_epi_archive()`, which affects how the update data is interpreted.
215219

216220
## Working with `epi_df` objects downstream

0 commit comments

Comments
 (0)