Skip to content

Commit 18d1725

Browse files
authored
Merge pull request #433 from cmu-delphi/380-add-forecast_date-recipe
380 add forecast date recipe
2 parents eb02b76 + 4814377 commit 18d1725

23 files changed

+151
-317
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: epipredict
22
Title: Basic epidemiology forecasting methods
3-
Version: 0.1.6
3+
Version: 0.1.7
44
Authors@R: c(
55
person("Daniel J.", "McDonald", , "[email protected]", role = c("aut", "cre")),
66
person("Ryan", "Tibshirani", , "[email protected]", role = "aut"),

NAMESPACE

-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ S3method(bake,step_population_scaling)
2929
S3method(bake,step_training_window)
3030
S3method(detect_layer,frosting)
3131
S3method(detect_layer,workflow)
32-
S3method(epi_recipe,default)
3332
S3method(epi_recipe,epi_df)
3433
S3method(epi_recipe,formula)
3534
S3method(extract_argument,epi_workflow)

NEWS.md

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat
1919
- Make key column inference more consistent within the package and with current `epiprocess`.
2020
- Fix `quantile_reg()` producing error when asked to output just median-level predictions.
2121
- (temporary) ahead negative is allowed for `step_epi_ahead` until we have `step_epi_shift`
22+
- Add `reference_date` as an argument to `epi_recipe()`
2223

2324
## Bug fixes
2425
- Shifting no columns results in no error for either `step_epi_ahead` and `step_epi_lag`

R/epi_recipe.R

+98-87
Original file line numberDiff line numberDiff line change
@@ -13,30 +13,34 @@ epi_recipe <- function(x, ...) {
1313
}
1414

1515

16-
#' @rdname epi_recipe
17-
#' @export
18-
epi_recipe.default <- function(x, ...) {
19-
cli_abort(paste(
20-
"`x` must be an {.cls epi_df} or a {.cls formula},",
21-
"not a {.cls {class(x)[[1]]}}."
22-
))
23-
}
2416

2517
#' @rdname epi_recipe
2618
#' @inheritParams recipes::recipe
2719
#' @param roles A character string (the same length of `vars`) that
2820
#' describes a single role that the variable will take. This value could be
2921
#' anything but common roles are `"outcome"`, `"predictor"`,
3022
#' `"time_value"`, and `"geo_value"`
23+
#' @param reference_date Either a date of the same class as the `time_value`
24+
#' column in the `epi_df` or `NULL`. If a date, it gives the date to which all
25+
#' operations are relative. Typically, in real-time tasks this is the date that
26+
#' the model is created (and presumably trained). In forecasting, this is
27+
#' often the same as the most recent date of
28+
#' data availability, but when data is "latent" (reported after the date to
29+
#' which it corresponds), or if performing a nowcast, the `reference_date` may
30+
#' be later than this. Setting `reference_date`
31+
#' to a value BEFORE the most recent data is not a true "forecast",
32+
#' because future data is being used to create the model, but this may be
33+
#' reasonable in model building, nowcasting (predicting finalized values from
34+
#' preliminary data), or if producing a backcast. If `NULL`, it will be set
35+
#' to the `as_of` date of the `epi_df`.
3136
#' @param ... Further arguments passed to or from other methods (not currently
3237
#' used).
3338
#' @param formula A model formula. No in-line functions should be used here
3439
#' (e.g. `log(x)`, `x:y`, etc.) and minus signs are not allowed. These types of
3540
#' transformations should be enacted using `step` functions in this package.
3641
#' Dots are allowed as are simple multivariate outcome terms (i.e. no need for
3742
#' `cbind`; see Examples).
38-
#' @param x,data A data frame, tibble, or epi_df of the *template* data set
39-
#' (see below). This is always coerced to the first row to avoid memory issues
43+
#' @param x,data An epi_df of the *template* data set (see below).
4044
#' @inherit recipes::recipe return
4145
#'
4246
#' @export
@@ -56,100 +60,107 @@ epi_recipe.default <- function(x, ...) {
5660
#' step_naomit(all_outcomes(), skip = TRUE)
5761
#'
5862
#' r
59-
epi_recipe.epi_df <-
60-
function(x, formula = NULL, ..., vars = NULL, roles = NULL) {
61-
attr(x, "decay_to_tibble") <- FALSE
62-
if (!is.null(formula)) {
63-
if (!is.null(vars)) {
64-
cli_abort(paste0(
65-
"This `vars` specification will be ignored ",
63+
epi_recipe.epi_df <- function(x,
64+
reference_date = NULL,
65+
formula = NULL,
66+
...,
67+
vars = NULL,
68+
roles = NULL) {
69+
attr(x, "decay_to_tibble") <- FALSE
70+
if (!is.null(formula)) {
71+
if (!is.null(vars)) {
72+
cli_abort(paste0(
73+
"This `vars` specification will be ignored ",
74+
"when a formula is used"
75+
))
76+
}
77+
if (!is.null(roles)) {
78+
cli_abort(
79+
paste0(
80+
"This `roles` specification will be ignored ",
6681
"when a formula is used"
67-
))
68-
}
69-
if (!is.null(roles)) {
70-
cli_abort(
71-
paste0(
72-
"This `roles` specification will be ignored ",
73-
"when a formula is used"
74-
)
7582
)
76-
}
77-
78-
obj <- epi_recipe.formula(formula, x, ...)
79-
return(obj)
80-
}
81-
if (is.null(vars)) vars <- colnames(x)
82-
if (any(table(vars) > 1)) {
83-
cli_abort("`vars` should have unique members")
84-
}
85-
if (any(!(vars %in% colnames(x)))) {
86-
cli_abort("1 or more elements of `vars` are not in the data")
83+
)
8784
}
8885

89-
keys <- key_colnames(x) # we know x is an epi_df
86+
obj <- epi_recipe.formula(formula, x, ...)
87+
return(obj)
88+
}
89+
if (is.null(vars)) vars <- colnames(x)
90+
if (any(table(vars) > 1)) {
91+
cli_abort("`vars` should have unique members")
92+
}
93+
if (any(!(vars %in% colnames(x)))) {
94+
cli_abort("1 or more elements of `vars` are not in the data")
95+
}
9096

91-
var_info <- tibble(variable = vars)
92-
key_roles <- c("geo_value", rep("key", length(keys) - 2), "time_value")
97+
keys <- key_colnames(x) # we know x is an epi_df
9398

94-
## Check and add roles when available
95-
if (!is.null(roles)) {
96-
if (length(roles) != length(vars)) {
97-
cli_abort(paste0(
98-
"The number of roles should be the same as the number of ",
99-
"variables."
100-
))
101-
}
102-
var_info$role <- roles
103-
} else {
104-
var_info <- var_info %>% filter(!(variable %in% keys))
105-
var_info$role <- "raw"
106-
}
107-
## Now we add the keys when necessary
108-
var_info <- dplyr::union(
109-
var_info,
110-
tibble::tibble(variable = keys, role = key_roles)
111-
)
99+
var_info <- tibble(variable = vars)
100+
key_roles <- c("geo_value", rep("key", length(keys) - 2), "time_value")
112101

113-
## Add types
114-
var_info <- full_join(recipes:::get_types(x), var_info, by = "variable")
115-
var_info$source <- "original"
116-
117-
## arrange to easy order
118-
var_info <- var_info %>%
119-
arrange(factor(
120-
role,
121-
levels = union(
122-
c("predictor", "outcome", "time_value", "geo_value", "key"),
123-
unique(role)
124-
) # anything else
102+
## Check and add roles when available
103+
if (!is.null(roles)) {
104+
if (length(roles) != length(vars)) {
105+
cli_abort(paste0(
106+
"The number of roles should be the same as the number of ",
107+
"variables."
125108
))
126-
127-
## Return final object of class `recipe`
128-
out <- list(
129-
var_info = var_info,
130-
term_info = var_info,
131-
steps = NULL,
132-
template = x[1, ],
133-
max_time_value = max(x$time_value),
134-
levels = NULL,
135-
retained = NA
136-
)
137-
class(out) <- c("epi_recipe", "recipe")
138-
out
109+
}
110+
var_info$role <- roles
111+
} else {
112+
var_info <- var_info %>% filter(!(variable %in% keys))
113+
var_info$role <- "raw"
139114
}
115+
## Now we add the keys when necessary
116+
var_info <- dplyr::union(
117+
var_info,
118+
tibble::tibble(variable = keys, role = key_roles)
119+
)
120+
121+
## Add types
122+
var_info <- full_join(recipes:::get_types(x), var_info, by = "variable")
123+
var_info$source <- "original"
124+
125+
## arrange to easy order
126+
var_info <- var_info %>%
127+
arrange(factor(
128+
role,
129+
levels = union(
130+
c("predictor", "outcome", "time_value", "geo_value", "key"),
131+
unique(role)
132+
) # anything else
133+
))
134+
135+
## Return final object of class `recipe`
136+
max_time_value <- max(x$time_value)
137+
reference_date <- reference_date %||% attr(x, "metadata")$as_of
138+
out <- list(
139+
var_info = var_info,
140+
term_info = var_info,
141+
steps = NULL,
142+
template = x[1, ],
143+
max_time_value = max_time_value,
144+
reference_date = reference_date,
145+
levels = NULL,
146+
retained = NA
147+
)
148+
class(out) <- c("epi_recipe", "recipe")
149+
out
150+
}
140151

141152

142153
#' @rdname epi_recipe
143154
#' @export
144-
epi_recipe.formula <- function(formula, data, ...) {
155+
epi_recipe.formula <- function(formula, data, reference_date = NULL, ...) {
145156
# we ensure that there's only 1 row in the template
146157
data <- data[1, ]
147158
# check for minus:
148159
if (!epiprocess::is_epi_df(data)) {
149-
cli_abort(paste(
150-
"`epi_recipe()` has been called with a non-{.cls epi_df} object.",
151-
"Use `recipe()` instead."
152-
))
160+
cli_abort(
161+
"`epi_recipe()` has been called with a non-{.cls epi_df} object.
162+
Use `recipe()` instead."
163+
)
153164
}
154165

155166
attr(data, "decay_to_tibble") <- FALSE

README.md

+11-10
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ covid_case_death_rates
7878
#> An `epi_df` object, 20,496 x 4 with metadata:
7979
#> * geo_type = state
8080
#> * time_type = day
81-
#> * as_of = 2022-05-31 12:08:25.791826
81+
#> * as_of = 2022-05-31
8282
#>
8383
#> # A tibble: 20,496 × 4
8484
#> geo_value time_value case_rate death_rate
@@ -92,7 +92,7 @@ covid_case_death_rates
9292
#> 7 co 2020-12-31 35.8 0.649
9393
#> 8 ct 2020-12-31 52.1 0.819
9494
#> 9 dc 2020-12-31 31.0 0.601
95-
#> 10 de 2020-12-31 65.2 0.807
95+
#> 10 de 2020-12-31 64.3 0.912
9696
#> # ℹ 20,486 more rows
9797
```
9898

@@ -113,19 +113,20 @@ two_week_ahead <- arx_forecaster(
113113
two_week_ahead
114114
#> ══ A basic forecaster of type ARX Forecaster ═══════════════════════════════
115115
#>
116-
#> This forecaster was fit on 2024-11-11 11:38:31.
116+
#> This forecaster was fit on 2025-01-23 14:01:04.
117117
#>
118118
#> Training data was an <epi_df> with:
119119
#> • Geography: state,
120120
#> • Time type: day,
121-
#> • Using data up-to-date as of: 2022-05-31 12:08:25.
121+
#> • Using data up-to-date as of: 2022-05-31.
122+
#> • With the last data available on 2021-12-31
122123
#>
123124
#> ── Predictions ─────────────────────────────────────────────────────────────
124125
#>
125126
#> A total of 56 predictions are available for
126127
#> • 56 unique geographic regions,
127128
#> • At forecast date: 2021-12-31,
128-
#> • For target date: 2022-01-14.
129+
#> • For target date: 2022-01-14,
129130
#>
130131
```
131132

@@ -161,11 +162,11 @@ two_week_ahead$epi_workflow
161162
#>
162163
#> Coefficients:
163164
#> (Intercept) lag_0_case_rate lag_1_case_rate lag_2_case_rate
164-
#> -0.0073358 0.0030365 0.0012467 0.0009536
165+
#> -0.0072151 0.0030311 0.0012525 0.0009551
165166
#> lag_3_case_rate lag_7_case_rate lag_14_case_rate lag_0_death_rate
166-
#> 0.0011425 0.0012481 0.0003041 0.1351769
167+
#> 0.0011488 0.0012238 0.0003301 0.1348459
167168
#> lag_7_death_rate lag_14_death_rate
168-
#> 0.1471127 0.1062473
169+
#> 0.1468325 0.1056316
169170
#>
170171
#> ── Postprocessor ───────────────────────────────────────────────────────────
171172
#>
@@ -188,11 +189,11 @@ two_week_ahead$predictions
188189
#> # A tibble: 56 × 5
189190
#> geo_value .pred .pred_distn forecast_date target_date
190191
#> <chr> <dbl> <dist> <date> <date>
191-
#> 1 ak 0.449 quantiles(0.45)[2] 2021-12-31 2022-01-14
192+
#> 1 ak 0.448 quantiles(0.45)[2] 2021-12-31 2022-01-14
192193
#> 2 al 0.574 quantiles(0.57)[2] 2021-12-31 2022-01-14
193194
#> 3 ar 0.673 quantiles(0.67)[2] 2021-12-31 2022-01-14
194195
#> 4 as 0 quantiles(0.12)[2] 2021-12-31 2022-01-14
195-
#> 5 az 0.679 quantiles(0.68)[2] 2021-12-31 2022-01-14
196+
#> 5 az 0.678 quantiles(0.68)[2] 2021-12-31 2022-01-14
196197
#> 6 ca 0.575 quantiles(0.57)[2] 2021-12-31 2022-01-14
197198
#> 7 co 0.862 quantiles(0.86)[2] 2021-12-31 2022-01-14
198199
#> 8 ct 1.07 quantiles(1.07)[2] 2021-12-31 2022-01-14

epipredict.Rproj

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
Version: 1.0
2+
ProjectId: 51641a05-0347-438c-a50e-466a31e886c2
23

34
RestoreWorkspace: No
45
SaveWorkspace: No

man/epi_recipe.Rd

+24-6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/figures/lifecycle-archived.svg

-21
This file was deleted.

0 commit comments

Comments
 (0)