@@ -13,30 +13,34 @@ epi_recipe <- function(x, ...) {
13
13
}
14
14
15
15
16
- # ' @rdname epi_recipe
17
- # ' @export
18
- epi_recipe.default <- function (x , ... ) {
19
- cli_abort(paste(
20
- " `x` must be an {.cls epi_df} or a {.cls formula}," ,
21
- " not a {.cls {class(x)[[1]]}}."
22
- ))
23
- }
24
16
25
17
# ' @rdname epi_recipe
26
18
# ' @inheritParams recipes::recipe
27
19
# ' @param roles A character string (the same length of `vars`) that
28
20
# ' describes a single role that the variable will take. This value could be
29
21
# ' anything but common roles are `"outcome"`, `"predictor"`,
30
22
# ' `"time_value"`, and `"geo_value"`
23
+ # ' @param reference_date Either a date of the same class as the `time_value`
24
+ # ' column in the `epi_df` or `NULL`. If a date, it gives the date to which all
25
+ # ' operations are relative. Typically, in real-time tasks this is the date that
26
+ # ' the model is created (and presumably trained). In forecasting, this is
27
+ # ' often the same as the most recent date of
28
+ # ' data availability, but when data is "latent" (reported after the date to
29
+ # ' which it corresponds), or if performing a nowcast, the `reference_date` may
30
+ # ' be later than this. Setting `reference_date`
31
+ # ' to a value BEFORE the most recent data is not a true "forecast",
32
+ # ' because future data is being used to create the model, but this may be
33
+ # ' reasonable in model building, nowcasting (predicting finalized values from
34
+ # ' preliminary data), or if producing a backcast. If `NULL`, it will be set
35
+ # ' to the `as_of` date of the `epi_df`.
31
36
# ' @param ... Further arguments passed to or from other methods (not currently
32
37
# ' used).
33
38
# ' @param formula A model formula. No in-line functions should be used here
34
39
# ' (e.g. `log(x)`, `x:y`, etc.) and minus signs are not allowed. These types of
35
40
# ' transformations should be enacted using `step` functions in this package.
36
41
# ' Dots are allowed as are simple multivariate outcome terms (i.e. no need for
37
42
# ' `cbind`; see Examples).
38
- # ' @param x,data A data frame, tibble, or epi_df of the *template* data set
39
- # ' (see below). This is always coerced to the first row to avoid memory issues
43
+ # ' @param x,data An epi_df of the *template* data set (see below).
40
44
# ' @inherit recipes::recipe return
41
45
# '
42
46
# ' @export
@@ -56,100 +60,107 @@ epi_recipe.default <- function(x, ...) {
56
60
# ' step_naomit(all_outcomes(), skip = TRUE)
57
61
# '
58
62
# ' r
59
- epi_recipe.epi_df <-
60
- function (x , formula = NULL , ... , vars = NULL , roles = NULL ) {
61
- attr(x , " decay_to_tibble" ) <- FALSE
62
- if (! is.null(formula )) {
63
- if (! is.null(vars )) {
64
- cli_abort(paste0(
65
- " This `vars` specification will be ignored " ,
63
+ epi_recipe.epi_df <- function (x ,
64
+ reference_date = NULL ,
65
+ formula = NULL ,
66
+ ... ,
67
+ vars = NULL ,
68
+ roles = NULL ) {
69
+ attr(x , " decay_to_tibble" ) <- FALSE
70
+ if (! is.null(formula )) {
71
+ if (! is.null(vars )) {
72
+ cli_abort(paste0(
73
+ " This `vars` specification will be ignored " ,
74
+ " when a formula is used"
75
+ ))
76
+ }
77
+ if (! is.null(roles )) {
78
+ cli_abort(
79
+ paste0(
80
+ " This `roles` specification will be ignored " ,
66
81
" when a formula is used"
67
- ))
68
- }
69
- if (! is.null(roles )) {
70
- cli_abort(
71
- paste0(
72
- " This `roles` specification will be ignored " ,
73
- " when a formula is used"
74
- )
75
82
)
76
- }
77
-
78
- obj <- epi_recipe.formula(formula , x , ... )
79
- return (obj )
80
- }
81
- if (is.null(vars )) vars <- colnames(x )
82
- if (any(table(vars ) > 1 )) {
83
- cli_abort(" `vars` should have unique members" )
84
- }
85
- if (any(! (vars %in% colnames(x )))) {
86
- cli_abort(" 1 or more elements of `vars` are not in the data" )
83
+ )
87
84
}
88
85
89
- keys <- key_colnames(x ) # we know x is an epi_df
86
+ obj <- epi_recipe.formula(formula , x , ... )
87
+ return (obj )
88
+ }
89
+ if (is.null(vars )) vars <- colnames(x )
90
+ if (any(table(vars ) > 1 )) {
91
+ cli_abort(" `vars` should have unique members" )
92
+ }
93
+ if (any(! (vars %in% colnames(x )))) {
94
+ cli_abort(" 1 or more elements of `vars` are not in the data" )
95
+ }
90
96
91
- var_info <- tibble(variable = vars )
92
- key_roles <- c(" geo_value" , rep(" key" , length(keys ) - 2 ), " time_value" )
97
+ keys <- key_colnames(x ) # we know x is an epi_df
93
98
94
- # # Check and add roles when available
95
- if (! is.null(roles )) {
96
- if (length(roles ) != length(vars )) {
97
- cli_abort(paste0(
98
- " The number of roles should be the same as the number of " ,
99
- " variables."
100
- ))
101
- }
102
- var_info $ role <- roles
103
- } else {
104
- var_info <- var_info %> % filter(! (variable %in% keys ))
105
- var_info $ role <- " raw"
106
- }
107
- # # Now we add the keys when necessary
108
- var_info <- dplyr :: union(
109
- var_info ,
110
- tibble :: tibble(variable = keys , role = key_roles )
111
- )
99
+ var_info <- tibble(variable = vars )
100
+ key_roles <- c(" geo_value" , rep(" key" , length(keys ) - 2 ), " time_value" )
112
101
113
- # # Add types
114
- var_info <- full_join(recipes ::: get_types(x ), var_info , by = " variable" )
115
- var_info $ source <- " original"
116
-
117
- # # arrange to easy order
118
- var_info <- var_info %> %
119
- arrange(factor (
120
- role ,
121
- levels = union(
122
- c(" predictor" , " outcome" , " time_value" , " geo_value" , " key" ),
123
- unique(role )
124
- ) # anything else
102
+ # # Check and add roles when available
103
+ if (! is.null(roles )) {
104
+ if (length(roles ) != length(vars )) {
105
+ cli_abort(paste0(
106
+ " The number of roles should be the same as the number of " ,
107
+ " variables."
125
108
))
126
-
127
- # # Return final object of class `recipe`
128
- out <- list (
129
- var_info = var_info ,
130
- term_info = var_info ,
131
- steps = NULL ,
132
- template = x [1 , ],
133
- max_time_value = max(x $ time_value ),
134
- levels = NULL ,
135
- retained = NA
136
- )
137
- class(out ) <- c(" epi_recipe" , " recipe" )
138
- out
109
+ }
110
+ var_info $ role <- roles
111
+ } else {
112
+ var_info <- var_info %> % filter(! (variable %in% keys ))
113
+ var_info $ role <- " raw"
139
114
}
115
+ # # Now we add the keys when necessary
116
+ var_info <- dplyr :: union(
117
+ var_info ,
118
+ tibble :: tibble(variable = keys , role = key_roles )
119
+ )
120
+
121
+ # # Add types
122
+ var_info <- full_join(recipes ::: get_types(x ), var_info , by = " variable" )
123
+ var_info $ source <- " original"
124
+
125
+ # # arrange to easy order
126
+ var_info <- var_info %> %
127
+ arrange(factor (
128
+ role ,
129
+ levels = union(
130
+ c(" predictor" , " outcome" , " time_value" , " geo_value" , " key" ),
131
+ unique(role )
132
+ ) # anything else
133
+ ))
134
+
135
+ # # Return final object of class `recipe`
136
+ max_time_value <- max(x $ time_value )
137
+ reference_date <- reference_date %|| % attr(x , " metadata" )$ as_of
138
+ out <- list (
139
+ var_info = var_info ,
140
+ term_info = var_info ,
141
+ steps = NULL ,
142
+ template = x [1 , ],
143
+ max_time_value = max_time_value ,
144
+ reference_date = reference_date ,
145
+ levels = NULL ,
146
+ retained = NA
147
+ )
148
+ class(out ) <- c(" epi_recipe" , " recipe" )
149
+ out
150
+ }
140
151
141
152
142
153
# ' @rdname epi_recipe
143
154
# ' @export
144
- epi_recipe.formula <- function (formula , data , ... ) {
155
+ epi_recipe.formula <- function (formula , data , reference_date = NULL , ... ) {
145
156
# we ensure that there's only 1 row in the template
146
157
data <- data [1 , ]
147
158
# check for minus:
148
159
if (! epiprocess :: is_epi_df(data )) {
149
- cli_abort(paste(
150
- " `epi_recipe()` has been called with a non-{.cls epi_df} object." ,
151
- " Use `recipe()` instead."
152
- ))
160
+ cli_abort(
161
+ " `epi_recipe()` has been called with a non-{.cls epi_df} object.
162
+ Use `recipe()` instead."
163
+ )
153
164
}
154
165
155
166
attr(data , " decay_to_tibble" ) <- FALSE
0 commit comments