@@ -14,7 +14,35 @@ knitr::opts_chunk$set(
14
14
)
15
15
```
16
16
17
- # epipredict
17
+ ``` {r coloration, include=FALSE, echo=FALSE}
18
+ base <- "#002676"
19
+ primary <- "#941120"
20
+ secondary <- "#f9c80e"
21
+ tertiary <- "#177245"
22
+ fourth_colour <- "#A393BF"
23
+ fifth_colour <- "#2e8edd"
24
+ colvec <- c(
25
+ base = base, primary = primary, secondary = secondary,
26
+ tertiary = tertiary, fourth_colour = fourth_colour,
27
+ fifth_colour = fifth_colour
28
+ )
29
+ library(epiprocess)
30
+ suppressMessages(library(tidyverse))
31
+ theme_update(legend.position = "bottom", legend.title = element_blank())
32
+ delphi_pal <- function(n) {
33
+ if (n > 6L) warning("Not enough colors in this palette!")
34
+ unname(colvec)[1:n]
35
+ }
36
+ scale_fill_delphi <- function(..., aesthetics = "fill") {
37
+ discrete_scale(aesthetics = aesthetics, palette = delphi_pal, ...)
38
+ }
39
+ scale_color_delphi <- function(..., aesthetics = "color") {
40
+ discrete_scale(aesthetics = aesthetics, palette = delphi_pal, ...)
41
+ }
42
+ scale_colour_delphi <- scale_color_delphi
43
+ ```
44
+
45
+ # Epipredict
18
46
19
47
<!-- badges: start -->
20
48
[ ![ R-CMD-check] ( https://github.com/cmu-delphi/epipredict/actions/workflows/R-CMD-check.yaml/badge.svg )] ( https://github.com/cmu-delphi/epipredict/actions/workflows/R-CMD-check.yaml )
@@ -51,7 +79,8 @@ cases <- pub_covidcast(
51
79
time_type = "day",
52
80
geo_type = "state",
53
81
time_values = epirange(20200601, 20220101),
54
- geo_values = "*") |>
82
+ geo_values = "*"
83
+ ) |>
55
84
select(geo_value, time_value, case_rate = value)
56
85
57
86
deaths <- pub_covidcast(
@@ -60,7 +89,8 @@ deaths <- pub_covidcast(
60
89
time_type = "day",
61
90
geo_type = "state",
62
91
time_values = epirange(20200601, 20220101),
63
- geo_values = "*") |>
92
+ geo_values = "*"
93
+ ) |>
64
94
select(geo_value, time_value, death_rate = value)
65
95
cases_deaths <-
66
96
full_join(cases, deaths, by = c("time_value", "geo_value")) |>
@@ -81,36 +111,39 @@ First, to eliminate some of the noise coming from daily reporting, we do 7 day a
81
111
82
112
[ ^ 1 ] : This makes it so that any given day of the processed timeseries only depends on the previous week, which means that we avoid leaking future values when making a forecast.
83
113
84
- * Basic. Has data, calls forecaster with default arguments.
85
- * Intermediate. Wants to examine changes to the arguments, take advantage of
86
- built in flexibility.
87
- * Advanced. Wants to write their own forecasters. Maybe willing to build up
88
- from some components.
89
-
90
- The Advanced user should find their task to be relatively easy. Examples of
91
- these tasks are illustrated in the [ vignettes and articles] ( https://cmu-delphi.github.io/epipredict ) .
92
-
93
- See also the (in progress) [ Forecasting Book] ( https://cmu-delphi.github.io/delphi-tooling-book/ ) .
94
-
95
- ## Intermediate example
96
-
97
- The package comes with some built-in historical data for illustration, but
98
- up-to-date versions of this could be downloaded with the
99
- [ ` {epidatr} ` package] ( https://cmu-delphi.github.io/epidatr/ )
100
- and processed using
101
- [ ` {epiprocess} ` ] ( https://cmu-delphi.github.io/epiprocess/ ) .[ ^ 1 ]
102
-
103
- [ ^ 1 ] : Other epidemiological signals for non-Covid related illnesses are also
104
- available with [ ` {epidatr} ` ] ( https://github.com/cmu-delphi/epidatr ) which
105
- interfaces directly to Delphi's
106
- [ Epidata API] ( https://cmu-delphi.github.io/delphi-epidata/ )
107
-
108
- ``` {r epidf, message=FALSE}
109
- library(epipredict)
110
- covid_case_death_rates
114
+ ``` {r smooth}
115
+ cases_deaths <-
116
+ cases_deaths |>
117
+ group_by(geo_value) |>
118
+ epi_slide(
119
+ cases_7dav = mean(case_rate, na.rm = TRUE),
120
+ death_rate_7dav = mean(death_rate, na.rm = TRUE),
121
+ .window_size = 7
122
+ ) |>
123
+ ungroup() |>
124
+ mutate(case_rate = NULL, death_rate = NULL) |>
125
+ rename(case_rate = cases_7dav, death_rate = death_rate_7dav)
111
126
```
112
127
113
- To create and train a simple auto-regressive forecaster to predict the death rate two weeks into the future using past (lagged) deaths and cases, we could use the following function.
128
+ Then trimming outliers, most especially negative values:
129
+ ``` {r outlier}
130
+ cases_deaths <-
131
+ cases_deaths |>
132
+ group_by(geo_value) |>
133
+ mutate(
134
+ outlr_death_rate = detect_outlr_rm(time_value, death_rate, detect_negatives = TRUE),
135
+ outlr_case_rate = detect_outlr_rm(time_value, case_rate, detect_negatives = TRUE)
136
+ ) |>
137
+ unnest(cols = starts_with("outlr"), names_sep = "_") |>
138
+ ungroup() |>
139
+ mutate(
140
+ death_rate = outlr_death_rate_replacement,
141
+ case_rate = outlr_case_rate_replacement
142
+ ) |>
143
+ select(geo_value, time_value, case_rate, death_rate)
144
+ cases_deaths
145
+ ```
146
+ </details >
114
147
115
148
After having downloaded and cleaned the data in ` cases_deaths ` , we plot a subset
116
149
of the states, noting the actual forecast date:
@@ -121,8 +154,8 @@ of the states, noting the actual forecast date:
121
154
forecast_date_label <-
122
155
tibble(
123
156
geo_value = rep(plot_locations, 2),
124
- source = c(rep("case_rate",4), rep("death_rate", 4)),
125
- dates = rep(forecast_date - 7* 2, 2 * length(plot_locations)),
157
+ source = c(rep("case_rate", 4), rep("death_rate", 4)),
158
+ dates = rep(forecast_date - 7 * 2, 2 * length(plot_locations)),
126
159
heights = c(rep(150, 4), rep(1.0, 4))
127
160
)
128
161
processed_data_plot <-
@@ -134,7 +167,8 @@ processed_data_plot <-
134
167
facet_grid(source ~ geo_value, scale = "free") +
135
168
geom_vline(aes(xintercept = forecast_date)) +
136
169
geom_text(
137
- data = forecast_date_label, aes(x=dates, label = "forecast\ndate", y = heights), size = 3, hjust = "right") +
170
+ data = forecast_date_label, aes(x = dates, label = "forecast\ndate", y = heights), size = 3, hjust = "right"
171
+ ) +
138
172
scale_x_date(date_breaks = "3 months", date_labels = "%Y %b") +
139
173
theme(axis.text.x = element_text(angle = 90, hjust = 1))
140
174
```
@@ -185,7 +219,8 @@ narrow_data_plot <-
185
219
facet_grid(source ~ geo_value, scale = "free") +
186
220
geom_vline(aes(xintercept = forecast_date)) +
187
221
geom_text(
188
- data = forecast_date_label, aes(x=dates, label = "forecast\ndate", y = heights), size = 3, hjust = "right") +
222
+ data = forecast_date_label, aes(x = dates, label = "forecast\ndate", y = heights), size = 3, hjust = "right"
223
+ ) +
189
224
scale_x_date(date_breaks = "3 months", date_labels = "%Y %b") +
190
225
theme(axis.text.x = element_text(angle = 90, hjust = 1))
191
226
```
@@ -203,7 +238,8 @@ forecast_plot <-
203
238
epipredict:::plot_bands(
204
239
restricted_predictions,
205
240
levels = 0.9,
206
- fill = primary) +
241
+ fill = primary
242
+ ) +
207
243
geom_point(data = restricted_predictions, aes(y = .data$value), color = secondary)
208
244
```
209
245
0 commit comments