-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscore_flusight_forecasts.R
113 lines (89 loc) · 3.05 KB
/
score_flusight_forecasts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#Script to query FluSight scores
#Set-up and load forecast data
library(hubUtils)
library(scoringutils)
library(covidHubUtils)
library(lubridate)
library(dplyr)
library(ggplot2)
library(plotly)
library(here)
# Reference date for the current forecasting week. With lubridate's default
# week start (Sunday) this evaluates to the Saturday that closes the current
# week. NOTE(review): not referenced again in this script; kept in case the
# script is sourced by other code that expects it.
current_ref_date <- lubridate::ceiling_date(Sys.Date(), "week") - lubridate::days(1)

# Locate the local clone of the FluSight forecast hub, expected to sit next to
# the project root. The path is resolved from here::here() rather than by
# calling setwd(), so the session's working directory is not modified, and
# mustWork = TRUE makes a missing clone fail immediately with a clear message.
hub_path <- normalizePath(
  file.path(here::here(), "..", "FluSight-forecast-hub"),
  mustWork = TRUE
)
hub_con <- connect_hub(hub_path)

# Keep only quantile-type model output, pull it into memory, and coerce it to
# a model_out_tbl.
raw_forecasts <- hub_con |>
  dplyr::filter(output_type == "quantile") |>
  dplyr::collect() |>
  as_model_out_tbl()

# Interactive sanity checks: rows per model and a preview of the data.
table(raw_forecasts$model_id)
head(raw_forecasts)
# Log-scale copy of the forecasts: `value` is replaced by
# log_shift(value, offset = 1). The transformed column is created under a
# temporary name and the original dropped in the same mutate() call, then it
# is renamed back to `value`.
log_forecasts <- raw_forecasts |>
  dplyr::mutate(
    value_log = log_shift(value, offset = 1),
    value = NULL
  ) |>
  dplyr::rename(value = value_log)

# Preview the transformed forecasts.
head(log_forecasts)
# Download the hospital-admissions target data from the FluSight hub's main
# branch on GitHub.
raw_truth <- readr::read_csv(
  "https://raw.githubusercontent.com/cdcepi/FluSight-forecast-hub/main/target-data/target-hospital-admissions.csv"
)
head(raw_truth)

# Log-scale copy of the target data, using the same log_shift(offset = 1)
# transform that is applied to the forecasts.
log_truth <- raw_truth |>
  dplyr::mutate(
    value_log = log_shift(value, offset = 1),
    value = NULL
  ) |>
  dplyr::rename(value = value_log)
head(log_truth)
# Build the natural-scale scoring table: keep forecasts with horizon greater
# than -1, attach the observed value for each location / target_end_date, and
# rename columns to the model / quantile / prediction / true_value names used
# below by scoringutils.
raw_data <- raw_forecasts |>
  dplyr::filter(horizon > -1) |>
  dplyr::left_join(
    dplyr::select(
      raw_truth,
      target_end_date = date,
      location,
      location_name,
      true_value = value
    ),
    by = c("location", "target_end_date")
  ) |>
  dplyr::rename(
    model = model_id,
    quantile = output_type_id,
    prediction = value
  ) |>
  dplyr::mutate(quantile = as.numeric(quantile))
head(raw_data)

# Validate the table layout before scoring.
scoringutils::check_forecasts(raw_data)
# Build the log-scale scoring table: keep forecasts with horizon greater than
# -1 and attach the observed value for each location / target_end_date.
#
# The observations must come from log_truth (not raw_truth): the predictions
# in log_forecasts are on the log_shift(offset = 1) scale, so scoring them
# against natural-scale observations would compare values on different scales.
log_data <- log_forecasts |>
  dplyr::filter(horizon > -1) |>
  dplyr::left_join(
    log_truth |>
      dplyr::select(
        target_end_date = date,
        location,
        location_name,
        true_value = value
      ),
    by = c("location", "target_end_date")
  ) |>
  dplyr::rename(
    model = model_id,
    quantile = output_type_id,
    prediction = value
  ) |>
  # output_type_id is converted to a numeric quantile level for scoring
  dplyr::mutate(quantile = as.numeric(quantile))
head(log_data)

# Validate the table layout before scoring.
log_data |>
  scoringutils::check_forecasts()
# Score the natural-scale forecasts.
raw_scores <- scoringutils::score(raw_data)
head(raw_scores)

# Add interval coverage for the 50, 80 and 95 ranges, then summarise the
# scores by model and reference date.
scores_raw <- raw_scores |>
  scoringutils::add_coverage(
    by = c("model", "reference_date"),
    ranges = c(50, 80, 95)
  ) |>
  scoringutils::summarise_scores(by = c("model", "reference_date"))
head(scores_raw)
# Score the log-scale forecasts.
log_scores <- scoringutils::score(log_data)
head(log_scores)

# Add interval coverage for the 50, 80 and 95 ranges, then summarise the
# scores by model and reference date.
scores_log <- log_scores |>
  scoringutils::add_coverage(
    by = c("model", "reference_date"),
    ranges = c(50, 80, 95)
  ) |>
  scoringutils::summarise_scores(by = c("model", "reference_date"))
head(scores_log)
# Write the merged data, unsummarised scores, and target data as .rda files.
#
# Files are written through explicit paths rather than by calling setwd(), so
# the session's working directory is left unchanged. The destination directory
# and file names are the same as before.
reports_dir <- path.expand("~/github/flu-hosp-models-2021-2022/reports")
if (!dir.exists(reports_dir)) {
  stop("Reports directory does not exist: ", reports_dir, call. = FALSE)
}

save(raw_data, file = file.path(reports_dir, "raw_data.rda"))
save(log_data, file = file.path(reports_dir, "log_data.rda"))
save(raw_scores, file = file.path(reports_dir, "raw_scores.rda"))
save(log_scores, file = file.path(reports_dir, "log_scores.rda"))
save(raw_truth, file = file.path(reports_dir, "raw_truth.rda"))
save(log_truth, file = file.path(reports_dir, "log_truth.rda"))