diff --git a/tests/testthat/test-forecasters-data.R b/scripts/test_proj.R similarity index 65% rename from tests/testthat/test-forecasters-data.R rename to scripts/test_proj.R index 4eb97964..69bb93f0 100644 --- a/tests/testthat/test-forecasters-data.R +++ b/scripts/test_proj.R @@ -1,8 +1,106 @@ -suppressPackageStartupMessages(source(here::here("R", "load_all.R"))) +# Test project to test our forecasters on synthetic data. +suppressPackageStartupMessages(source("R/load_all.R")) -testthat::skip("Optional, long-running tests skipped.") +# ================================ GLOBALS ================================= +# Variables prefixed with 'g_' are globals needed by the targets pipeline (they +# need to persist during the actual targets run, since the commands are frozen +# as expressions). + +# Setup targets config. +set_targets_config() +g_aheads <- -1:3 +g_submission_directory <- Sys.getenv("FLU_SUBMISSION_DIRECTORY", "cache") +g_insufficient_data_geos <- c("as", "mp", "vi", "gu") +g_excluded_geos <- c("as", "gu", "mh") +g_time_value_adjust <- 3 +g_fetch_args <- epidatr::fetch_args_list(return_empty = FALSE, timeout_seconds = 400) +g_disease <- "flu" +g_external_object_name <- glue::glue("exploration/2024-2025_{g_disease}_hosp_forecasts.parquet") +# needed for windowed_seasonal +g_very_latent_locations <- list(list( + c("source"), + c("flusurv", "ILI+") +)) +# Date to cut the truth data off at, so we don't have too much of the past for +# plotting. +g_truth_data_date <- "2023-09-01" +# Whether we're running in backtest mode. +# If TRUE, we don't run the report notebook, which is (a) slow and (b) should be +# preserved as an ASOF snapshot of our production results for that week. +# If TRUE, we run a scoring notebook, which scores the historical forecasts +# against the truth data and compares them to the ensemble. +# If FALSE, we run the weekly report notebook. +g_backtest_mode <- as.logical(Sys.getenv("BACKTEST_MODE", FALSE)) +if (!g_backtest_mode) { + # This is the as_of for the forecast. If run on our typical schedule, it's + # today, which is a Wednesday. Sometimes, if we're doing a delayed forecast, + # it's a Thursday. It's used for stamping the data and for determining the + # appropriate as_of when creating the forecast. + g_forecast_generation_dates <- Sys.Date() + # Usually, the forecast_date is the same as the generation date, but you can + # override this. It should be a Wednesday. + g_forecast_dates <- round_date(g_forecast_generation_dates, "weeks", week_start = 3) +} else { + g_forecast_generation_dates <- c(as.Date(c("2024-11-22", "2024-11-27", "2024-12-04", "2024-12-11", "2024-12-18", "2024-12-26", "2025-01-02")), seq.Date(as.Date("2025-01-08"), Sys.Date(), by = 7L)) + g_forecast_dates <- seq.Date(as.Date("2024-11-20"), Sys.Date(), by = 7L) +} + +# TODO: Forecaster definitions. We should have a representative from each forecaster. +g_linear <- function(epi_data, ahead, extra_data, ...) { + epi_data %>% + filter(source == "nhsn") %>% + forecaster_baseline_linear( + ahead, ..., + residual_tail = 0.99, + residual_center = 0.35, + no_intercept = TRUE + ) +} +g_climate_base <- function(epi_data, ahead, extra_data, ...) { + epi_data %>% + filter(source == "nhsn") %>% + climatological_model(ahead, ...) +} +g_climate_geo_agged <- function(epi_data, ahead, extra_data, ...) { + epi_data %>% + filter(source == "nhsn") %>% + climatological_model(ahead, ..., geo_agg = TRUE) +} +g_windowed_seasonal <- function(epi_data, ahead, extra_data, ...) { + scaled_pop_seasonal( + epi_data, + outcome = "value", + ahead = ahead * 7, + ..., + trainer = epipredict::quantile_reg(), + seasonal_method = "window", + pop_scaling = FALSE, + lags = c(0, 7), + keys_to_ignore = g_very_latent_locations + ) %>% + mutate(target_end_date = target_end_date + 3) +} +g_windowed_seasonal_extra_sources <- function(epi_data, ahead, extra_data, ...) { + fcst <- + epi_data %>% + left_join(extra_data, by = join_by(geo_value, time_value)) %>% + scaled_pop_seasonal( + outcome = "value", + ahead = ahead * 7, + extra_sources = "nssp", + ..., + seasonal_method = "window", + trainer = epipredict::quantile_reg(), + drop_non_seasons = TRUE, + pop_scaling = FALSE, + lags = list(c(0, 7), c(0, 7)), + keys_to_ignore = g_very_latent_locations + ) %>% + select(-source) %>% + mutate(target_end_date = target_end_date + 3) %>% + fcst +} -# A list of forecasters to be tested. Add here to test new forecasters. forecasters <- tibble::tribble( ~forecaster, ~forecaster_args, ~forecaster_args_names, ~fc_name, ~outcome, ~extra_sources, ~ahead, scaled_pop, list(TRUE), list("pop_scaling"), "scaled_pop", "a", "", 1, @@ -10,39 +108,9 @@ forecasters <- tibble::tribble( flatline_fc, list(), list(), "flatline_fc", "a", "", 1, smoothed_scaled, list(list(c(0, 7, 14), c(0)), 14, 7), list("lags", "sd_width", "sd_mean_width"), "smoothed_scaled", "a", "", 1, ) -# Which forecasters expect the data to be non-identical? -expects_nonequal <- c("scaled_pop", "smoothed_scaled") - -#' A wrapper for a common call to slide a forecaster over a dataset. -#' -#' @param dataset The dataset to be used for the forecast. -#' @param ii The row of the forecasters table to be used. -#' @param outcome The name of the target column in the dataset. -#' @param extra_sources Any extra columns used for prediction that aren't -#' default. -#' @param expect_linreg_warnings Whether to expect and then suppress warnings -#' from linear_reg. -#' -#' Notes: -#' - n_training_pad is set to avoid warnings from the trainer. -#' - linear_reg doesn't like exactly equal data when training and throws a -#' warning. wrapperfun is used to suppress that. -default_slide_forecaster <- function(dataset, ii, expect_linreg_warnings = TRUE) { - if (any(forecasters$fc_name[[ii]] %in% expects_nonequal) && expect_linreg_warnings) { - wrapperfun <- function(x) { - suppressWarnings(expect_warning(x, regexp = "prediction from rank-deficient fit")) - } - } else { - wrapperfun <- identity - } - args <- forecasters %>% - select(-fc_name) %>% - slice(ii) %>% - purrr::transpose() %>% - pluck(1) - wrapperfun(res <- inject(slide_forecaster(epi_archive = dataset, n_training_pad = 30, !!!args))) - return(res) -} + + +### Datasets TODO: Convert to targets? # Some arbitrary magic numbers used to generate data. synth_mean <- 25 @@ -76,10 +144,12 @@ different_constants <- rbind( ) %>% arrange(version, time_value) %>% epiprocess::as_epi_archive() + different_constants_truth <- different_constants$DT %>% tibble() %>% rename("true_value" = "a", "target_end_date" = "time_value") %>% select(-version) + for (ii in seq_len(nrow(forecasters))) { test_that(paste( forecasters$fc_name[[ii]], diff --git a/test_proj/.gitignore b/test_proj/.gitignore new file mode 100644 index 00000000..94bd9234 --- /dev/null +++ b/test_proj/.gitignore @@ -0,0 +1,10 @@ +# CAUTION: do not edit this file by hand! +# _targets/objects/ may have large data files, +# and _targets/meta/process may have sensitive information. +# It is good pratice to either commit nothing from _targets/, +# or if your data is not too sensitive, +# commit only _targets/meta/meta. +* +!.gitignore +!meta +meta/* diff --git a/tests/testthat/test-forecasters-basics.R b/tests/testthat/test-forecasters-basics.R deleted file mode 100644 index b4a43eb5..00000000 --- a/tests/testthat/test-forecasters-basics.R +++ /dev/null @@ -1,242 +0,0 @@ -suppressPackageStartupMessages(source(here::here("R", "load_all.R"))) -testthat::local_edition(3) -# TODO better way to do this than copypasta -forecasters <- list( - list("scaled_pop", scaled_pop), - list("flatline_fc", flatline_fc), - list("smoothed_scaled", smoothed_scaled, lags = list(c(0, 2, 5), c(0))) - # TODO: flusion is broken? - # list("flusion", flusion), - # TODOO: no_recent_outcome cannot be run without aux_data/apportionment.csv present - # list("no_recent_outcome", no_recent_outcome) -) -for (forecaster in forecasters) { - test_that(paste(forecaster[[1]], "gets the date and columns right"), { - jhu <- epidatasets::covid_case_death_rates %>% - dplyr::filter(time_value >= as.Date("2021-11-01")) - # the as_of for this is wildly far in the future - attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3 - - res <- forecaster[[2]](jhu, "case_rate", c("death_rate"), -2L) - - expect_equal( - names(res), - c("geo_value", "forecast_date", "target_end_date", "quantile", "value") - ) - expect_true(all( - res$target_end_date == - as.Date("2022-01-01") - )) - }) - - test_that(paste(forecaster[[1]], "handles only using 1 column correctly"), { - skip("TODO: fix broken test, no_recent_outcome has an error") - jhu <- epidatasets::covid_case_death_rates %>% - dplyr::filter(time_value >= as.Date("2021-11-01")) - # the as_of for this is wildly far in the future - attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3 - if (forecaster[[1]] == "smoothed_scaled") { - expect_no_error(res <- forecaster[[2]](jhu, "case_rate", "", -2L, lags = forecaster$lags)) - } else { - expect_no_error(suppressWarnings(res <- forecaster[[2]](jhu, "case_rate", "", -2L))) - } - }) - - test_that(paste(forecaster[[1]], "deals with no as_of"), { - skip("TODO: fix broken test, smoothed_scaled has an error") - jhu <- epidatasets::covid_case_death_rates %>% - dplyr::filter(time_value >= as.Date("2021-11-01")) - # what if we have no as_of date? assume they mean the last available data - attributes(jhu)$metadata$as_of <- NULL - expect_snapshot(error = FALSE, res <- forecaster[[2]](jhu, "case_rate", extra_sources = "death_rate", ahead = 2L)) - }) - - test_that(paste(forecaster[[1]], "handles last second NA's"), { - # if the last entries are NA, we should still predict - # TODO: currently this checks that we DON'T predict - jhu <- epidatasets::covid_case_death_rates %>% - dplyr::filter(time_value >= as.Date("2021-11-01")) - geo_values <- jhu$geo_value %>% unique() - one_day_nas <- tibble( - geo_value = geo_values, - time_value = as.Date("2022-01-01"), - case_rate = NA, - death_rate = runif(length(geo_values)) - ) - second_day_nas <- one_day_nas %>% - mutate(time_value = as.Date("2022-01-02")) - jhu_nad <- jhu %>% - as_tibble() %>% - bind_rows(one_day_nas, second_day_nas) %>% - epiprocess::as_epi_df() - attributes(jhu_nad)$metadata$as_of <- max(jhu_nad$time_value) + 3 - expect_no_error(nas_forecast <- forecaster[[2]](jhu_nad, "case_rate", c("death_rate"), ahead = 1)) - # TODO: this shouldn't actually be null, it should be a bit further delayed - # predicting from 3 days later - expect_equal(nas_forecast$forecast_date %>% unique(), as.Date("2022-01-05")) - # predicting 1 day into the future - expect_equal(nas_forecast$target_end_date %>% unique(), as.Date("2022-01-06")) - # (nearly) every state and quantile has a prediction - # as, vi and mp don't currently have populations for flusion, so they're not getting forecast - max_n_geos <- length(jhu$geo_value %>% unique()) - if (forecaster[[1]] != "flatline_fc") { - # flatline_fc is a little bit broken on last-minute `NA`'s right now - expect_true(any(nrow(nas_forecast) == length(covidhub_probs()) * (max_n_geos - c(0, 1, 3)))) - } - }) - - test_that(paste(forecaster[[1]], "handles unused extra sources with NAs"), { - # if there is an extra source we aren't using, we should ignore any NA's it has - jhu <- epidatasets::covid_case_death_rates %>% - dplyr::filter(time_value >= as.Date("2021-11-01")) - jhu_nad <- jhu %>% - as_tibble() %>% - mutate(some_other_predictor = rep(c(NA, 3), times = 1708)) %>% - epiprocess::as_epi_df() - attributes(jhu_nad)$metadata$as_of <- max(jhu$time_value) + 3 - # should run fine - expect_no_error(nas_forecast <- forecaster[[2]](jhu_nad, "case_rate", c("death_rate"))) - expect_equal(nas_forecast$forecast_date %>% unique(), max(jhu$time_value) + 3) - # there's an actual full set of predictions - max_n_geos <- length(jhu$geo_value %>% unique()) - expect_true(any(nrow(nas_forecast) == length(covidhub_probs()) * (max_n_geos - c(0, 1, 3)))) - }) - - ################################# - # any forecaster specific tests - if (forecaster[[1]] == "scaled_pop" || forecaster[[1]] == "smoothed_scaled") { - test_that(paste(forecaster[[1]], "scaled and unscaled don't make the same predictions"), { - jhu <- epidatasets::covid_case_death_rates %>% - dplyr::filter(time_value >= as.Date("2021-11-01")) - # the as_of for this is wildly far in the future - attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3 - res <- forecaster[[2]](jhu, "case_rate", c("death_rate"), -2L) - # confirm scaling produces different results - res_unscaled <- forecaster[[2]](jhu, - "case_rate", - c("death_rate"), - -2L, - pop_scaling = FALSE, - ) - expect_false(res_unscaled %>% - full_join(res, - by = join_by(geo_value, forecast_date, target_end_date, quantile), - suffix = c(".unscaled", ".scaled") - ) %>% - mutate(equal = value.unscaled == value.scaled) %>% - summarize(all(equal), .groups = "drop") %>% pull(`all(equal)`)) - }) - } else if (forecaster[[1]] == "smoothed_scaled") { - testthat("smoothed_scaled handles variable lags correctly", { - jhu <- epidatasets::covid_case_death_rates %>% - dplyr::filter(time_value >= as.Date("2021-11-01")) - # the as_of for this is wildly far in the future - attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3 - expect_no_error(res <- forecaster[[2]](jhu, "case_rate", c("death_rate"), -2L, lags = list(c(0, 3, 5, 7), c(0), c(0, 3, 5, 7), c(0)))) - }) - } - # TODO confirming that it produces exactly the same result as arx_forecaster - # test case where extra_sources is "empty" - # test case where the epi_df is empty - test_that(paste(forecaster[[1]], "empty epi_df predicts nothing"), { - jhu <- epidatasets::covid_case_death_rates %>% - dplyr::filter(time_value >= as.Date("2021-11-01")) - # the as_of for this is wildly far in the future - attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3 - res <- forecaster[[2]](jhu, "case_rate", c("death_rate"), -2L) - # the as_of for this is wildly far in the future - attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3 - null_jhu <- jhu %>% filter(time_value < as.Date("0009-01-01")) - expect_no_error(null_res <- forecaster[[2]](null_jhu, "case_rate", c("death_rate"))) - expect_identical(names(null_res), names(res)) - expect_equal(nrow(null_res), 0) - expect_identical(null_res, tibble(geo_value = character(), forecast_date = lubridate::Date(), target_end_date = lubridate::Date(), quantile = numeric(), value = numeric())) - }) -} - -# unique tests -test_that("flatline_fc same across aheads", { - jhu <- epidatasets::covid_case_death_rates %>% - dplyr::filter(time_value >= as.Date("2021-11-01")) - attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3 - resM2 <- flatline_fc(jhu, "case_rate", c("death_rate"), -2L) %>% - filter(quantile == 0.5) %>% - select(-target_end_date) - resM1 <- flatline_fc(jhu, "case_rate", c("death_rate"), -1L) %>% - filter(quantile == 0.5) %>% - select(-target_end_date) - res1 <- flatline_fc(jhu, "case_rate", c("death_rate"), 1L) %>% - filter(quantile == 0.5) %>% - select(-target_end_date) - expect_equal(resM2, resM1) - expect_equal(resM2, res1) -}) - -test_that("ensemble_average", { - jhu <- epidatasets::covid_case_death_rates %>% - dplyr::filter(time_value >= as.Date("2021-11-01")) - # the as_of for this is wildly far in the future - attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3 - # create some forecasts to ensemble - meta_res <- list( - scaled_pop(jhu, "case_rate", c("death_rate"), -2L), - scaled_pop(jhu, "case_rate", c("death_rate"), -2L, lags = c(0, 1, 2, 3, 5, 7, 11, 13, 17)), - scaled_pop(jhu, "case_rate", c("death_rate"), -2L, pop_scaling = FALSE), - flatline_fc(jhu, "case_rate", ahead = -2L) - ) - ave_ens <- ensemble_average(jhu, meta_res, "case_rate") - # target date correct - expect_true(all( - ave_ens$target_end_date == - as.Date("2022-01-01") - )) - expect_equal( - names(ave_ens), - c("geo_value", "forecast_date", "target_end_date", "quantile", "value") - ) - expect_true(all( - ave_ens$target_end_date == - as.Date("2022-01-01") - )) - # make sure that key direction doesn't matter when generating ensembles - ave_ens_reversed <- ensemble_average(jhu, meta_res, "case_rate") - expect_true(all.equal(ave_ens_reversed, ave_ens)) - # make sure it produces the expected median of a random row - sampled_rows_all <- purrr::map_vec( - meta_res, - ~ filter(.x, geo_value == "ca" & forecast_date == "2022-01-03" & quantile == .3) - ) - sampled_row_by_hand <- sampled_rows_all %>% - summarize(value = median(value), .by = c("geo_value", "forecast_date", "target_end_date", "quantile")) - sampled_row_by_function <- ave_ens %>% filter(geo_value == "ca" & forecast_date == "2022-01-03" & quantile == .3) - expect_equal(sampled_row_by_function, sampled_row_by_hand) - - mean_ens <- ensemble_average(jhu, meta_res, "case_rate", ensemble_args = list(average_type = mean)) - are_equal <- mean_ens %>% - full_join(ave_ens, - by = join_by(geo_value, forecast_date, target_end_date, quantile) - ) %>% - mutate(eq = value.x != value.y) %>% - pull(eq) - # expect the mean and median to be generally not equal - expect_true(mean(are_equal) > 0.9) - # any ensemble specific tests - # test case where extra_sources is "empty" - # test case where the epi_df is empty - null_jhu <- jhu %>% filter(time_value < as.Date("0009-01-01")) - expect_no_error(null_ave_ens <- ensemble_average(null_jhu, meta_res, "case_rate")) - # ensemble_average doesn't actually depend on the input data - expect_true(all.equal(ave_ens, null_ave_ens)) - - # carry on as if nothing was missing if one of the forecasters is missing entries - one_partially_missing <- rlang::list2(meta_res[[1]][1:900, 1:5], !!!meta_res[2:4]) - expect_no_error(partial_ave_ens <- ensemble_average(jhu, one_partially_missing)) - # the entries that are present for all of them are the same - left_join(partial_ave_ens, ave_ens, by = c("geo_value", "forecast_date", "target_end_date", "quantile")) %>% - summarize(all.equal(value.x, value.y), .groups = "drop") - left_join(partial_ave_ens, ave_ens, by = c("geo_value", "forecast_date", "target_end_date", "quantile")) %>% - mutate(eq = value.x == value.y) %>% - pull(eq) - expect_true(all.equal(partial_ave_ens[1:900, 1:5], ave_ens[1:900, 1:5])) - expect_identical(names(null_ave_ens), names(ave_ens)) -})