diff --git a/tests/testthat/test-forecasters-data.R b/scripts/test_proj.R
similarity index 65%
rename from tests/testthat/test-forecasters-data.R
rename to scripts/test_proj.R
index 4eb97964..69bb93f0 100644
--- a/tests/testthat/test-forecasters-data.R
+++ b/scripts/test_proj.R
@@ -1,8 +1,106 @@
-suppressPackageStartupMessages(source(here::here("R", "load_all.R")))
+# Test project to test our forecasters on synthetic data.
+suppressPackageStartupMessages(source("R/load_all.R"))
 
-testthat::skip("Optional, long-running tests skipped.")
+# ================================ GLOBALS =================================
+# Variables prefixed with 'g_' are globals needed by the targets pipeline (they
+# need to persist during the actual targets run, since the commands are frozen
+# as expressions).
+
+# Setup targets config.
+set_targets_config()
+g_aheads <- -1:3
+g_submission_directory <- Sys.getenv("FLU_SUBMISSION_DIRECTORY", "cache")
+g_insufficient_data_geos <- c("as", "mp", "vi", "gu")
+g_excluded_geos <- c("as", "gu", "mh")
+g_time_value_adjust <- 3
+g_fetch_args <- epidatr::fetch_args_list(return_empty = FALSE, timeout_seconds = 400)
+g_disease <- "flu"
+g_external_object_name <- glue::glue("exploration/2024-2025_{g_disease}_hosp_forecasts.parquet")
+# needed for windowed_seasonal
+g_very_latent_locations <- list(list(
+  c("source"),
+  c("flusurv", "ILI+")
+))
+# Date to cut the truth data off at, so we don't have too much of the past for
+# plotting.
+g_truth_data_date <- "2023-09-01"
+# Whether we're running in backtest mode.
+# If TRUE, we don't run the report notebook, which is (a) slow and (b) should be
+# preserved as an ASOF snapshot of our production results for that week.
+# If TRUE, we run a scoring notebook, which scores the historical forecasts
+# against the truth data and compares them to the ensemble.
+# If FALSE, we run the weekly report notebook.
+g_backtest_mode <- as.logical(Sys.getenv("BACKTEST_MODE", FALSE))
+if (!g_backtest_mode) {
+  # This is the as_of for the forecast. If run on our typical schedule, it's
+  # today, which is a Wednesday. Sometimes, if we're doing a delayed forecast,
+  # it's a Thursday. It's used for stamping the data and for determining the
+  # appropriate as_of when creating the forecast.
+  g_forecast_generation_dates <- Sys.Date()
+  # Usually, the forecast_date is the same as the generation date, but you can
+  # override this. It should be a Wednesday.
+  g_forecast_dates <- round_date(g_forecast_generation_dates, "weeks", week_start = 3)
+} else {
+  g_forecast_generation_dates <- c(as.Date(c("2024-11-22", "2024-11-27", "2024-12-04", "2024-12-11", "2024-12-18", "2024-12-26", "2025-01-02")), seq.Date(as.Date("2025-01-08"), Sys.Date(), by = 7L))
+  g_forecast_dates <- seq.Date(as.Date("2024-11-20"), Sys.Date(), by = 7L)
+}
+
+# TODO: Forecaster definitions. We should have a representative from each forecaster.
+g_linear <- function(epi_data, ahead, extra_data, ...) {
+  epi_data %>%
+    filter(source == "nhsn") %>%
+    forecaster_baseline_linear(
+      ahead, ...,
+      residual_tail = 0.99,
+      residual_center = 0.35,
+      no_intercept = TRUE
+    )
+}
+g_climate_base <- function(epi_data, ahead, extra_data, ...) {
+  epi_data %>%
+    filter(source == "nhsn") %>%
+    climatological_model(ahead, ...)
+}
+g_climate_geo_agged <- function(epi_data, ahead, extra_data, ...) {
+  epi_data %>%
+    filter(source == "nhsn") %>%
+    climatological_model(ahead, ..., geo_agg = TRUE)
+}
+g_windowed_seasonal <- function(epi_data, ahead, extra_data, ...) {
+  scaled_pop_seasonal(
+    epi_data,
+    outcome = "value",
+    ahead = ahead * 7,
+    ...,
+    trainer = epipredict::quantile_reg(),
+    seasonal_method = "window",
+    pop_scaling = FALSE,
+    lags = c(0, 7),
+    keys_to_ignore = g_very_latent_locations
+  ) %>%
+    mutate(target_end_date = target_end_date + 3)
+}
+g_windowed_seasonal_extra_sources <- function(epi_data, ahead, extra_data, ...) {
+  fcst <-
+    epi_data %>%
+    left_join(extra_data, by = join_by(geo_value, time_value)) %>%
+    scaled_pop_seasonal(
+      outcome = "value",
+      ahead = ahead * 7,
+      extra_sources = "nssp",
+      ...,
+      seasonal_method = "window",
+      trainer = epipredict::quantile_reg(),
+      drop_non_seasons = TRUE,
+      pop_scaling = FALSE,
+      lags = list(c(0, 7), c(0, 7)),
+      keys_to_ignore = g_very_latent_locations
+    ) %>%
+    select(-source) %>%
+    mutate(target_end_date = target_end_date + 3) %>%
+  fcst
+}
 
-# A list of forecasters to be tested. Add here to test new forecasters.
 forecasters <- tibble::tribble(
   ~forecaster, ~forecaster_args, ~forecaster_args_names, ~fc_name, ~outcome, ~extra_sources, ~ahead,
   scaled_pop, list(TRUE), list("pop_scaling"), "scaled_pop", "a", "", 1,
@@ -10,39 +108,9 @@ forecasters <- tibble::tribble(
   flatline_fc, list(), list(), "flatline_fc", "a", "", 1,
   smoothed_scaled, list(list(c(0, 7, 14), c(0)), 14, 7), list("lags", "sd_width", "sd_mean_width"), "smoothed_scaled", "a", "", 1,
 )
-# Which forecasters expect the data to be non-identical?
-expects_nonequal <- c("scaled_pop", "smoothed_scaled")
-
-#' A wrapper for a common call to slide a forecaster over a dataset.
-#'
-#' @param dataset The dataset to be used for the forecast.
-#' @param ii The row of the forecasters table to be used.
-#' @param outcome The name of the target column in the dataset.
-#' @param extra_sources Any extra columns used for prediction that aren't
-#' default.
-#' @param expect_linreg_warnings Whether to expect and then suppress warnings
-#' from linear_reg.
-#'
-#' Notes:
-#' - n_training_pad is set to avoid warnings from the trainer.
-#' - linear_reg doesn't like exactly equal data when training and throws a
-#'   warning. wrapperfun is used to suppress that.
-default_slide_forecaster <- function(dataset, ii, expect_linreg_warnings = TRUE) {
-  if (any(forecasters$fc_name[[ii]] %in% expects_nonequal) && expect_linreg_warnings) {
-    wrapperfun <- function(x) {
-      suppressWarnings(expect_warning(x, regexp = "prediction from rank-deficient fit"))
-    }
-  } else {
-    wrapperfun <- identity
-  }
-  args <- forecasters %>%
-    select(-fc_name) %>%
-    slice(ii) %>%
-    purrr::transpose() %>%
-    pluck(1)
-  wrapperfun(res <- inject(slide_forecaster(epi_archive = dataset, n_training_pad = 30, !!!args)))
-  return(res)
-}
+
+
+### Datasets TODO: Convert to targets?
 
 # Some arbitrary magic numbers used to generate data.
 synth_mean <- 25
@@ -76,10 +144,12 @@ different_constants <- rbind(
 ) %>%
   arrange(version, time_value) %>%
   epiprocess::as_epi_archive()
+
 different_constants_truth <- different_constants$DT %>%
   tibble() %>%
   rename("true_value" = "a", "target_end_date" = "time_value") %>%
   select(-version)
+
 for (ii in seq_len(nrow(forecasters))) {
   test_that(paste(
     forecasters$fc_name[[ii]],
diff --git a/test_proj/.gitignore b/test_proj/.gitignore
new file mode 100644
index 00000000..94bd9234
--- /dev/null
+++ b/test_proj/.gitignore
@@ -0,0 +1,10 @@
+# CAUTION: do not edit this file by hand!
+# _targets/objects/ may have large data files,
+# and _targets/meta/process may have sensitive information.
+# It is good pratice to either commit nothing from _targets/,
+# or if your data is not too sensitive,
+# commit only _targets/meta/meta.
+*
+!.gitignore
+!meta
+meta/*
diff --git a/tests/testthat/test-forecasters-basics.R b/tests/testthat/test-forecasters-basics.R
deleted file mode 100644
index b4a43eb5..00000000
--- a/tests/testthat/test-forecasters-basics.R
+++ /dev/null
@@ -1,242 +0,0 @@
-suppressPackageStartupMessages(source(here::here("R", "load_all.R")))
-testthat::local_edition(3)
-# TODO better way to do this than copypasta
-forecasters <- list(
-  list("scaled_pop", scaled_pop),
-  list("flatline_fc", flatline_fc),
-  list("smoothed_scaled", smoothed_scaled, lags = list(c(0, 2, 5), c(0)))
-  # TODO: flusion is broken?
-  # list("flusion", flusion),
-  # TODOO: no_recent_outcome cannot be run without aux_data/apportionment.csv present
-  # list("no_recent_outcome", no_recent_outcome)
-)
-for (forecaster in forecasters) {
-  test_that(paste(forecaster[[1]], "gets the date and columns right"), {
-    jhu <- epidatasets::covid_case_death_rates %>%
-      dplyr::filter(time_value >= as.Date("2021-11-01"))
-    # the as_of for this is wildly far in the future
-    attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
-
-    res <- forecaster[[2]](jhu, "case_rate", c("death_rate"), -2L)
-
-    expect_equal(
-      names(res),
-      c("geo_value", "forecast_date", "target_end_date", "quantile", "value")
-    )
-    expect_true(all(
-      res$target_end_date ==
-        as.Date("2022-01-01")
-    ))
-  })
-
-  test_that(paste(forecaster[[1]], "handles only using 1 column correctly"), {
-    skip("TODO: fix broken test, no_recent_outcome has an error")
-    jhu <- epidatasets::covid_case_death_rates %>%
-      dplyr::filter(time_value >= as.Date("2021-11-01"))
-    # the as_of for this is wildly far in the future
-    attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
-    if (forecaster[[1]] == "smoothed_scaled") {
-      expect_no_error(res <- forecaster[[2]](jhu, "case_rate", "", -2L, lags = forecaster$lags))
-    } else {
-      expect_no_error(suppressWarnings(res <- forecaster[[2]](jhu, "case_rate", "", -2L)))
-    }
-  })
-
-  test_that(paste(forecaster[[1]], "deals with no as_of"), {
-    skip("TODO: fix broken test, smoothed_scaled has an error")
-    jhu <- epidatasets::covid_case_death_rates %>%
-      dplyr::filter(time_value >= as.Date("2021-11-01"))
-    # what if we have no as_of date? assume they mean the last available data
-    attributes(jhu)$metadata$as_of <- NULL
-    expect_snapshot(error = FALSE, res <- forecaster[[2]](jhu, "case_rate", extra_sources = "death_rate", ahead = 2L))
-  })
-
-  test_that(paste(forecaster[[1]], "handles last second NA's"), {
-    # if the last entries are NA, we should still predict
-    # TODO: currently this checks that we DON'T predict
-    jhu <- epidatasets::covid_case_death_rates %>%
-      dplyr::filter(time_value >= as.Date("2021-11-01"))
-    geo_values <- jhu$geo_value %>% unique()
-    one_day_nas <- tibble(
-      geo_value = geo_values,
-      time_value = as.Date("2022-01-01"),
-      case_rate = NA,
-      death_rate = runif(length(geo_values))
-    )
-    second_day_nas <- one_day_nas %>%
-      mutate(time_value = as.Date("2022-01-02"))
-    jhu_nad <- jhu %>%
-      as_tibble() %>%
-      bind_rows(one_day_nas, second_day_nas) %>%
-      epiprocess::as_epi_df()
-    attributes(jhu_nad)$metadata$as_of <- max(jhu_nad$time_value) + 3
-    expect_no_error(nas_forecast <- forecaster[[2]](jhu_nad, "case_rate", c("death_rate"), ahead = 1))
-    # TODO: this shouldn't actually be null, it should be a bit further delayed
-    # predicting from 3 days later
-    expect_equal(nas_forecast$forecast_date %>% unique(), as.Date("2022-01-05"))
-    # predicting 1 day into the future
-    expect_equal(nas_forecast$target_end_date %>% unique(), as.Date("2022-01-06"))
-    # (nearly) every state and quantile has a prediction
-    # as, vi and mp don't currently have populations for flusion, so they're not getting forecast
-    max_n_geos <- length(jhu$geo_value %>% unique())
-    if (forecaster[[1]] != "flatline_fc") {
-      # flatline_fc is a little bit broken on last-minute `NA`'s right now
-      expect_true(any(nrow(nas_forecast) == length(covidhub_probs()) * (max_n_geos - c(0, 1, 3))))
-    }
-  })
-
-  test_that(paste(forecaster[[1]], "handles unused extra sources with NAs"), {
-    # if there is an extra source we aren't using, we should ignore any NA's it has
-    jhu <- epidatasets::covid_case_death_rates %>%
-      dplyr::filter(time_value >= as.Date("2021-11-01"))
-    jhu_nad <- jhu %>%
-      as_tibble() %>%
-      mutate(some_other_predictor = rep(c(NA, 3), times = 1708)) %>%
-      epiprocess::as_epi_df()
-    attributes(jhu_nad)$metadata$as_of <- max(jhu$time_value) + 3
-    # should run fine
-    expect_no_error(nas_forecast <- forecaster[[2]](jhu_nad, "case_rate", c("death_rate")))
-    expect_equal(nas_forecast$forecast_date %>% unique(), max(jhu$time_value) + 3)
-    # there's an actual full set of predictions
-    max_n_geos <- length(jhu$geo_value %>% unique())
-    expect_true(any(nrow(nas_forecast) == length(covidhub_probs()) * (max_n_geos - c(0, 1, 3))))
-  })
-
-  #################################
-  # any forecaster specific tests
-  if (forecaster[[1]] == "scaled_pop" || forecaster[[1]] == "smoothed_scaled") {
-    test_that(paste(forecaster[[1]], "scaled and unscaled don't make the same predictions"), {
-      jhu <- epidatasets::covid_case_death_rates %>%
-        dplyr::filter(time_value >= as.Date("2021-11-01"))
-      # the as_of for this is wildly far in the future
-      attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
-      res <- forecaster[[2]](jhu, "case_rate", c("death_rate"), -2L)
-      # confirm scaling produces different results
-      res_unscaled <- forecaster[[2]](jhu,
-        "case_rate",
-        c("death_rate"),
-        -2L,
-        pop_scaling = FALSE,
-      )
-      expect_false(res_unscaled %>%
-        full_join(res,
-          by = join_by(geo_value, forecast_date, target_end_date, quantile),
-          suffix = c(".unscaled", ".scaled")
-        ) %>%
-        mutate(equal = value.unscaled == value.scaled) %>%
-        summarize(all(equal), .groups = "drop") %>% pull(`all(equal)`))
-    })
-  } else if (forecaster[[1]] == "smoothed_scaled") {
-    testthat("smoothed_scaled handles variable lags correctly", {
-      jhu <- epidatasets::covid_case_death_rates %>%
-        dplyr::filter(time_value >= as.Date("2021-11-01"))
-      # the as_of for this is wildly far in the future
-      attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
-      expect_no_error(res <- forecaster[[2]](jhu, "case_rate", c("death_rate"), -2L, lags = list(c(0, 3, 5, 7), c(0), c(0, 3, 5, 7), c(0))))
-    })
-  }
-  # TODO confirming that it produces exactly the same result as arx_forecaster
-  # test case where extra_sources is "empty"
-  # test case where the epi_df is empty
-  test_that(paste(forecaster[[1]], "empty epi_df predicts nothing"), {
-    jhu <- epidatasets::covid_case_death_rates %>%
-      dplyr::filter(time_value >= as.Date("2021-11-01"))
-    # the as_of for this is wildly far in the future
-    attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
-    res <- forecaster[[2]](jhu, "case_rate", c("death_rate"), -2L)
-    # the as_of for this is wildly far in the future
-    attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
-    null_jhu <- jhu %>% filter(time_value < as.Date("0009-01-01"))
-    expect_no_error(null_res <- forecaster[[2]](null_jhu, "case_rate", c("death_rate")))
-    expect_identical(names(null_res), names(res))
-    expect_equal(nrow(null_res), 0)
-    expect_identical(null_res, tibble(geo_value = character(), forecast_date = lubridate::Date(), target_end_date = lubridate::Date(), quantile = numeric(), value = numeric()))
-  })
-}
-
-# unique tests
-test_that("flatline_fc same across aheads", {
-  jhu <- epidatasets::covid_case_death_rates %>%
-    dplyr::filter(time_value >= as.Date("2021-11-01"))
-  attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
-  resM2 <- flatline_fc(jhu, "case_rate", c("death_rate"), -2L) %>%
-    filter(quantile == 0.5) %>%
-    select(-target_end_date)
-  resM1 <- flatline_fc(jhu, "case_rate", c("death_rate"), -1L) %>%
-    filter(quantile == 0.5) %>%
-    select(-target_end_date)
-  res1 <- flatline_fc(jhu, "case_rate", c("death_rate"), 1L) %>%
-    filter(quantile == 0.5) %>%
-    select(-target_end_date)
-  expect_equal(resM2, resM1)
-  expect_equal(resM2, res1)
-})
-
-test_that("ensemble_average", {
-  jhu <- epidatasets::covid_case_death_rates %>%
-    dplyr::filter(time_value >= as.Date("2021-11-01"))
-  # the as_of for this is wildly far in the future
-  attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
-  # create some forecasts to ensemble
-  meta_res <- list(
-    scaled_pop(jhu, "case_rate", c("death_rate"), -2L),
-    scaled_pop(jhu, "case_rate", c("death_rate"), -2L, lags = c(0, 1, 2, 3, 5, 7, 11, 13, 17)),
-    scaled_pop(jhu, "case_rate", c("death_rate"), -2L, pop_scaling = FALSE),
-    flatline_fc(jhu, "case_rate", ahead = -2L)
-  )
-  ave_ens <- ensemble_average(jhu, meta_res, "case_rate")
-  # target date correct
-  expect_true(all(
-    ave_ens$target_end_date ==
-      as.Date("2022-01-01")
-  ))
-  expect_equal(
-    names(ave_ens),
-    c("geo_value", "forecast_date", "target_end_date", "quantile", "value")
-  )
-  expect_true(all(
-    ave_ens$target_end_date ==
-      as.Date("2022-01-01")
-  ))
-  # make sure that key direction doesn't matter when generating ensembles
-  ave_ens_reversed <- ensemble_average(jhu, meta_res, "case_rate")
-  expect_true(all.equal(ave_ens_reversed, ave_ens))
-  # make sure it produces the expected median of a random row
-  sampled_rows_all <- purrr::map_vec(
-    meta_res,
-    ~ filter(.x, geo_value == "ca" & forecast_date == "2022-01-03" & quantile == .3)
-  )
-  sampled_row_by_hand <- sampled_rows_all %>%
-    summarize(value = median(value), .by = c("geo_value", "forecast_date", "target_end_date", "quantile"))
-  sampled_row_by_function <- ave_ens %>% filter(geo_value == "ca" & forecast_date == "2022-01-03" & quantile == .3)
-  expect_equal(sampled_row_by_function, sampled_row_by_hand)
-
-  mean_ens <- ensemble_average(jhu, meta_res, "case_rate", ensemble_args = list(average_type = mean))
-  are_equal <- mean_ens %>%
-    full_join(ave_ens,
-      by = join_by(geo_value, forecast_date, target_end_date, quantile)
-    ) %>%
-    mutate(eq = value.x != value.y) %>%
-    pull(eq)
-  # expect the mean and median to be generally not equal
-  expect_true(mean(are_equal) > 0.9)
-  # any ensemble specific tests
-  # test case where extra_sources is "empty"
-  # test case where the epi_df is empty
-  null_jhu <- jhu %>% filter(time_value < as.Date("0009-01-01"))
-  expect_no_error(null_ave_ens <- ensemble_average(null_jhu, meta_res, "case_rate"))
-  # ensemble_average doesn't actually depend on the input data
-  expect_true(all.equal(ave_ens, null_ave_ens))
-
-  # carry on as if nothing was missing if one of the forecasters is missing entries
-  one_partially_missing <- rlang::list2(meta_res[[1]][1:900, 1:5], !!!meta_res[2:4])
-  expect_no_error(partial_ave_ens <- ensemble_average(jhu, one_partially_missing))
-  # the entries that are present for all of them are the same
-  left_join(partial_ave_ens, ave_ens, by = c("geo_value", "forecast_date", "target_end_date", "quantile")) %>%
-    summarize(all.equal(value.x, value.y), .groups = "drop")
-  left_join(partial_ave_ens, ave_ens, by = c("geo_value", "forecast_date", "target_end_date", "quantile")) %>%
-    mutate(eq = value.x == value.y) %>%
-    pull(eq)
-  expect_true(all.equal(partial_ave_ens[1:900, 1:5], ave_ens[1:900, 1:5]))
-  expect_identical(names(null_ave_ens), names(ave_ens))
-})