From 1ecce6747c42c357bcf0a6ee44a23fb2bdee24f7 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Mon, 12 Aug 2024 13:06:03 -0700
Subject: [PATCH 1/8] add template

---
 vignettes/articles/scorecaster.Rmd | 86 ++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 vignettes/articles/scorecaster.Rmd

diff --git a/vignettes/articles/scorecaster.Rmd b/vignettes/articles/scorecaster.Rmd
new file mode 100644
index 000000000..748fcbe5c
--- /dev/null
+++ b/vignettes/articles/scorecaster.Rmd
@@ -0,0 +1,86 @@
+---
+title: "Implementing a scorecaster for quantile calibration"
+---
+
+```{r setup, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  warning = FALSE,
+  message = FALSE,
+  cache = TRUE
+)
+```
+
+
+```{r packages}
+library(tidyverse)
+library(epipredict)
+```
+
+```{r forecast-output}
+jhu <- case_death_rate_subset
+fc_time_values <- seq(as.Date("2021-03-09"), as.Date("2021-12-01"), by = "4 weeks")
+q_levels <- c(1, 2, 5, 8, 9) / 10
+forecaster <- function(x, aheads = 7) {
+  map(aheads, ~ arx_forecaster(
+    x, "death_rate", c("case_rate", "death_rate"),
+    quantile_reg(quantile_levels = q_levels),
+    arx_args_list(ahead = .x, quantile_levels = q_levels)
+  )$predictions |>
+    mutate(ahead = .x)
+  ) |> list_rbind()
+}
+
+out <- map(
+  .x = fc_time_values,
+  .f = ~forecaster(jhu %>% filter(time_value <= .x), c(7, 14, 21, 28)),
+  .progress = TRUE
+)
+
+out <- out %>% list_rbind()
+out <- left_join(
+  out,
+  jhu,
+  by = c("target_date" = "time_value", "geo_value")
+)
+```
+
+
+```{r necessary-funs}
+quantile_conformal_score <- function(x, actual) {
+  UseMethod("quantile_conformal_score")
+}
+quantile_conformal_score.distribution <- function(x, actual) {
+  l <- vctrs::vec_recycle_common(x = x, actual = actual)
+  map2(
+    .x = vctrs::vec_data(l$x),
+    .y = l$actual,
+    .f = quantile_conformal_score
+  )
+}
+quantile_conformal_score.dist_quantiles <- function(x, actual) {
+  values <- vctrs::field(x, "values")
+  quantile_levels <- vctrs::field(x, "quantile_levels")
+  errs <- (actual - values) * (quantile_levels > 0.5) +
+    (values - actual) * (quantile_levels < 0.5) +
+    abs(actual - values) * (quantile_levels == 0.5)
+  errs
+}
+
+tangent_integrator <- function(x, t, KI = 1000, Csat = 2) {
+  # defaults from https://github.com/aangelopoulos/conformal-time-series/blob/b729c3f5ff633bfc43f0f7ca08199b549c2573ac/tests/configs/ca-COVID-deaths-4wk.yaml#L41
+  x <- x * log(t + 1) / (Csat * (t + 1))
+  up <- x >= pi / 2
+  down <- x <= -pi / 2
+  x[up] <- Inf
+  x[down] <- -Inf
+  mid <- !up & !down
+  x[mid] <- KI * tan(x[mid])
+}
+```
+
+```{r score-fcasts}
+out <- out |>
+  mutate(qc_scores = quantile_conformal_score(.pred_distn, death_rate))
+```

From bbd4b49a768b6705fb11e5c458a1a754f8512c83 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Wed, 21 Aug 2024 10:49:56 -0700
Subject: [PATCH 2/8] brief comments on the scorecaster code

---
 vignettes/articles/scorecaster.Rmd | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/vignettes/articles/scorecaster.Rmd b/vignettes/articles/scorecaster.Rmd
index 748fcbe5c..960b83f87 100644
--- a/vignettes/articles/scorecaster.Rmd
+++ b/vignettes/articles/scorecaster.Rmd
@@ -18,6 +18,8 @@ library(tidyverse)
 library(epipredict)
 ```
 
+First we get some forecasts.
+
 ```{r forecast-output}
 jhu <- case_death_rate_subset
 fc_time_values <- seq(as.Date("2021-03-09"), as.Date("2021-12-01"), by = "4 weeks")
@@ -46,6 +48,7 @@ out <- left_join(
 )
 ```
 
+Now we set up the "quantile conformal score" and the tangent integrator.
 
 ```{r necessary-funs}
 quantile_conformal_score <- function(x, actual) {
@@ -80,7 +83,15 @@ tangent_integrator <- function(x, t, KI = 1000, Csat = 2) {
 }
 ```
 
+Score the forecasts.
+
 ```{r score-fcasts}
 out <- out |>
   mutate(qc_scores = quantile_conformal_score(.pred_distn, death_rate))
 ```
+
+Now we would need a "scorecaster". The paper has code here: 
+https://github.com/aangelopoulos/conformal-time-series/blob/b729c3f5ff633bfc43f0f7ca08199b549c2573ac/tests/datasets/covid-ts-proc/statewide/death-forecasting-perstate-lasso-qr.ipynb
+
+Not quite sure what the model is. Note that `epipredict::quantile_reg()` may work
+(without the $\ell_1$ penalty).

From 98609a003353c8827cb42be788cb7e42d05a8ae3 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Mon, 16 Sep 2024 17:10:22 -0700
Subject: [PATCH 3/8] add draft

---
 R/step_pivot_wider.R | 78 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 R/step_pivot_wider.R

diff --git a/R/step_pivot_wider.R b/R/step_pivot_wider.R
new file mode 100644
index 000000000..af5294b73
--- /dev/null
+++ b/R/step_pivot_wider.R
@@ -0,0 +1,78 @@
+step_pivot_wider <- function(
+    recipe,
+    ...,
+    role = "predictor",
+    id_cols = NULL,
+    id_expand = FALSE,
+    names_from = NULL,
+    values_fill = NA,
+    values_fn = NULL,
+    unused_fn = NULL,
+    skip = FALSE,
+    id = rand_id("pivot_wider")
+) {
+
+  arg_is_chr_scalar(role, id)
+  add_step(
+    recipe,
+    step_pivot_wider_new(
+      terms = enquos(...),
+      role = role,
+      trained = FALSE,
+      id_cols = id_cols,
+      id_expand = id_expand,
+      names_from = names_from %||% kill_time_value(key_colnames(recipe)),
+      values_fill = values_fill,
+      values_fn = values_fn,
+      columns = NULL,
+      skip = skip,
+      id = id
+    )
+  )
+}
+
+step_pivot_wider_new <- function(
+    terms, role, trained, id_cols, id_expand, names_from, values_fill,
+    values_fn, columns, skip, id) {
+  step(
+    subclass = "pivot_wider",
+    terms = terms,
+    role = role,
+    trained = trained,
+    id_cols = id_cols,
+    id_expand = id_expand,
+    names_from = names_from,
+    values_fill = values_fill,
+    values_fn = values_fn,
+    columns = columns,
+    skip = skip,
+    id = id
+  )
+}
+
+prep.step_pivot_wider <- function(x, training, info = NULL, ...) {
+  step_pivot_wider_new(
+    terms = x$terms,
+    role = x$role,
+    trained = TRUE,
+    id_cols = x$id_cols,
+    id_expand = x$id_expand,
+    names_from = x$names_from,
+    values_fill = x$values_fill,
+    values_fn = x$values_fn,
+    columns = recipes_eval_select(x$terms, training, info),
+    skip = x$skip,
+    id = x$id
+  )
+}
+
+bake.step_pivot_wider <- function(object, new_data, ...) {
+  pivotted <- tidyr::pivot_wider(
+    new_data, id_cols = object$id_cols, id_expand = object$id_expand,
+    names_from = object$names_from, values_from = all_of(object$columns),
+    values_fill = object$values_fill, values_fn = object$values_fn
+  )
+  joinby <- union(key_colnames(new_data), object$id_cols))
+  new_data <- left_join(new_data, pivotted, by)
+
+}

From 28006fe1da5cd1627bb5e07a90edab8f4b49a9a8 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Tue, 17 Sep 2024 11:44:45 -0700
Subject: [PATCH 4/8] draft the step

---
 NAMESPACE                              |  3 +
 R/step_pivot_wider.R                   | 93 ++++++++++++++++++++------
 R/utils-arg.R                          | 23 +++++++
 man/step_pivot_wider.Rd                | 80 ++++++++++++++++++++++
 tests/testthat/test-step_pivot_wider.R | 20 ++++++
 5 files changed, 199 insertions(+), 20 deletions(-)
 create mode 100644 man/step_pivot_wider.Rd
 create mode 100644 tests/testthat/test-step_pivot_wider.R

diff --git a/NAMESPACE b/NAMESPACE
index 23c5adeaf..1e728a071 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -24,6 +24,7 @@ S3method(bake,step_epi_lag)
 S3method(bake,step_epi_slide)
 S3method(bake,step_growth_rate)
 S3method(bake,step_lag_difference)
+S3method(bake,step_pivot_wider)
 S3method(bake,step_population_scaling)
 S3method(bake,step_training_window)
 S3method(detect_layer,frosting)
@@ -63,6 +64,7 @@ S3method(prep,step_epi_lag)
 S3method(prep,step_epi_slide)
 S3method(prep,step_growth_rate)
 S3method(prep,step_lag_difference)
+S3method(prep,step_pivot_wider)
 S3method(prep,step_population_scaling)
 S3method(prep,step_training_window)
 S3method(print,alist)
@@ -201,6 +203,7 @@ export(step_epi_naomit)
 export(step_epi_slide)
 export(step_growth_rate)
 export(step_lag_difference)
+export(step_pivot_wider)
 export(step_population_scaling)
 export(step_training_window)
 export(tibble)
diff --git a/R/step_pivot_wider.R b/R/step_pivot_wider.R
index af5294b73..2140f7a5a 100644
--- a/R/step_pivot_wider.R
+++ b/R/step_pivot_wider.R
@@ -1,30 +1,46 @@
+
+
+#' Create new variables by pivotting data
+#'
+#' @inheritParams step_growth_rate
+#' @param ... One or more selector functions to choose variables
+#'  values to pivot. These are the `values_from` argument for [tidyr::pivot_wider()].
+#'  See [recipes::selections()] for more details.
+#' @inheritParams tidyr::pivot_wider
+#'
+#' @template step-return
+#' @export
+#'
+#' @examples
+#' 1+1
 step_pivot_wider <- function(
     recipe,
     ...,
     role = "predictor",
+    names_from,
     id_cols = NULL,
     id_expand = FALSE,
-    names_from = NULL,
     values_fill = NA,
     values_fn = NULL,
-    unused_fn = NULL,
     skip = FALSE,
     id = rand_id("pivot_wider")
 ) {
 
   arg_is_chr_scalar(role, id)
+
   add_step(
     recipe,
     step_pivot_wider_new(
       terms = enquos(...),
       role = role,
       trained = FALSE,
-      id_cols = id_cols,
+      user_id_cols = if (is.null(id_cols)) NULL else enquos(id_cols),
+      edf_id_cols = key_colnames(recipe),
       id_expand = id_expand,
-      names_from = names_from %||% kill_time_value(key_colnames(recipe)),
+      names_from = enquos(names_from),
       values_fill = values_fill,
       values_fn = values_fn,
-      columns = NULL,
+      values_from = NULL,
       skip = skip,
       id = id
     )
@@ -32,47 +48,84 @@ step_pivot_wider <- function(
 }
 
 step_pivot_wider_new <- function(
-    terms, role, trained, id_cols, id_expand, names_from, values_fill,
-    values_fn, columns, skip, id) {
+    terms, role, trained, user_id_cols, edf_id_cols,
+    id_expand, names_from, values_fill,
+    values_fn, values_from, skip, id) {
   step(
     subclass = "pivot_wider",
     terms = terms,
     role = role,
     trained = trained,
-    id_cols = id_cols,
+    user_id_cols = user_id_cols,
+    edf_id_cols = edf_id_cols,
     id_expand = id_expand,
     names_from = names_from,
     values_fill = values_fill,
     values_fn = values_fn,
-    columns = columns,
+    values_from = values_from,
     skip = skip,
     id = id
   )
 }
 
+#' @export
 prep.step_pivot_wider <- function(x, training, info = NULL, ...) {
+  checkmate::assert_subset(x$edf_id_cols, key_colnames(training))
   step_pivot_wider_new(
     terms = x$terms,
     role = x$role,
     trained = TRUE,
-    id_cols = x$id_cols,
+    user_id_cols = recipes_eval_select(x$user_id_cols, training, info),
+    edf_id_cols = key_colnames(training),
     id_expand = x$id_expand,
-    names_from = x$names_from,
+    names_from = recipes_eval_select(x$names_from, training, info),
     values_fill = x$values_fill,
     values_fn = x$values_fn,
-    columns = recipes_eval_select(x$terms, training, info),
+    values_from = recipes_eval_select(x$terms, training, info),
     skip = x$skip,
     id = x$id
   )
 }
 
+#' @export
 bake.step_pivot_wider <- function(object, new_data, ...) {
-  pivotted <- tidyr::pivot_wider(
-    new_data, id_cols = object$id_cols, id_expand = object$id_expand,
-    names_from = object$names_from, values_from = all_of(object$columns),
-    values_fill = object$values_fill, values_fn = object$values_fn
-  )
-  joinby <- union(key_colnames(new_data), object$id_cols))
-  new_data <- left_join(new_data, pivotted, by)
-
+  hardhat::validate_column_names(new_data, object$edf_id_cols)
+  id_cols <- union(object$user_id_cols, object$edf_id_cols)
+  id_cols <- union(id_cols, key_colnames(new_data))
+  browser()
+  if (length(id_cols) == 0L) {
+    pivotted <- tidyr::pivot_wider(
+      new_data,
+      id_expand = object$id_expand,
+      names_from = unname(object$names_from),
+      values_from = unname(object$values_from),
+      values_fill = object$values_fill,
+      values_fn = object$values_fn,
+      names_repair = "unique"
+    )
+    joinby <- intersect(names(pivotted), names(new_data))
+  } else {
+    pivotted <- tidyr::pivot_wider(
+      new_data,
+      id_cols = id_cols,
+      id_expand = object$id_expand,
+      names_from = unname(object$names_from),
+      values_from = unname(object$values_from),
+      values_fill = object$values_fill,
+      values_fn = object$values_fn,
+      names_repair = "unique"
+    )
+    joinby <- id_cols
+  }
+  if (length(joinby) > 0L) {
+    new_data <- left_join(new_data, pivotted, by = joinby)
+  } else if (length(joinby) == 0L && nrow(pivotted) == nrow(new_data)) {
+    new_data <- bind_cols(new_data, pivotted, .name_repair = "unique")
+  } else {
+    cli_abort(c(
+      "Unable to join variables created by `step_pivot_wider()`.",
+      i = "You may want to pass in `id_cols` on step creation."
+    ))
+  }
+  new_data
 }
diff --git a/R/utils-arg.R b/R/utils-arg.R
index b4242eaf9..ff76f88ee 100644
--- a/R/utils-arg.R
+++ b/R/utils-arg.R
@@ -102,3 +102,26 @@ arg_to_date <- function(x, allow_null = FALSE) {
   arg_is_date(x, allow_null = allow_null)
   x
 }
+
+check_tidyselect_cols_exist <- function(selection, data, call = caller_env()) {
+  name_pos <- tidyselect::eval_select(selection, data, error_call = call)
+  if (length(name_pos) == 0L) {
+    return(list(ok = TRUE, missing_names = selection))
+  }
+  hardhat::check_column_names(data, names(name_pos))
+}
+
+validate_tidyselect_cols_exist <- function(selection, data, call = caller_env()) {
+  check <- check_tidyselect_cols_exist(selection, data, call)
+  if (!check$ok) {
+    missing_names <- glue::glue_collapse(
+      glue::single_quote(check$missing_names),
+      sep = ", "
+    )
+    message <- glue::glue(
+      "The {selection} results in missing columns: {missing_names}."
+    )
+    cli_abort(message, call = call)
+  }
+  invisible(data)
+}
diff --git a/man/step_pivot_wider.Rd b/man/step_pivot_wider.Rd
new file mode 100644
index 000000000..1d2358e61
--- /dev/null
+++ b/man/step_pivot_wider.Rd
@@ -0,0 +1,80 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/step_pivot_wider.R
+\name{step_pivot_wider}
+\alias{step_pivot_wider}
+\title{Create new variables by pivotting data}
+\usage{
+step_pivot_wider(
+  recipe,
+  ...,
+  role = "predictor",
+  names_from,
+  id_cols = NULL,
+  id_expand = FALSE,
+  values_fill = NA,
+  values_fn = NULL,
+  skip = FALSE,
+  id = rand_id("pivot_wider")
+)
+}
+\arguments{
+\item{recipe}{A recipe object. The step will be added to the
+sequence of operations for this recipe.}
+
+\item{...}{One or more selector functions to choose variables
+values to pivot. These are the \code{values_from} argument for \code{\link[tidyr:pivot_wider]{tidyr::pivot_wider()}}.
+See \code{\link[recipes:selections]{recipes::selections()}} for more details.}
+
+\item{role}{For model terms created by this step, what analysis role should
+they be assigned? \code{lag} is default a predictor while \code{ahead} is an outcome.}
+
+\item{id_cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> A set of columns that
+uniquely identify each observation. Typically used when you have
+redundant variables, i.e. variables whose values are perfectly correlated
+with existing variables.
+
+Defaults to all columns in \code{data} except for the columns specified through
+\code{names_from} and \code{values_from}. If a tidyselect expression is supplied, it
+will be evaluated on \code{data} after removing the columns specified through
+\code{names_from} and \code{values_from}.}
+
+\item{id_expand}{Should the values in the \code{id_cols} columns be expanded by
+\code{\link[tidyr:expand]{expand()}} before pivoting? This results in more rows, the output will
+contain a complete expansion of all possible values in \code{id_cols}. Implicit
+factor levels that aren't represented in the data will become explicit.
+Additionally, the row values corresponding to the expanded \code{id_cols} will
+be sorted.}
+
+\item{values_fill}{Optionally, a (scalar) value that specifies what each
+\code{value} should be filled in with when missing.
+
+This can be a named list if you want to apply different fill values to
+different value columns.}
+
+\item{values_fn}{Optionally, a function applied to the value in each cell
+in the output. You will typically use this when the combination of
+\code{id_cols} and \code{names_from} columns does not uniquely identify an
+observation.
+
+This can be a named list if you want to apply different aggregations
+to different \code{values_from} columns.}
+
+\item{skip}{A logical. Should the step be skipped when the
+recipe is baked by \code{\link[=bake]{bake()}}? While all operations are baked
+when \code{\link[=prep]{prep()}} is run, some operations may not be able to be
+conducted on new data (e.g. processing the outcome variable(s)).
+Care should be taken when using \code{skip = TRUE} as it may affect
+the computations for subsequent operations.}
+
+\item{id}{A unique identifier for the step}
+}
+\value{
+An updated version of \code{recipe} with the new step added to the
+sequence of any existing operations.
+}
+\description{
+Create new variables by pivotting data
+}
+\examples{
+1+1
+}
diff --git a/tests/testthat/test-step_pivot_wider.R b/tests/testthat/test-step_pivot_wider.R
new file mode 100644
index 000000000..a6829c1f5
--- /dev/null
+++ b/tests/testthat/test-step_pivot_wider.R
@@ -0,0 +1,20 @@
+library(tidyr)
+tib <- expand_grid(
+  tv = 1:4,
+  gv = letters[1:2],
+  cl = letters[2:4]
+)
+tib$val1 <- rnorm(nrow(tib))
+tib$val2 <- rnorm(nrow(tib))
+
+recipe(tib) %>%
+  step_pivot_wider(starts_with(val),
+    names_from = cl,
+    values_fill = list(val1 = 0, val2 = 0)
+  ) %>%
+  prep()
+  bake(new_data = NULL)
+
+test_that("multiplication works", {
+  expect_equal(2 * 2, 4)
+})

From a3ad946748ea67ecd4f5da985cb307779fb61ce9 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Tue, 17 Sep 2024 14:52:28 -0700
Subject: [PATCH 5/8] start testing

---
 R/step_pivot_wider.R                   |  1 -
 tests/testthat/test-step_pivot_wider.R | 40 +++++++++++++++++++-------
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/R/step_pivot_wider.R b/R/step_pivot_wider.R
index 2140f7a5a..e03e3bff0 100644
--- a/R/step_pivot_wider.R
+++ b/R/step_pivot_wider.R
@@ -92,7 +92,6 @@ bake.step_pivot_wider <- function(object, new_data, ...) {
   hardhat::validate_column_names(new_data, object$edf_id_cols)
   id_cols <- union(object$user_id_cols, object$edf_id_cols)
   id_cols <- union(id_cols, key_colnames(new_data))
-  browser()
   if (length(id_cols) == 0L) {
     pivotted <- tidyr::pivot_wider(
       new_data,
diff --git a/tests/testthat/test-step_pivot_wider.R b/tests/testthat/test-step_pivot_wider.R
index a6829c1f5..ce16cc77b 100644
--- a/tests/testthat/test-step_pivot_wider.R
+++ b/tests/testthat/test-step_pivot_wider.R
@@ -4,17 +4,35 @@ tib <- expand_grid(
   gv = letters[1:2],
   cl = letters[2:4]
 )
-tib$val1 <- rnorm(nrow(tib))
-tib$val2 <- rnorm(nrow(tib))
+tib$val1 <- 1:nrow(tib) + .1
+tib$val2 <- nrow(tib):1 - .1
 
-recipe(tib) %>%
-  step_pivot_wider(starts_with(val),
-    names_from = cl,
-    values_fill = list(val1 = 0, val2 = 0)
-  ) %>%
-  prep()
-  bake(new_data = NULL)
 
-test_that("multiplication works", {
-  expect_equal(2 * 2, 4)
+test_that("works with recipe, various possible pivots", {
+  out <- recipe(tib) %>%
+    step_pivot_wider(val1, names_from = cl) %>%
+    prep(training = tib) %>% bake(new_data = NULL)
+  expect_snapshot(out)
+
+  out <- recipe(tib) %>%
+    step_pivot_wider(val1, names_from = cl, values_fill = 0) %>%
+    prep(training = tib) %>% bake(new_data = NULL)
+  expect_snapshot(out)
+
+  out <- recipe(tib) %>%
+    step_pivot_wider(starts_with("val"), names_from = cl) %>%
+    prep(training = tib) %>% bake(new_data = NULL)
+  expect_snapshot(out)
+
+  out <- recipe(tib) %>%
+    step_pivot_wider(val1, names_from = gv:cl) %>%
+    prep(training = tib) %>% bake(new_data = NULL)
+  expect_snapshot(out)
+
+  #fails
+  out <- recipe(tib) %>%
+    step_pivot_wider(val1, id_cols = tv:cl, names_from = cl) %>%
+    prep(training = tib) %>% bake(new_data = NULL)
+
+  edf <- tib %>% as_epi_df()
 })

From 292d62d3c536c911e59b6f8bc1243a1be4a73cfc Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Tue, 17 Sep 2024 15:16:55 -0700
Subject: [PATCH 6/8] assign earlier

---
 R/step_pivot_wider.R | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/R/step_pivot_wider.R b/R/step_pivot_wider.R
index e03e3bff0..ac68aa267 100644
--- a/R/step_pivot_wider.R
+++ b/R/step_pivot_wider.R
@@ -28,16 +28,19 @@ step_pivot_wider <- function(
 
   arg_is_chr_scalar(role, id)
 
+  id_cols <- enquos(id_cols)
+  names_from <- enquos(names_from)
+
   add_step(
     recipe,
     step_pivot_wider_new(
       terms = enquos(...),
       role = role,
       trained = FALSE,
-      user_id_cols = if (is.null(id_cols)) NULL else enquos(id_cols),
+      user_id_cols = id_cols,
       edf_id_cols = key_colnames(recipe),
       id_expand = id_expand,
-      names_from = enquos(names_from),
+      names_from = names_from,
       values_fill = values_fill,
       values_fn = values_fn,
       values_from = NULL,
@@ -106,7 +109,7 @@ bake.step_pivot_wider <- function(object, new_data, ...) {
   } else {
     pivotted <- tidyr::pivot_wider(
       new_data,
-      id_cols = id_cols,
+      id_cols = unname(id_cols),
       id_expand = object$id_expand,
       names_from = unname(object$names_from),
       values_from = unname(object$values_from),

From 11551b257a560ab3b6b702c55911d945a68030c8 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Fri, 4 Oct 2024 11:09:47 -0700
Subject: [PATCH 7/8] refactor

---
 R/step_pivot_wider.R | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/R/step_pivot_wider.R b/R/step_pivot_wider.R
index ac68aa267..03c288440 100644
--- a/R/step_pivot_wider.R
+++ b/R/step_pivot_wider.R
@@ -73,13 +73,15 @@ step_pivot_wider_new <- function(
 
 #' @export
 prep.step_pivot_wider <- function(x, training, info = NULL, ...) {
-  checkmate::assert_subset(x$edf_id_cols, key_colnames(training))
+  user_id_cols <- recipes_eval_select(x$user_id_cols, training, info)
+  all_id_cols <- union(user_id_cols, key_colnames(training))
+  hardhat::validate_column_names(training, all_id_cols)
   step_pivot_wider_new(
     terms = x$terms,
     role = x$role,
     trained = TRUE,
-    user_id_cols = recipes_eval_select(x$user_id_cols, training, info),
-    edf_id_cols = key_colnames(training),
+    user_id_cols = user_id_cols,
+    edf_id_cols = all_id_cols,
     id_expand = x$id_expand,
     names_from = recipes_eval_select(x$names_from, training, info),
     values_fill = x$values_fill,
@@ -92,9 +94,9 @@ prep.step_pivot_wider <- function(x, training, info = NULL, ...) {
 
 #' @export
 bake.step_pivot_wider <- function(object, new_data, ...) {
-  hardhat::validate_column_names(new_data, object$edf_id_cols)
-  id_cols <- union(object$user_id_cols, object$edf_id_cols)
-  id_cols <- union(id_cols, key_colnames(new_data))
+  hardhat::validate_column_names(new_data, object$all_id_cols)
+  id_cols <- union(object$all_id_cols, key_colnames(new_data))
+  object$edf_id_cols <- id_cols
   if (length(id_cols) == 0L) {
     pivotted <- tidyr::pivot_wider(
       new_data,
@@ -131,3 +133,14 @@ bake.step_pivot_wider <- function(object, new_data, ...) {
   }
   new_data
 }
+
+#' @export
+print.step_pivot_wider <- function(x, width = max(20, options()$width - 30), ...) {
+  print_epi_step(x$values_from, x$terms, x$trained,
+                 title = "Pivotting variables",
+                 conjunction = "by",
+                 extra_text = x$names_from
+  )
+  invisible(x)
+}
+

From 8b1e48297648592746924dcb092c300f60e39f47 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Fri, 4 Oct 2024 12:14:14 -0700
Subject: [PATCH 8/8] examples run, mostly untested

---
 NAMESPACE                  |  1 +
 R/step_pivot_wider.R       | 61 ++++++++++++++++++++++++++++----------
 man/step_adjust_latency.Rd |  4 +--
 man/step_pivot_wider.Rd    | 34 +++++++++++++--------
 4 files changed, 69 insertions(+), 31 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 85127359b..2dbdf3178 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -98,6 +98,7 @@ S3method(print,step_epi_slide)
 S3method(print,step_growth_rate)
 S3method(print,step_lag_difference)
 S3method(print,step_naomit)
+S3method(print,step_pivot_wider)
 S3method(print,step_population_scaling)
 S3method(print,step_training_window)
 S3method(quantile,dist_quantiles)
diff --git a/R/step_pivot_wider.R b/R/step_pivot_wider.R
index 03c288440..edadd29db 100644
--- a/R/step_pivot_wider.R
+++ b/R/step_pivot_wider.R
@@ -2,23 +2,43 @@
 
 #' Create new variables by pivotting data
 #'
+#' This function typically creates new predictors by sharing values across keys.
+#' So in the most basic case (see examples below), the values of a signal in
+#' one `geo_value` would be used as predictors in all the other locations.
+#'
 #' @inheritParams step_growth_rate
-#' @param ... One or more selector functions to choose variables
+#' @param ... <[`tidy-select`][tidyr_tidy_select]> One or more selector
+#'  functions to choose variables
 #'  values to pivot. These are the `values_from` argument for [tidyr::pivot_wider()].
 #'  See [recipes::selections()] for more details.
+#' @param names_from A selector function to choose which column (or columns) to
+#'  get the name of the output columns from. This is typically `geo_value`
+#'  (the default), and possibly any additional keys in the training data.
+#' @param id_cols <[`tidy-select`][tidyr_tidy_select]> A selector function
+#'  providing a set of columns that uniquely identifies each observation.
+#'  The typical use is for this to be `time_value` and any additional keys
+#'  not selected by `names_from` (this is the default behaviour).
 #' @inheritParams tidyr::pivot_wider
 #'
 #' @template step-return
 #' @export
 #'
 #' @examples
-#' 1+1
+#' jhu <- case_death_rate_subset %>%
+#'   filter(geo_value %in% c("ca", "ny", "pa"), time_value > "2021-12-01")
+#' r <- epi_recipe(jhu)
+#'
+#' r1 <- r %>% step_pivot_wider("death_rate")
+#' bake(prep(r1, jhu), new_data = NULL)
+#'
+#' r2 <- r %>% step_pivot_wider(dplyr::ends_with("rate"))
+#' bake(prep(r2, jhu), new_data = NULL)
 step_pivot_wider <- function(
     recipe,
     ...,
+    names_from = "geo_value",
     role = "predictor",
-    names_from,
-    id_cols = NULL,
+    id_cols = "time_value",
     id_expand = FALSE,
     values_fill = NA,
     values_fn = NULL,
@@ -74,8 +94,13 @@ step_pivot_wider_new <- function(
 #' @export
 prep.step_pivot_wider <- function(x, training, info = NULL, ...) {
   user_id_cols <- recipes_eval_select(x$user_id_cols, training, info)
-  all_id_cols <- union(user_id_cols, key_colnames(training))
-  hardhat::validate_column_names(training, all_id_cols)
+  hardhat::validate_column_names(training, user_id_cols)
+  names_from <- recipes_eval_select(x$names_from, training, info)
+  remaining_ids <- setdiff(
+    union(user_id_cols, names_from), # keys from user
+    key_colnames(training)           # all edf keys
+  )
+  all_id_cols <- union(user_id_cols, remaining_ids)
   step_pivot_wider_new(
     terms = x$terms,
     role = x$role,
@@ -83,7 +108,7 @@ prep.step_pivot_wider <- function(x, training, info = NULL, ...) {
     user_id_cols = user_id_cols,
     edf_id_cols = all_id_cols,
     id_expand = x$id_expand,
-    names_from = recipes_eval_select(x$names_from, training, info),
+    names_from = names_from,
     values_fill = x$values_fill,
     values_fn = x$values_fn,
     values_from = recipes_eval_select(x$terms, training, info),
@@ -94,15 +119,19 @@ prep.step_pivot_wider <- function(x, training, info = NULL, ...) {
 
 #' @export
 bake.step_pivot_wider <- function(object, new_data, ...) {
-  hardhat::validate_column_names(new_data, object$all_id_cols)
-  id_cols <- union(object$all_id_cols, key_colnames(new_data))
-  object$edf_id_cols <- id_cols
+  id_cols <- object$edf_id_cols
+  names_from <- object$names_from
+  values_from <- object$values_from
+  browser()
+  hardhat::validate_column_names(new_data, id_cols)
+  hardhat::validate_column_names(new_data, names_from)
+  hardhat::validate_column_names(new_data, values_from)
   if (length(id_cols) == 0L) {
     pivotted <- tidyr::pivot_wider(
-      new_data,
+      new_data[, c(names_from, values_from)],
       id_expand = object$id_expand,
-      names_from = unname(object$names_from),
-      values_from = unname(object$values_from),
+      names_from = unname(names_from),
+      values_from = unname(values_from),
       values_fill = object$values_fill,
       values_fn = object$values_fn,
       names_repair = "unique"
@@ -110,11 +139,11 @@ bake.step_pivot_wider <- function(object, new_data, ...) {
     joinby <- intersect(names(pivotted), names(new_data))
   } else {
     pivotted <- tidyr::pivot_wider(
-      new_data,
+      new_data[, c(id_cols, names_from, values_from)],
       id_cols = unname(id_cols),
       id_expand = object$id_expand,
-      names_from = unname(object$names_from),
-      values_from = unname(object$values_from),
+      names_from = unname(names_from),
+      values_from = unname(values_from),
       values_fill = object$values_fill,
       values_fn = object$values_fn,
       names_repair = "unique"
diff --git a/man/step_adjust_latency.Rd b/man/step_adjust_latency.Rd
index f0ee41390..678c2d38f 100644
--- a/man/step_adjust_latency.Rd
+++ b/man/step_adjust_latency.Rd
@@ -260,8 +260,8 @@ while this will not:
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{toy_recipe <- epi_recipe(toy_df) \%>\%
    step_epi_lag(a, lag=0) \%>\%
    step_adjust_latency(a, method = "extend_lags")
-#> Warning: If `method` is "extend_lags" or "locf", then the previous `step_epi_lag`s won't work with
-#> modified data.
+#> Warning: If `method` is "extend_lags" or "locf", then the previous `step_epi_lag`s won't
+#> work with modified data.
 }\if{html}{\out{</div>}}
 
 If you create columns that you then apply lags to (such as
diff --git a/man/step_pivot_wider.Rd b/man/step_pivot_wider.Rd
index 1d2358e61..668c4f4dd 100644
--- a/man/step_pivot_wider.Rd
+++ b/man/step_pivot_wider.Rd
@@ -7,9 +7,9 @@
 step_pivot_wider(
   recipe,
   ...,
+  names_from = "geo_value",
   role = "predictor",
-  names_from,
-  id_cols = NULL,
+  id_cols = "time_value",
   id_expand = FALSE,
   values_fill = NA,
   values_fn = NULL,
@@ -21,22 +21,22 @@ step_pivot_wider(
 \item{recipe}{A recipe object. The step will be added to the
 sequence of operations for this recipe.}
 
-\item{...}{One or more selector functions to choose variables
+\item{...}{<\code{\link[=tidyr_tidy_select]{tidy-select}}> One or more selector
+functions to choose variables
 values to pivot. These are the \code{values_from} argument for \code{\link[tidyr:pivot_wider]{tidyr::pivot_wider()}}.
 See \code{\link[recipes:selections]{recipes::selections()}} for more details.}
 
+\item{names_from}{A selector function to choose which column (or columns) to
+get the name of the output columns from. This is typically \code{geo_value}
+(the default), and possibly any additional keys in the training data.}
+
 \item{role}{For model terms created by this step, what analysis role should
 they be assigned? \code{lag} is default a predictor while \code{ahead} is an outcome.}
 
-\item{id_cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> A set of columns that
-uniquely identify each observation. Typically used when you have
-redundant variables, i.e. variables whose values are perfectly correlated
-with existing variables.
-
-Defaults to all columns in \code{data} except for the columns specified through
-\code{names_from} and \code{values_from}. If a tidyselect expression is supplied, it
-will be evaluated on \code{data} after removing the columns specified through
-\code{names_from} and \code{values_from}.}
+\item{id_cols}{<\code{\link[=tidyr_tidy_select]{tidy-select}}> A selector function
+providing a set of columns that uniquely identifies each observation.
+The typical use is for this to be \code{time_value} and any additional keys
+not selected by \code{names_from} (this is the default behaviour).}
 
 \item{id_expand}{Should the values in the \code{id_cols} columns be expanded by
 \code{\link[tidyr:expand]{expand()}} before pivoting? This results in more rows, the output will
@@ -76,5 +76,13 @@ sequence of any existing operations.
 Create new variables by pivotting data
 }
 \examples{
-1+1
+jhu <- case_death_rate_subset \%>\%
+  filter(geo_value \%in\% c("ca", "ny", "pa"), time_value > "2021-12-01")
+r <- epi_recipe(jhu)
+
+r1 <- r \%>\% step_pivot_wider("death_rate")
+bake(prep(r1, jhu), new_data = NULL)
+
+r2 <- r \%>\% step_pivot_wider(dplyr::ends_with("rate"))
+bake(prep(r2, jhu), new_data = NULL)
 }