Skip to content

Commit c77ea78

Browse files
authored
Merge pull request #365 from cmu-delphi/361-dist-quantiles-null
361 dist quantiles null
2 parents f187192 + 5867051 commit c77ea78

14 files changed

+1261
-1059
lines changed

NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ S3method(tidy,layer)
124124
S3method(update,layer)
125125
S3method(vec_ptype_abbr,dist_quantiles)
126126
S3method(vec_ptype_full,dist_quantiles)
127+
S3method(weighted_interval_score,default)
128+
S3method(weighted_interval_score,dist_default)
129+
S3method(weighted_interval_score,dist_quantiles)
130+
S3method(weighted_interval_score,distribution)
127131
export("%>%")
128132
export(Add_model)
129133
export(Remove_model)
@@ -207,6 +211,7 @@ export(update_epi_recipe)
207211
export(update_frosting)
208212
export(update_model)
209213
export(validate_layer)
214+
export(weighted_interval_score)
210215
import(distributional)
211216
import(epiprocess)
212217
import(parsnip)

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,7 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat
5252
`...` args intended for `predict.model_fit()`
5353
- `bake.epi_recipe()` will now re-infer the geo and time type in case baking the
5454
steps has changed the appropriate values
55+
- produce length 0 `dist_quantiles()`
56+
- add functionality to calculate weighted interval scores for `dist_quantiles()`
5557
- Add `step_epi_slide` to produce generic sliding computations over an `epi_df`
5658
- Add quantile random forests (via `{grf}`) as a parsnip engine

R/dist_quantiles.R

Lines changed: 44 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,21 @@
11
#' @importFrom vctrs field vec_cast new_rcrd
2-
new_quantiles <- function(values = double(), quantile_levels = double()) {
2+
new_quantiles <- function(values = double(1), quantile_levels = double(1)) {
33
arg_is_probabilities(quantile_levels)
44

55
vec_cast(values, double())
66
vec_cast(quantile_levels, double())
7+
values <- unname(values)
8+
if (length(values) == 0L) {
9+
return(new_rcrd(
10+
list(
11+
values = rep(NA_real_, length(quantile_levels)),
12+
quantile_levels = quantile_levels
13+
),
14+
class = c("dist_quantiles", "dist_default")
15+
))
16+
}
717
stopifnot(length(values) == length(quantile_levels))
18+
819
stopifnot(!vctrs::vec_duplicate_any(quantile_levels))
920
if (is.unsorted(quantile_levels)) {
1021
o <- vctrs::vec_order(quantile_levels)
@@ -37,30 +48,49 @@ format.dist_quantiles <- function(x, digits = 2, ...) {
3748

3849
#' A distribution parameterized by a set of quantiles
3950
#'
40-
#' @param values A vector of values
41-
#' @param quantile_levels A vector of probabilities corresponding to `values`
51+
#' @param values A vector (or list of vectors) of values.
52+
#' @param quantile_levels A vector (or list of vectors) of probabilities
53+
#' corresponding to `values`.
54+
#'
55+
#' When creating multiple sets of `values`/`quantile_levels` resulting in
56+
#' different distributions, the sizes must match. See the examples below.
57+
#'
58+
#' @return A vector of class `"distribution"`.
4259
#'
4360
#' @export
4461
#'
4562
#' @examples
46-
#' dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8)))
63+
#' dist_quantiles(1:4, 1:4 / 5)
64+
#' dist_quantiles(list(1:3, 1:4), list(1:3 / 4, 1:4 / 5))
65+
#' dstn <- dist_quantiles(list(1:4, 8:11), c(.2, .4, .6, .8))
66+
#' dstn
67+
#'
4768
#' quantile(dstn, p = c(.1, .25, .5, .9))
4869
#' median(dstn)
4970
#'
5071
#' # it's a bit annoying to inspect the data
5172
#' distributional::parameters(dstn[1])
5273
#' nested_quantiles(dstn[1])[[1]]
5374
#'
54-
#' dist_quantiles(1:4, 1:4 / 5)
5575
#' @importFrom vctrs as_list_of vec_recycle_common new_vctr
5676
dist_quantiles <- function(values, quantile_levels) {
57-
if (!is.list(values)) values <- list(values)
58-
if (!is.list(quantile_levels)) quantile_levels <- list(quantile_levels)
77+
if (!is.list(quantile_levels)) {
78+
assert_numeric(quantile_levels, lower = 0, upper = 1, any.missing = FALSE, min.len = 1L)
79+
quantile_levels <- list(quantile_levels)
80+
}
81+
if (!is.list(values)) {
82+
if (length(values) == 0L) values <- NA_real_
83+
values <- list(values)
84+
}
5985

6086
values <- as_list_of(values, .ptype = double())
6187
quantile_levels <- as_list_of(quantile_levels, .ptype = double())
6288
args <- vec_recycle_common(values = values, quantile_levels = quantile_levels)
63-
qntls <- as_list_of(map2(args$values, args$quantile_levels, new_quantiles))
89+
90+
qntls <- as_list_of(
91+
map2(args$values, args$quantile_levels, new_quantiles),
92+
.ptype = new_quantiles(NA_real_, 0.5)
93+
)
6494
new_vctr(qntls, class = "distribution")
6595
}
6696

@@ -87,59 +117,6 @@ validate_dist_quantiles <- function(values, quantile_levels) {
87117
}
88118

89119

90-
#' Summarize a distribution with a set of quantiles
91-
#'
92-
#' @param x a `distribution` vector
93-
#' @param probs a vector of probabilities at which to calculate quantiles
94-
#' @param ... additional arguments passed on to the `quantile` method
95-
#'
96-
#' @return a `distribution` vector containing `dist_quantiles`
97-
#' @export
98-
#'
99-
#' @examples
100-
#' library(distributional)
101-
#' dstn <- dist_normal(c(10, 2), c(5, 10))
102-
#' extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75))
103-
#'
104-
#' dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8)))
105-
#' # because this distribution is already quantiles, any extra quantiles are
106-
#' # appended
107-
#' extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75))
108-
#'
109-
#' dstn <- c(
110-
#' dist_normal(c(10, 2), c(5, 10)),
111-
#' dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8)))
112-
#' )
113-
#' extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75))
114-
extrapolate_quantiles <- function(x, probs, ...) {
115-
UseMethod("extrapolate_quantiles")
116-
}
117-
118-
#' @export
119-
#' @importFrom vctrs vec_data
120-
extrapolate_quantiles.distribution <- function(x, probs, ...) {
121-
arg_is_probabilities(probs)
122-
dstn <- lapply(vec_data(x), extrapolate_quantiles, probs = probs, ...)
123-
new_vctr(dstn, vars = NULL, class = "distribution")
124-
}
125-
126-
#' @export
127-
extrapolate_quantiles.dist_default <- function(x, probs, ...) {
128-
values <- quantile(x, probs, ...)
129-
new_quantiles(values = values, quantile_levels = probs)
130-
}
131-
132-
#' @export
133-
extrapolate_quantiles.dist_quantiles <- function(x, probs, ...) {
134-
new_values <- quantile(x, probs, ...)
135-
quantile_levels <- field(x, "quantile_levels")
136-
values <- field(x, "values")
137-
new_quantiles(
138-
values = c(values, new_values),
139-
quantile_levels = c(quantile_levels, probs)
140-
)
141-
}
142-
143120
is_dist_quantiles <- function(x) {
144121
is_distribution(x) & all(stats::family(x) == "quantiles")
145122
}
@@ -183,18 +160,20 @@ quantile.dist_quantiles <- function(x, p, ..., middle = c("cubic", "linear")) {
183160
quantile_extrapolate <- function(x, tau_out, middle) {
184161
tau <- field(x, "quantile_levels")
185162
qvals <- field(x, "values")
186-
r <- range(tau, na.rm = TRUE)
163+
nas <- is.na(qvals)
187164
qvals_out <- rep(NA, length(tau_out))
165+
qvals <- qvals[!nas]
166+
tau <- tau[!nas]
188167

189168
# short circuit if we aren't actually extrapolating
190169
# matches to ~15 decimals
191170
if (all(tau_out %in% tau)) {
192171
return(qvals[match(tau_out, tau)])
193172
}
194-
if (length(qvals) < 2) {
195-
cli::cli_abort(c(
173+
if (length(tau) < 2) {
174+
cli::cli_abort(
196175
"Quantile extrapolation is not possible with fewer than 2 quantiles."
197-
))
176+
)
198177
return(qvals_out)
199178
}
200179

R/extrapolate_quantiles.R

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#' Summarize a distribution with a set of quantiles
2+
#'
3+
#' @param x a `distribution` vector
4+
#' @param probs a vector of probabilities at which to calculate quantiles
5+
#' @param replace_na logical. If `x` contains `NA`'s, these are imputed if
6+
#' possible (if `TRUE`) or retained (if `FALSE`). This only effects
7+
#' elements of class `dist_quantiles`.
8+
#' @param ... additional arguments passed on to the `quantile` method
9+
#'
10+
#' @return a `distribution` vector containing `dist_quantiles`. Any elements
11+
#' of `x` which were originally `dist_quantiles` will now have a superset
12+
#' of the original `quantile_values` (the union of those and `probs`).
13+
#' @export
14+
#'
15+
#' @examples
16+
#' library(distributional)
17+
#' dstn <- dist_normal(c(10, 2), c(5, 10))
18+
#' extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75))
19+
#'
20+
#' dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8)))
21+
#' # because this distribution is already quantiles, any extra quantiles are
22+
#' # appended
23+
#' extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75))
24+
#'
25+
#' dstn <- c(
26+
#' dist_normal(c(10, 2), c(5, 10)),
27+
#' dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8)))
28+
#' )
29+
#' extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75))
30+
extrapolate_quantiles <- function(x, probs, replace_na = TRUE, ...) {
31+
UseMethod("extrapolate_quantiles")
32+
}
33+
34+
#' @export
35+
#' @importFrom vctrs vec_data
36+
extrapolate_quantiles.distribution <- function(x, probs, replace_na = TRUE, ...) {
37+
rlang::check_dots_empty()
38+
arg_is_lgl_scalar(replace_na)
39+
arg_is_probabilities(probs)
40+
if (is.unsorted(probs)) probs <- sort(probs)
41+
dstn <- lapply(vec_data(x), extrapolate_quantiles, probs = probs, replace_na = replace_na)
42+
new_vctr(dstn, vars = NULL, class = "distribution")
43+
}
44+
45+
#' @export
46+
extrapolate_quantiles.dist_default <- function(x, probs, replace_na = TRUE, ...) {
47+
values <- quantile(x, probs, ...)
48+
new_quantiles(values = values, quantile_levels = probs)
49+
}
50+
51+
#' @export
52+
extrapolate_quantiles.dist_quantiles <- function(x, probs, replace_na = TRUE, ...) {
53+
orig_probs <- field(x, "quantile_levels")
54+
orig_values <- field(x, "values")
55+
new_probs <- c(orig_probs, probs)
56+
dups <- duplicated(new_probs)
57+
if (!replace_na || !anyNA(orig_values)) {
58+
new_values <- c(orig_values, quantile(x, probs, ...))
59+
} else {
60+
nas <- is.na(orig_values)
61+
orig_values[nas] <- quantile(x, orig_probs[nas], ...)
62+
new_values <- c(orig_values, quantile(x, probs, ...))
63+
}
64+
new_quantiles(new_values[!dups], new_probs[!dups])
65+
}

R/flatline_forecaster.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
#' Predict the future with today's value
22
#'
33
#' This is a simple forecasting model for
4-
#' [epiprocess::epi_df][epiprocess::as_epi_df] data. It uses the most recent observation as the
5-
#' forcast for any future date, and produces intervals based on the quantiles
4+
#' [epiprocess::epi_df][epiprocess::as_epi_df] data. It uses the most recent
5+
#' observation as the
6+
#' forecast for any future date, and produces intervals based on the quantiles
67
#' of the residuals of such a "flatline" forecast over all available training
78
#' data.
89
#'

0 commit comments

Comments
 (0)