Skip to content

Commit 8b00b3b

Browse files
authored
Merge pull request #188 from dajmcdon/km-slide-n-replace2.1
Updated epi_slide to use `before` and `after` and added checks
2 parents 245448a + 0d3ea1b commit 8b00b3b

12 files changed

+296
-168
lines changed

DESCRIPTION

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ Imports:
3737
tidyr,
3838
tidyselect,
3939
tsibble,
40-
utils
40+
utils,
41+
vctrs
4142
Suggests:
4243
covidcast,
4344
epidatr,
@@ -46,7 +47,6 @@ Suggests:
4647
outbreaks,
4748
rmarkdown,
4849
testthat (>= 3.0.0),
49-
vctrs,
5050
waldo (>= 0.3.1),
5151
withr
5252
VignetteBuilder:

R/growth_rate.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
#' implicitly defined by the `x` variable; for example, if `x` is a vector of
7474
#' `Date` objects, `h = 7`, and the reference point is January 7, then the
7575
#' sliding window contains all data in between January 1 and 14 (matching the
76-
#' behavior of `epi_slide()` with `n = 2 * h` and `align = "center"`).
76+
#' behavior of `epi_slide()` with `before = h - 1` and `after = h`).
7777
#'
7878
#' @section Additional Arguments:
7979
#' For the global methods, "smooth_spline" and "trend_filter", additional

R/outliers.R

+6-2
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,10 @@ detect_outlr = function(x = seq_along(y), y,
128128
#' `y`).
129129
#' @param y Signal values.
130130
#' @param n Number of time steps to use in the rolling window. Default is 21.
131+
#' This value is centrally aligned. When `n` is an odd number, the rolling
132+
#' window extends from `(n-1)/2` time steps before each design point to `(n-1)/2`
133+
#' time steps after. When `n` is even, then the rolling range extends from
134+
#' `n/2-1` time steps before to `n/2` time steps after.
131135
#' @param log_transform Should a log transform be applied before running outlier
132136
#' detection? Default is `FALSE`. If `TRUE`, and zeros are present, then the
133137
#' log transform will be padded by 1.
@@ -179,7 +183,7 @@ detect_outlr_rm = function(x = seq_along(y), y, n = 21,
179183

180184
# Calculate lower and upper thresholds and replacement value
181185
z = z %>%
182-
epi_slide(fitted = median(y), n = n, align = "center") %>%
186+
epi_slide(fitted = median(y), before = floor((n-1)/2), after = ceiling((n-1)/2)) %>%
183187
dplyr::mutate(resid = y - fitted) %>%
184188
roll_iqr(n = n,
185189
detection_multiplier = detection_multiplier,
@@ -332,7 +336,7 @@ roll_iqr = function(z, n, detection_multiplier, min_radius,
332336
if (typeof(z$y) == "integer") as_type = as.integer
333337
else as_type = as.numeric
334338

335-
epi_slide(z, roll_iqr = stats::IQR(resid), n = n, align = "center") %>%
339+
epi_slide(z, roll_iqr = stats::IQR(resid), before = floor((n-1)/2), after = ceiling((n-1)/2)) %>%
336340
dplyr::mutate(
337341
lower = pmax(min_lower,
338342
fitted - pmax(min_radius, detection_multiplier * roll_iqr)),

R/slide.R

+113-74
Original file line numberDiff line numberDiff line change
@@ -6,40 +6,44 @@
66
#'
77
#' @param x The `epi_df` object under consideration.
88
#' @param f Function or formula to slide over variables in `x`. To "slide" means
9-
#' to apply a function or formula over a running window of `n` time steps
10-
#' (where one time step is typically one day or one week; see details for more
11-
#' explanation). If a function, `f` should take `x`, an `epi_df` with the same
9+
#' to apply a function or formula over a rolling window of time steps.
10+
#' The window is determined by the `before` and `after` parameters described
11+
#' below. One time step is typically one day or one week; see details for more
12+
#' explanation. If a function, `f` should take `x`, an `epi_df` with the same
1213
#' names as the non-grouping columns, followed by `g` to refer to the one row
1314
#' tibble with one column per grouping variable that identifies the group,
1415
#' and any number of named arguments (which will be taken from `...`). If a
1516
#' formula, `f` can operate directly on columns accessed via `.x$var`, as
1617
#' in `~ mean(.x$var)` to compute a mean of a column var over a sliding
17-
#' window of n time steps. As well, `.y` may be used in the formula to refer
18+
#' window. As well, `.y` may be used in the formula to refer
1819
#' to the groupings that would be described by `g` if `f` was a function.
1920
#' @param ... Additional arguments to pass to the function or formula specified
2021
#' via `f`. Alternatively, if `f` is missing, then the current argument is
2122
#' interpreted as an expression for tidy evaluation. See details.
22-
#' @param n Number of time steps to use in the running window. For example, if
23-
#' `n = 7`, one time step is one day, and the alignment is "right", then to
24-
#' produce a value on January 7 we apply the given function or formula to data
25-
#' in between January 1 and 7.
23+
#' @param before,after How far `before` and `after` each `ref_time_value` should
24+
#' the sliding window extend? At least one of these two arguments must be
25+
#' provided; the other's default will be 0. Any value provided for either
26+
#' argument must be a single, non-`NA`, non-negative,
27+
#' [integer-compatible][vctrs::vec_cast] number of time steps. Endpoints of the
28+
#' window are inclusive. Common settings:
29+
#' * For trailing/right-aligned windows from `ref_time_value - time_step(k)`
30+
#' to `ref_time_value`: either pass `before=k` by itself, or pass `before=k,
31+
#' after=0`.
32+
#' * For center-aligned windows from `ref_time_value - time_step(k)` to
33+
#' `ref_time_value + time_step(k)`: pass `before=k, after=k`.
34+
#' * For leading/left-aligned windows from `ref_time_value` to `ref_time_value
35+
#' + time_step(k)`: either pass pass `after=k` by itself, or pass `before=0,
36+
#' after=k`.
37+
#' See "Details:" about the definition of a time step, (non)treatment of
38+
#' missing rows within the window, and avoiding warnings about
39+
#' `before`&`after` settings for a certain uncommon use case.
2640
#' @param ref_time_values Time values for sliding computations, meaning, each
2741
#' element of this vector serves as the reference time point for one sliding
2842
#' window. If missing, then this will be set to all unique time values in the
2943
#' underlying data table, by default.
30-
#' @param align One of "right", "center", or "left", indicating the alignment of
31-
#' the sliding window relative to the reference time point. If the alignment
32-
#' is "center" and `n` is even, then one more time point will be used after
33-
#' the reference time point than before. Default is "right".
34-
#' @param before Positive integer less than `n`, specifying the number of time
35-
#' points to use in the sliding window strictly before the reference time
36-
#' point. For example, setting `before = n-1` would be the same as setting
37-
#' `align = "right"`. The `before` argument allows for more flexible
38-
#' specification of alignment than the `align` parameter, and if specified,
39-
#' overrides `align`.
4044
#' @param time_step Optional function used to define the meaning of one time
4145
#' step, which if specified, overrides the default choice based on the
42-
#' `time_value` column. This function must take a positive integer and return
46+
#' `time_value` column. This function must take a non-negative integer and return
4347
#' an object of class `lubridate::period`. For example, we can use `time_step
4448
#' = lubridate::hours` in order to set the time step to be one hour (this
4549
#' would only be meaningful if `time_value` is of class `POSIXct`).
@@ -59,28 +63,44 @@
5963
#' @return An `epi_df` object given by appending a new column to `x`, named
6064
#' according to the `new_col_name` argument.
6165
#'
62-
#' @details To "slide" means to apply a function or formula over a running
63-
#' window of `n` time steps, where the unit (the meaning of one time step) is
66+
#' @details To "slide" means to apply a function or formula over a rolling
67+
#' window of time steps where the window is entered at a reference time and
68+
#' left and right endpoints are given by the `before` and `after` arguments.
69+
#' The unit (the meaning of one time step) is
6470
#' implicitly defined by the way the `time_value` column treats addition and
6571
#' subtraction; for example, if the time values are coded as `Date` objects,
6672
#' then one time step is one day, since `as.Date("2022-01-01") + 1` equals
6773
#' `as.Date("2022-01-02")`. Alternatively, the time step can be set explicitly
6874
#' using the `time_step` argument (which if specified would override the
69-
#' default choice based on `time_value` column). If less than `n` time steps
70-
#' are available at any given reference time value, then `epi_slide()` still
75+
#' default choice based on `time_value` column). If there are not enough time
76+
#' steps available to complete the window at any given reference time, then
77+
#' `epi_slide()` still
7178
#' attempts to perform the computation anyway (it does not require a complete
7279
#' window). The issue of what to do with partial computations (those run on
7380
#' incomplete windows) is therefore left up to the user, either through the
74-
#' specified function or formula `f`, or through post-processing.
75-
#'
76-
#' If `f` is missing, then an expression for tidy evaluation can be specified,
77-
#' for example, as in:
81+
#' specified function or formula `f`, or through post-processing. For a
82+
#' centrally-aligned slide of `n` `time_value`s in a sliding window, set
83+
#' `before = (n-1)/2` and `after = (n-1)/2` when the number of `time_value`s
84+
#' in a sliding window is odd and `before = n/2-1` and `after = n/2` when
85+
#' `n` is even.
86+
#'
87+
#' Sometimes, we want to experiment with various trailing or leading window
88+
#' widths and compare the slide outputs. In the (uncommon) case where
89+
#' zero-width windows are considered, manually pass both the `before` and
90+
#' `after` arguments in order to prevent potential warnings. (E.g., `before=k`
91+
#' with `k=0` and `after` missing may produce a warning. To avoid warnings,
92+
#' use `before=k, after=0` instead; otherwise, it looks too much like a
93+
#' leading window was intended, but the `after` argument was forgotten or
94+
#' misspelled.)
95+
#'
96+
#' If `f` is missing, then an expression for tidy evaluation can be specified,
97+
#' for example, as in:
7898
#' ```
79-
#' epi_slide(x, cases_7dav = mean(cases), n = 7)
99+
#' epi_slide(x, cases_7dav = mean(cases), before = 6)
80100
#' ```
81101
#' which would be equivalent to:
82102
#' ```
83-
#' epi_slide(x, function(x, ...) mean(x$cases), n = 7,
103+
#' epi_slide(x, function(x, ...) mean(x$cases), before = 6,
84104
#' new_col_name = "cases_7dav")
85105
#' ```
86106
#' Thus, to be clear, when the computation is specified via an expression for
@@ -92,32 +112,45 @@
92112
#' @importFrom rlang .data .env !! enquo enquos sym
93113
#' @export
94114
#' @examples
95-
#' # slide a 7-day trailing average formula on cases
96-
#' jhu_csse_daily_subset %>%
115+
#' # slide a 7-day trailing average formula on cases
116+
#' jhu_csse_daily_subset %>%
117+
#' group_by(geo_value) %>%
118+
#' epi_slide(cases_7dav = mean(cases), before = 6) %>%
119+
#' # rmv a nonessential var. to ensure new col is printed
120+
#' dplyr::select(-death_rate_7d_av)
121+
#'
122+
#' # slide a 7-day leading average
123+
#' jhu_csse_daily_subset %>%
124+
#' group_by(geo_value) %>%
125+
#' epi_slide(cases_7dav = mean(cases), after = 6) %>%
126+
#' # rmv a nonessential var. to ensure new col is printed
127+
#' dplyr::select(-death_rate_7d_av)
128+
#'
129+
#' # slide a 7-day centre-aligned average
130+
#' jhu_csse_daily_subset %>%
97131
#' group_by(geo_value) %>%
98-
#' epi_slide(cases_7dav = mean(cases), n = 7,
99-
#' align = "right") %>%
132+
#' epi_slide(cases_7dav = mean(cases), before = 3, after = 3) %>%
100133
#' # rmv a nonessential var. to ensure new col is printed
101134
#' dplyr::select(-death_rate_7d_av)
102-
#'
103-
#' # slide a left-aligned 7-day average
104-
#' jhu_csse_daily_subset %>%
135+
#'
136+
#' # slide a 14-day centre-aligned average
137+
#' jhu_csse_daily_subset %>%
105138
#' group_by(geo_value) %>%
106-
#' epi_slide(cases_7dav = mean(cases), n = 7,
107-
#' align = "left") %>%
139+
#' epi_slide(cases_7dav = mean(cases), before = 6, after = 7) %>%
108140
#' # rmv a nonessential var. to ensure new col is printed
109141
#' dplyr::select(-death_rate_7d_av)
110-
#'
111-
#' # nested new columns
112-
#' jhu_csse_daily_subset %>%
113-
#' group_by(geo_value) %>%
114-
#' epi_slide(a = data.frame(cases_2dav = mean(cases),
115-
#' cases_2dma = mad(cases)),
116-
#' n = 2, as_list_col = TRUE)
117-
epi_slide = function(x, f, ..., n, ref_time_values,
118-
align = c("right", "center", "left"), before, time_step,
142+
#'
143+
#' # nested new columns
144+
#' jhu_csse_daily_subset %>%
145+
#' group_by(geo_value) %>%
146+
#' epi_slide(a = data.frame(cases_2dav = mean(cases),
147+
#' cases_2dma = mad(cases)),
148+
#' before = 1, as_list_col = TRUE)
149+
epi_slide = function(x, f, ..., before, after, ref_time_values,
150+
time_step,
119151
new_col_name = "slide_value", as_list_col = FALSE,
120152
names_sep = "_", all_rows = FALSE) {
153+
121154
# Check we have an `epi_df` object
122155
if (!inherits(x, "epi_df")) Abort("`x` must be of class `epi_df`.")
123156

@@ -133,44 +166,50 @@ epi_slide = function(x, f, ..., n, ref_time_values,
133166
ref_time_values = ref_time_values[ref_time_values %in%
134167
unique(x$time_value)]
135168
}
136-
137-
# If before is missing, then use align to set up alignment
138-
if (missing(before)) {
139-
align = match.arg(align)
140-
if (align == "right") {
141-
before_num = n-1
142-
after_num = 0
143-
}
144-
else if (align == "center") {
145-
before_num = floor((n-1)/2)
146-
after_num = ceiling((n-1)/2)
169+
170+
# Validate and pre-process `before`, `after`:
171+
if (!missing(before)) {
172+
before <- vctrs::vec_cast(before, integer())
173+
if (length(before) != 1L || is.na(before) || before < 0L) {
174+
Abort("`before` must be length-1, non-NA, non-negative")
147175
}
148-
else {
149-
before_num = 0
150-
after_num = n-1
176+
}
177+
if (!missing(after)) {
178+
after <- vctrs::vec_cast(after, integer())
179+
if (length(after) != 1L || is.na(after) || after < 0L) {
180+
Abort("`after` must be length-1, non-NA, non-negative")
151181
}
152182
}
153-
154-
# Otherwise set up alignment based on passed before value
155-
else {
156-
if (before < 0 || before > n-1) {
157-
Abort("`before` must be in between 0 and n-1`.")
183+
if (missing(before)) {
184+
if (missing(after)) {
185+
Abort("Either or both of `before`, `after` must be provided.")
186+
} else if (after == 0L) {
187+
Warn("`before` missing, `after==0`; maybe this was intended to be some
188+
non-zero-width trailing window, but since `before` appears to be
189+
missing, it's interpreted as a zero-width window (`before=0,
190+
after=0`).")
158191
}
159-
160-
before_num = before
161-
after_num = n-1-before
192+
before <- 0L
193+
} else if (missing(after)) {
194+
if (before == 0L) {
195+
Warn("`before==0`, `after` missing; maybe this was intended to be some
196+
non-zero-width leading window, but since `after` appears to be
197+
missing, it's interpreted as a zero-width window (`before=0,
198+
after=0`).")
199+
}
200+
after <- 0L
162201
}
163202

164-
# If a custom time step is specified, then redefine units
203+
# If a custom time step is specified, then redefine units
165204
if (!missing(time_step)) {
166-
before_num = time_step(before_num)
167-
after_num = time_step(after_num)
205+
before <- time_step(before)
206+
after <- time_step(after)
168207
}
169208

170209
# Now set up starts and stops for sliding/hopping
171210
time_range = range(unique(x$time_value))
172-
starts = in_range(ref_time_values - before_num, time_range)
173-
stops = in_range(ref_time_values + after_num, time_range)
211+
starts = in_range(ref_time_values - before, time_range)
212+
stops = in_range(ref_time_values + after, time_range)
174213

175214
if( length(starts) == 0 || length(stops) == 0 ) {
176215
Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check your settings for ref_time_values and align (and before, if specified).")

man/detect_outlr_rm.Rd

+5-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)