Skip to content

Commit 8f25ec9

Browse files
authored
Merge pull request #315 from cmu-delphi/djm/remove-fabletools
Djm/remove fabletools
2 parents 5fb62f3 + 9524643 commit 8f25ec9

File tree

6 files changed

+95
-50
lines changed

6 files changed

+95
-50
lines changed

DESCRIPTION

-3
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@ Imports:
3030
cli,
3131
data.table,
3232
dplyr (>= 1.0.0),
33-
fabletools,
34-
feasts,
35-
generics,
3633
genlasso,
3734
ggplot2,
3835
lifecycle (>= 1.0.1),

NEWS.md

+8
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
44

55
# epiprocess 0.8
66

7+
## Breaking changes
8+
- `detect_outlr_stl(seasonal_period = NULL)` is no longer accepted. Use
9+
`detect_outlr_stl(seasonal_period = <value>, seasonal_as_residual = TRUE)`
10+
instead. See `?detect_outlr_stl` for more details.
11+
712
## Improvements
813

914
- `epi_slide` computations are now 2-4 times faster after changing how
@@ -43,6 +48,9 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
4348
- Added optional `decay_to_tibble` attribute controlling `as_tibble()` behavior
4449
of `epi_df`s to let `{epipredict}` work more easily with other libraries (#471).
4550

51+
## Cleanup
52+
- Removed some external package dependencies.
53+
4654
# epiprocess 0.7.0
4755

4856
## Breaking changes:

R/outliers.R

+59-34
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,10 @@
6464
#' args = list(list(
6565
#' detect_negatives = TRUE,
6666
#' detection_multiplier = 2.5,
67-
#' seasonal_period = NULL
67+
#' seasonal_period = 7,
68+
#' seasonal_as_residual = TRUE
6869
#' )),
69-
#' abbr = "stl_nonseasonal"
70+
#' abbr = "stl_reseasonal"
7071
#' )
7172
#' )
7273
#'
@@ -216,18 +217,28 @@ detect_outlr_rm <- function(x = seq_along(y), y, n = 21,
216217
#' @param n_trend Number of time steps to use in the rolling window for trend.
217218
#' Default is 21.
218219
#' @param n_seasonal Number of time steps to use in the rolling window for
219-
#' seasonality. Default is 21.
220+
#' seasonality. Default is 21. Can also be the string "periodic". See
221+
#' `s.window` in [`stats::stl`].
220222
#' @param n_threshold Number of time steps to use in rolling window for the IQR
221223
#' outlier thresholds.
222-
#' @param seasonal_period Integer specifying period of seasonality. For example,
223-
#' for daily data, a period 7 means weekly seasonality. The default is `NULL`,
224-
#' meaning that no seasonal term will be included in the STL decomposition.
224+
#' @param seasonal_period Integer specifying period of "seasonality". For
225+
#' example, for daily data, a period 7 means weekly seasonality. It must be
226+
#' strictly larger than 1. Also impacts the size of the low-pass filter
227+
#' window; see `l.window` in [`stats::stl`].
228+
#' @param seasonal_as_residual Boolean specifying whether the seasonal(/weekly)
229+
#' component should be treated as part of the residual component instead of as
230+
#' part of the predictions. The default, FALSE, treats them as part of the
231+
#' predictions, so large seasonal(/weekly) components will not lead to
232+
#' flagging points as outliers. `TRUE` may instead consider the extrema of
233+
#' large seasonal variations to be outliers; `n_seasonal` and
234+
#' `seasonal_period` will still have an impact on the result, though, by
235+
#' impacting the estimation of the trend component.
225236
#' @template outlier-detection-options
226237
#' @template detect-outlr-return
227238
#'
228-
#' @details The STL decomposition is computed using the `feasts` package. Once
239+
#' @details The STL decomposition is computed using [`stats::stl()`]. Once
229240
#' computed, the outlier detection method is analogous to the rolling median
230-
#' method in `detect_outlr_rm()`, except with the fitted values and residuals
241+
#' method in [`detect_outlr_rm()`], except with the fitted values and residuals
231242
#' from the STL decomposition taking the place of the rolling median and
232243
#' residuals to the rolling median, respectively.
233244
#'
@@ -252,12 +263,34 @@ detect_outlr_stl <- function(x = seq_along(y), y,
252263
n_trend = 21,
253264
n_seasonal = 21,
254265
n_threshold = 21,
255-
seasonal_period = NULL,
266+
seasonal_period,
267+
seasonal_as_residual = FALSE,
256268
log_transform = FALSE,
257269
detect_negatives = FALSE,
258270
detection_multiplier = 2,
259271
min_radius = 0,
260272
replacement_multiplier = 0) {
273+
if (dplyr::n_distinct(x) != length(y)) {
274+
cli_abort("`x` contains duplicate values. (If being run on a column in an
275+
`epi_df`, did you group by relevant key variables?)")
276+
}
277+
if (length(y) <= 1L) {
278+
cli_abort("`y` has length {length(y)}; that's definitely too little for
279+
STL. (If being run in a `mutate()` or `epi_slide()`, check
280+
whether you grouped by too many variables; you should not be
281+
grouping by `time_value` in particular.)")
282+
}
283+
distinct_x_skips <- unique(diff(x))
284+
if (diff(range(distinct_x_skips)) > 1e-4 * mean(distinct_x_skips)) {
285+
cli_abort("`x` does not appear to have regular spacing; consider filling in
286+
gaps with imputed values (STL does not allow NAs).")
287+
}
288+
if (is.unsorted(x)) { # <- for performance in common (sorted) case
289+
o <- order(x)
290+
x <- x[o]
291+
y <- y[o]
292+
}
293+
261294
# Transform if requested
262295
if (log_transform) {
263296
# Replace all negative values with 0
@@ -266,32 +299,22 @@ detect_outlr_stl <- function(x = seq_along(y), y,
266299
y <- log(y + offset)
267300
}
268301

269-
# Make a tsibble for fabletools, setup and run STL
270-
z_tsibble <- tsibble::tsibble(x = x, y = y, index = x)
271-
272-
stl_formula <- y ~ trend(window = n_trend) +
273-
season(period = seasonal_period, window = n_seasonal)
302+
assert_int(seasonal_period, lower = 2L)
303+
assert_logical(seasonal_as_residual, len = 1L, any.missing = FALSE)
274304

275-
stl_components <- z_tsibble %>%
276-
fabletools::model(feasts::STL(stl_formula, robust = TRUE)) %>%
277-
generics::components() %>%
305+
yts <- stats::ts(y, frequency = seasonal_period)
306+
stl_comp <- stats::stl(yts,
307+
t.window = n_trend, s.window = n_seasonal,
308+
robust = TRUE
309+
)$time.series %>%
278310
tibble::as_tibble() %>%
279-
dplyr::select(.data$trend:.data$remainder) %>% #
280-
dplyr::rename_with(~"seasonal", tidyselect::starts_with("season")) %>%
281311
dplyr::rename(resid = .data$remainder)
282312

283313
# Allocate the seasonal term from STL to either fitted or resid
284-
if (!is.null(seasonal_period)) {
285-
stl_components <- stl_components %>%
286-
dplyr::mutate(
287-
fitted = .data$trend + .data$seasonal
288-
)
314+
if (!seasonal_as_residual) {
315+
stl_comp <- dplyr::mutate(stl_comp, fitted = .data$trend + .data$seasonal)
289316
} else {
290-
stl_components <- stl_components %>%
291-
dplyr::mutate(
292-
fitted = .data$trend,
293-
resid = .data$seasonal + resid
294-
)
317+
stl_comp <- dplyr::mutate(stl_comp, fitted = .data$trend, resid = .data$seasonal + .data$resid)
295318
}
296319

297320
# Detect negatives if requested
@@ -306,10 +329,7 @@ detect_outlr_stl <- function(x = seq_along(y), y,
306329

307330
# Calculate lower and upper thresholds and replacement value
308331
z <- z %>%
309-
dplyr::mutate(
310-
fitted = stl_components$fitted,
311-
resid = stl_components$resid
312-
) %>%
332+
dplyr::mutate(fitted = stl_comp$fitted, resid = stl_comp$resid) %>%
313333
roll_iqr(
314334
n = n_threshold,
315335
detection_multiplier = detection_multiplier,
@@ -337,7 +357,12 @@ roll_iqr <- function(z, n, detection_multiplier, min_radius,
337357
as_type <- as.numeric
338358
}
339359

340-
epi_slide(z, roll_iqr = stats::IQR(resid), before = floor((n - 1) / 2), after = ceiling((n - 1) / 2)) %>%
360+
z %>%
361+
epi_slide(
362+
roll_iqr = stats::IQR(resid),
363+
before = floor((n - 1) / 2),
364+
after = ceiling((n - 1) / 2)
365+
) %>%
341366
dplyr::mutate(
342367
lower = pmax(
343368
min_lower,

man/detect_outlr.Rd

+3-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/detect_outlr_stl.Rd

+19-7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vignettes/outliers.Rmd

+6-4
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,9 @@ methods.
7474
2. Detection based on a seasonal-trend decomposition using LOESS (STL), using
7575
`detect_outlr_stl()`, which is similar to the rolling median method but
7676
replaces the rolling median with fitted values from STL.
77-
3. Detection based on an STL decomposition, but without seasonality term, which
78-
amounts to smoothing using LOESS.
77+
3. Detection based on an STL decomposition, but subtracting out the seasonality
78+
term from its predictions, which may result in the extrema of large seasonal
79+
variations being considered as outliers.
7980

8081
The outlier detection methods are specified using a `tibble` that is passed to
8182
`detect_outlr()`, with one row per method, and whose columms specify the
@@ -108,9 +109,10 @@ detection_methods <- bind_rows(
108109
args = list(list(
109110
detect_negatives = TRUE,
110111
detection_multiplier = 2.5,
111-
seasonal_period = NULL
112+
seasonal_period = 7,
113+
seasonal_as_residual = TRUE
112114
)),
113-
abbr = "stl_nonseasonal"
115+
abbr = "stl_reseasonal"
114116
)
115117
)
116118

0 commit comments

Comments
 (0)