Skip to content

Commit 2014016

Browse files
authored
Merge branch 'dev' into lcb/make_epix_slide_more_like_reframe
2 parents 8985db2 + d7f3521 commit 2014016

24 files changed

+305
-44
lines changed

DESCRIPTION

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ Description: This package introduces a common data structure for epidemiological
2121
work with revisions to these data sets over time, and offers associated
2222
utilities to perform basic signal processing tasks.
2323
License: MIT + file LICENSE
24-
Imports:
24+
Imports:
25+
cli,
2526
data.table,
2627
dplyr (>= 1.0.0),
2728
fabletools,
@@ -48,7 +49,7 @@ Suggests:
4849
knitr,
4950
outbreaks,
5051
rmarkdown,
51-
testthat (>= 3.0.0),
52+
testthat (>= 3.1.5),
5253
waldo (>= 0.3.1),
5354
withr
5455
VignetteBuilder:

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,15 @@ importFrom(dplyr,ungroup)
8686
importFrom(lubridate,days)
8787
importFrom(lubridate,weeks)
8888
importFrom(magrittr,"%>%")
89+
importFrom(purrr,map_lgl)
8990
importFrom(rlang,"!!!")
9091
importFrom(rlang,"!!")
9192
importFrom(rlang,.data)
9293
importFrom(rlang,.env)
9394
importFrom(rlang,arg_match)
9495
importFrom(rlang,enquo)
9596
importFrom(rlang,enquos)
97+
importFrom(rlang,is_missing)
9698
importFrom(rlang,is_quosure)
9799
importFrom(rlang,quo_is_missing)
98100
importFrom(rlang,sym)
@@ -104,3 +106,4 @@ importFrom(tidyr,unnest)
104106
importFrom(tidyselect,eval_select)
105107
importFrom(tidyselect,starts_with)
106108
importFrom(tsibble,as_tsibble)
109+
importFrom(utils,tail)

R/data.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
#'
6666
#' Modifications:
6767
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From the COVIDcast Doctor Visits API}: The signal `percent_cli` is taken directly from the API without changes.
68-
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: `case_rate_7d_av` is taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes. The 7-day average signals are computed by Delphi by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
68+
#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: `case_rate_7d_av` signal was computed by Delphi from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
6969
#' * Furthermore, the data is a subset of the full dataset, the signal names slightly altered, and formatted into a tibble.
7070
#'
7171
#' @export

R/grouped_epi_archive.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,11 @@ grouped_epi_archive =
230230
ref_time_values = sort(ref_time_values)
231231
}
232232

233+
# Check that `f` takes enough args
234+
if (!missing(f) && is.function(f)) {
235+
assert_sufficient_f_args(f, ...)
236+
}
237+
233238
# Validate and pre-process `before`:
234239
if (missing(before)) {
235240
Abort("`before` is required (and must be passed by name);

R/methods-epi_archive.R

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,8 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
800800
#' as.Date("2020-06-15"),
801801
#' by = "1 day")
802802
#'
803+
#' # A simple (but not very useful) example (see the archive vignette for a more
804+
#' # realistic one):
803805
#' archive_cases_dv_subset %>%
804806
#' group_by(geo_value) %>%
805807
#' epix_slide(f = ~ mean(.x$case_rate_7d_av),
@@ -811,39 +813,71 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
811813
#' # values. The actual number of `time_value`s in each computation depends on
812814
#' # the reporting latency of the signal and `time_value` range covered by the
813815
#' # archive (2020-06-01 -- 2021-11-30 in this example). In this case, we have
814-
#' # 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically discarded
815-
#' # 1 `time_value`, for ref time 2020-06-02
816-
#' # 2 `time_value`s, for the rest of the results
817-
#' # never 3 `time_value`s, due to data latency
818-
#'
819-
#'
816+
#' # * 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically
817+
#' # discarded
818+
#' # * 1 `time_value`, for ref time 2020-06-02
819+
#' # * 2 `time_value`s, for the rest of the results
820+
#' # * never the 3 `time_value`s we would get from `epi_slide`, since, because
821+
#' # of data latency, we'll never have an observation
822+
#' # `time_value == ref_time_value` as of `ref_time_value`.
823+
#' # The example below shows this type of behavior in more detail.
824+
#'
825+
#' # Examining characteristics of the data passed to each computation with
826+
#' # `all_versions=FALSE`.
827+
#' archive_cases_dv_subset %>%
828+
#' group_by(geo_value) %>%
829+
#' epix_slide(
830+
#' function(x, g) {
831+
#' tibble(
832+
#' time_range = if(nrow(x) == 0L) {
833+
#' "0 `time_value`s"
834+
#' } else {
835+
#' sprintf("%s -- %s", min(x$time_value), max(x$time_value))
836+
#' },
837+
#' n = nrow(x),
838+
#' class1 = class(x)[[1L]]
839+
#' )
840+
#' },
841+
#' before = 5, all_versions = FALSE,
842+
#' ref_time_values = ref_time_values, names_sep=NULL) %>%
843+
#' ungroup() %>%
844+
#' arrange(geo_value, time_value)
820845
#'
821846
#' # --- Advanced: ---
822847
#'
823848
#' # `epix_slide` with `all_versions=FALSE` (the default) applies a
824849
#' # version-unaware computation to several versions of the data. We can also
825850
#' # use `all_versions=TRUE` to apply a version-*aware* computation to several
826-
#' # versions of the data. In this case, each computation should expect an
851+
#' # versions of the data, again looking at characteristics of the data passed
852+
#' # to each computation. In this case, each computation should expect an
827853
#' # `epi_archive` containing the relevant version data:
828854
#'
829855
#' archive_cases_dv_subset %>%
830856
#' group_by(geo_value) %>%
831857
#' epix_slide(
832858
#' function(x, g) {
833859
#' tibble(
834-
#' versions_end = max(x$versions_end),
860+
#' versions_start = if (nrow(x$DT) == 0L) {
861+
#' "NA (0 rows)"
862+
#' } else {
863+
#' toString(min(x$DT$version))
864+
#' },
865+
#' versions_end = x$versions_end,
835866
#' time_range = if(nrow(x$DT) == 0L) {
836867
#' "0 `time_value`s"
837868
#' } else {
838869
#' sprintf("%s -- %s", min(x$DT$time_value), max(x$DT$time_value))
839870
#' },
871+
#' n = nrow(x$DT),
840872
#' class1 = class(x)[[1L]]
841873
#' )
842874
#' },
843-
#' before = 2, all_versions = TRUE,
875+
#' before = 5, all_versions = TRUE,
844876
#' ref_time_values = ref_time_values, names_sep=NULL) %>%
845877
#' ungroup() %>%
846-
#' arrange(geo_value, time_value)
878+
#' # Focus on one geo_value so we can better see the columns above:
879+
#' filter(geo_value == "ca") %>%
880+
#' select(-geo_value)
847881
#'
848882
#' @importFrom rlang enquo !!!
849883
#' @export

R/slide.R

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,12 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
156156

157157
# Check we have an `epi_df` object
158158
if (!inherits(x, "epi_df")) Abort("`x` must be of class `epi_df`.")
159-
159+
160+
# Check that `f` takes enough args
161+
if (!missing(f) && is.function(f)) {
162+
assert_sufficient_f_args(f, ...)
163+
}
164+
160165
# Arrange by increasing time_value
161166
x = arrange(x, time_value)
162167

R/utils.R

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,87 @@ paste_lines = function(lines) {
100100
Abort = function(msg, ...) rlang::abort(break_str(msg, init = "Error: "), ...)
101101
Warn = function(msg, ...) rlang::warn(break_str(msg, init = "Warning: "), ...)
102102

103+
#' Assert that a sliding computation function takes enough args
104+
#'
105+
#' @param f Function; specifies a computation to slide over an `epi_df` or
106+
#' `epi_archive` in `epi_slide` or `epix_slide`.
107+
#' @param ... Dots that will be forwarded to `f` from the dots of `epi_slide` or
108+
#' `epix_slide`.
109+
#'
110+
#' @importFrom rlang is_missing
111+
#' @importFrom purrr map_lgl
112+
#' @importFrom utils tail
113+
#'
114+
#' @noRd
115+
assert_sufficient_f_args <- function(f, ...) {
116+
mandatory_f_args_labels <- c("window data", "group key")
117+
n_mandatory_f_args <- length(mandatory_f_args_labels)
118+
args = formals(args(f))
119+
args_names = names(args)
120+
# Remove named arguments forwarded from `epi[x]_slide`'s `...`:
121+
forwarded_dots_names = names(rlang::call_match(dots_expand = FALSE)[["..."]])
122+
args_matched_in_dots =
123+
# positional calling args will skip over args matched by named calling args
124+
args_names %in% forwarded_dots_names &
125+
# extreme edge case: `epi[x]_slide(<stuff>, dot = 1, `...` = 2)`
126+
args_names != "..."
127+
remaining_args = args[!args_matched_in_dots]
128+
remaining_args_names = names(remaining_args)
129+
# note that this doesn't include unnamed args forwarded through `...`.
130+
dots_i <- which(remaining_args_names == "...") # integer(0) if no match
131+
n_f_args_before_dots <- dots_i - 1L
132+
if (length(dots_i) != 0L) { # `f` has a dots "arg"
133+
# Keep all arg names before `...`
134+
mandatory_args_mapped_names <- remaining_args_names[seq_len(n_f_args_before_dots)]
135+
136+
if (n_f_args_before_dots < n_mandatory_f_args) {
137+
mandatory_f_args_in_f_dots =
138+
tail(mandatory_f_args_labels, n_mandatory_f_args - n_f_args_before_dots)
139+
cli::cli_warn(
140+
"`f` might not have enough positional arguments before its `...`; in the current `epi[x]_slide` call, the {mandatory_f_args_in_f_dots} will be included in `f`'s `...`; if `f` doesn't expect those arguments, it may produce confusing error messages",
141+
class = "epiprocess__assert_sufficient_f_args__mandatory_f_args_passed_to_f_dots",
142+
epiprocess__f = f,
143+
epiprocess__mandatory_f_args_in_f_dots = mandatory_f_args_in_f_dots
144+
)
145+
}
146+
} else { # `f` doesn't have a dots "arg"
147+
if (length(args_names) < n_mandatory_f_args + rlang::dots_n(...)) {
148+
# `f` doesn't take enough args.
149+
if (rlang::dots_n(...) == 0L) {
150+
# common case; try for friendlier error message
151+
Abort(sprintf("`f` must take at least %s arguments", n_mandatory_f_args),
152+
class = "epiprocess__assert_sufficient_f_args__f_needs_min_args",
153+
epiprocess__f = f)
154+
} else {
155+
# less common; highlight that they are (accidentally?) using dots forwarding
156+
Abort(sprintf("`f` must take at least %s arguments plus the %s arguments forwarded through `epi[x]_slide`'s `...`, or a named argument to `epi[x]_slide` was misspelled", n_mandatory_f_args, rlang::dots_n(...)),
157+
class = "epiprocess__assert_sufficient_f_args__f_needs_min_args_plus_forwarded",
158+
epiprocess__f = f)
159+
}
160+
}
161+
}
162+
# Check for args with defaults that are filled with mandatory positional
163+
# calling args. If `f` has fewer than n_mandatory_f_args before `...`, then we
164+
# only need to check those args for defaults. Note that `n_f_args_before_dots` is
165+
# length 0 if `f` doesn't accept `...`.
166+
n_remaining_args_for_default_check = min(c(n_f_args_before_dots, n_mandatory_f_args))
167+
default_check_args = remaining_args[seq_len(n_remaining_args_for_default_check)]
168+
default_check_args_names = names(default_check_args)
169+
has_default_replaced_by_mandatory = map_lgl(default_check_args, ~!is_missing(.x))
170+
if (any(has_default_replaced_by_mandatory)) {
171+
default_check_mandatory_args_labels =
172+
mandatory_f_args_labels[seq_len(n_remaining_args_for_default_check)]
173+
# ^ excludes any mandatory args absorbed by f's `...`'s:
174+
mandatory_args_replacing_defaults =
175+
default_check_mandatory_args_labels[has_default_replaced_by_mandatory]
176+
args_with_default_replaced_by_mandatory =
177+
rlang::syms(default_check_args_names[has_default_replaced_by_mandatory])
178+
cli::cli_abort("`epi[x]_slide` would pass the {mandatory_args_replacing_defaults} to `f`'s {args_with_default_replaced_by_mandatory} argument{?s}, which {?has a/have} default value{?s}; we suspect that `f` doesn't expect {?this arg/these args} at all and may produce confusing error messages. Please add additional arguments to `f` or remove defaults as appropriate.",
179+
class = "epiprocess__assert_sufficient_f_args__required_args_contain_defaults",
180+
epiprocess__f = f)
181+
}
182+
}
183+
103184
##########
104185

105186
in_range = function(x, rng) pmin(pmax(x, rng[1]), rng[2])

data-raw/archive_cases_dv_subset.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ dv_subset <- covidcast(
1212
geo_values = "ca,fl,ny,tx",
1313
issues = epirange(20200601, 20211201)
1414
) %>%
15-
fetch_tbl() %>%
15+
fetch() %>%
1616
select(geo_value, time_value, version = issue, percent_cli = value) %>%
1717
# We're using compactify=FALSE here and below to avoid some testthat test
1818
# failures on tests that were based on a non-compactified version.
@@ -27,7 +27,7 @@ case_rate_subset <- covidcast(
2727
geo_values = "ca,fl,ny,tx",
2828
issues = epirange(20200601, 20211201)
2929
) %>%
30-
fetch_tbl() %>%
30+
fetch() %>%
3131
select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%
3232
as_epi_archive(compactify=FALSE)
3333

data-raw/incidence_num_outlier_example.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ incidence_num_outlier_example <- covidcast(
1212
geo_values = "fl,nj",
1313
as_of = 20211028
1414
) %>%
15-
fetch_tbl() %>%
15+
fetch() %>%
1616
select(geo_value, time_value, cases = value) %>%
1717
as_epi_df()
1818

data-raw/jhu_csse_county_level_subset.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jhu_csse_county_level_subset <- covidcast(
1717
time_values = epirange(20200601, 20211231),
1818
geo_values = paste(y$geo_value, collapse = ",")
1919
) %>%
20-
fetch_tbl() %>%
20+
fetch() %>%
2121
select(geo_value, time_value, cases = value) %>%
2222
full_join(y, by = "geo_value") %>%
2323
as_epi_df()

0 commit comments

Comments
 (0)