diff --git a/R/archive.R b/R/archive.R index f7a98526..9ab64e3b 100644 --- a/R/archive.R +++ b/R/archive.R @@ -16,7 +16,7 @@ #' @details An `epi_archive` is an R6 class which contains a data table `DT`, of #' class `data.table` from the `data.table` package, with (at least) the #' following columns: -#' +#' #' * `geo_value`: the geographic value associated with each row of measurements. #' * `time_value`: the time value associated with each row of measurements. #' * `version`: the time value specifying the version for each row of @@ -31,7 +31,7 @@ #' on `DT` directly). There can only be a single row per unique combination of #' key variables, and thus the key variables are critical for figuring out how #' to generate a snapshot of data from the archive, as of a given version. -#' +#' #' In general, last observation carried forward (LOCF) is used to data in #' between recorded versions. Currently, deletions must be represented as #' revising a row to a special state (e.g., making the entries `NA` or @@ -43,7 +43,7 @@ #' reference semantics. A primary consequence of this is that objects are not #' copied when modified. You can read more about this in Hadley Wickham's #' [Advanced R](https://adv-r.hadley.nz/r6.html#r6-semantics) book. -#' +#' #' @section Metadata: #' The following pieces of metadata are included as fields in an `epi_archive` #' object: @@ -75,7 +75,7 @@ #' sliding computation at any given reference time point t is performed on #' **data that would have been available as of t**. More details on `slide()` #' are documented in the wrapper function `epix_slide()`. -#' +#' #' @importFrom R6 R6Class #' @export epi_archive = @@ -89,7 +89,7 @@ epi_archive = additional_metadata = NULL, #' @description Creates a new `epi_archive` object. #' @param x A data frame, data table, or tibble, with columns `geo_value`, -#' `time_value`, `version`, and then any additional number of columns. +#' `time_value`, `version`, and then any additional number of columns. #' @param geo_type Type for the geo values. If missing, then the function will #' attempt to infer it from the geo values present; if this fails, then it #' will be set to "custom". @@ -105,12 +105,12 @@ epi_archive = #' @return An `epi_archive` object. #' @importFrom data.table as.data.table key setkeyv initialize = function(x, geo_type, time_type, other_keys, - additional_metadata) { + additional_metadata) { # Check that we have a data frame if (!is.data.frame(x)) { Abort("`x` must be a data frame.") } - + # Check that we have geo_value, time_value, version columns if (!("geo_value" %in% names(x))) { Abort("`x` must contain a `geo_value` column.") @@ -121,7 +121,7 @@ epi_archive = if (!("version" %in% names(x))) { Abort("`x` must contain a `version` column.") } - + # If geo type is missing, then try to guess it if (missing(geo_type)) { geo_type = guess_geo_type(x$geo_value) @@ -131,7 +131,7 @@ epi_archive = if (missing(time_type)) { time_type = guess_time_type(x$time_value) } - + # Finish off with small checks on keys variables and metadata if (missing(other_keys)) other_keys = NULL if (missing(additional_metadata)) additional_metadata = list() @@ -145,7 +145,7 @@ epi_archive = c("geo_type", "time_type"))) { Warn("`additional_metadata` names overlap with existing metadata fields \"geo_type\", \"time_type\".") } - + # Create the data table; if x was an un-keyed data.table itself, # then the call to as.data.table() will fail to set keys, so we # need to check this, then do it manually if needed @@ -163,8 +163,8 @@ epi_archive = cat("An `epi_archive` object, with metadata:\n") cat(sprintf("* %-9s = %s\n", "geo_type", self$geo_type)) cat(sprintf("* %-9s = %s\n", "time_type", self$time_type)) - if (!is.null(self$additional_metadata)) { - sapply(self$additional_metadata, function(m) { + if (!is.null(self$additional_metadata)) { + sapply(self$additional_metadata, function(m) { cat(sprintf("* %-9s = %s\n", names(m), m)) }) } @@ -178,10 +178,15 @@ epi_archive = cat(sprintf("* %-14s = %s\n", "max version", max(self$DT$version))) cat("----------\n") - cat(sprintf("Data archive (stored in DT field): %i x %i\n", + cat(sprintf("Data archive (stored in DT field): %i x %i\n", nrow(self$DT), ncol(self$DT))) cat("----------\n") - cat(sprintf("Public methods: %s", + cat(sprintf("Columns in DT: %s\n", paste(ifelse(length( + colnames(self$DT)) <= 4, paste(colnames(self$DT), collapse = ", "), + paste(paste(colnames(self$DT)[1:4], collapse = ", "), "and", + length(colnames(self$DT)[5:length(colnames(self$DT))]), "more columns"))))) + cat("----------\n") + cat(sprintf("Public methods: %s\n", paste(names(epi_archive$public_methods), collapse = ", "))) }, @@ -195,7 +200,7 @@ epi_archive = other_keys = setdiff(key(self$DT), c("geo_value", "time_value", "version")) if (length(other_keys) == 0) other_keys = NULL - + # Check a few things on max_version if (!identical(class(max_version), class(self$DT$version))) { Abort("`max_version` and `DT$version` must have same class.") @@ -209,17 +214,17 @@ epi_archive = if (max_version == self_max) { Warn("Getting data as of the latest version possible. For a variety of reasons, it is possible that we only have a preliminary picture of this version (e.g., the upstream source has updated it but we have not seen it due to latency in synchronization). Thus, the snapshot that we produce here might not be reproducible at a later time (e.g., when the archive has caught up in terms of synchronization).") } - + # Filter by version and return return( - # Make sure to use data.table ways of filtering and selecting + # Make sure to use data.table ways of filtering and selecting self$DT[between(time_value, min_time_value, max_version) & version <= max_version, ] %>% unique(by = c("geo_value", "time_value", other_keys), fromLast = TRUE) %>% - tibble::as_tibble() %>% + tibble::as_tibble() %>% dplyr::select(-.data$version) %>% as_epi_df(geo_type = self$geo_type, time_type = self$time_type, @@ -227,7 +232,7 @@ epi_archive = additional_metadata = c(self$additional_metadata, other_keys = other_keys)) ) - }, + }, ##### #' @description Merges another `data.table` with the current one, and allows for #' a post-filling of `NA` values by last observation carried forward (LOCF). @@ -236,7 +241,7 @@ epi_archive = merge = function(y, ..., locf = TRUE, nan = NA) { # Check we have a `data.table` object if (!(inherits(y, "data.table") || inherits(y, "epi_archive"))) { - Abort("`y` must be of class `data.table` or `epi_archive`.") + Abort("`y` must be of class `data.table` or `epi_archive`.") } # Use the data.table merge function, carrying through ... args @@ -251,25 +256,25 @@ epi_archive = # Important: use nafill and not setnafill because the latter # returns the entire data frame by reference, and the former can - # be set to act on particular columns by reference using := + # be set to act on particular columns by reference using := self$DT[, - (cols) := nafill(.SD, type = "locf", nan = nan), - .SDcols = cols, + (cols) := nafill(.SD, type = "locf", nan = nan), + .SDcols = cols, by = by] } - }, + }, ##### #' @description Slides a given function over variables in an `epi_archive` #' object. See the documentation for the wrapper function `epix_as_of()` for -#' details. +#' details. #' @importFrom data.table key #' @importFrom rlang !! !!! enquo enquos is_quosure sym syms - slide = function(f, ..., n = 7, group_by, ref_time_values, + slide = function(f, ..., n = 7, group_by, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", - all_rows = FALSE) { + all_rows = FALSE) { # If missing, then set ref time values to be everything; else make - # sure we intersect with observed time values + # sure we intersect with observed time values if (missing(ref_time_values)) { ref_time_values = unique(self$DT$time_value) } @@ -277,16 +282,16 @@ epi_archive = ref_time_values = ref_time_values[ref_time_values %in% unique(self$DT$time_value)] } - - # If a custom time step is specified, then redefine units + + # If a custom time step is specified, then redefine units before_num = n-1 if (!missing(time_step)) before_num = time_step(n-1) - + # What to group by? If missing, set according to internal keys if (missing(group_by)) { group_by = setdiff(key(self$DT), c("time_value", "version")) } - + # Symbolize column name, defuse grouping variables. We have to do # the middle step here which is a bit complicated (unfortunately) # since the function epix_slide() could have called the current one, @@ -298,20 +303,20 @@ epi_archive = # Key variable names, apart from time value and version key_vars = setdiff(key(self$DT), c("time_value", "version")) - + # Computation for one group, one time value comp_one_grp = function(.data_group, - f, ..., + f, ..., time_value, key_vars, new_col) { - # Carry out the specified computation + # Carry out the specified computation comp_value = f(.data_group, ...) # Count the number of appearances of the reference time value. # Note: ideally, we want to directly count occurrences of the ref # time value but due to latency, this will often not appear in the - # data group. So we count the number of unique key values, outside + # data group. So we count the number of unique key values, outside # of the time value column count = sum(!duplicated(.data_group[, key_vars])) @@ -345,23 +350,23 @@ epi_archive = else { Abort("The slide computation must return an atomic vector or a data frame.") } - + # Note that we've already recycled comp value to make size stable, # so tibble() will just recycle time value appropriately - return(tibble::tibble(time_value = time_value, + return(tibble::tibble(time_value = time_value, !!new_col := comp_value)) } - + # If f is not missing, then just go ahead, slide by group if (!missing(f)) { if (rlang::is_formula(f)) f = rlang::as_function(f) - + x = purrr::map_dfr(ref_time_values, function(t) { self$as_of(t, min_time_value = t - before_num) %>% - tibble::as_tibble() %>% + tibble::as_tibble() %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, - f = f, ..., + f = f, ..., time_value = t, key_vars = key_vars, new_col = new_col, @@ -379,14 +384,14 @@ epi_archive = if (length(quos) > 1) { Abort("If `f` is missing then only a single computation can be specified via `...`.") } - + quo = quos[[1]] f = function(x, quo, ...) rlang::eval_tidy(quo, x) new_col = sym(names(rlang::quos_auto_name(quos))) x = purrr::map_dfr(ref_time_values, function(t) { self$as_of(t, min_time_value = t - before_num) %>% - tibble::as_tibble() %>% + tibble::as_tibble() %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, quo = quo, @@ -397,12 +402,12 @@ epi_archive = dplyr::ungroup() }) } - + # Unnest if we need to if (!as_list_col) { x = tidyr::unnest(x, !!new_col, names_sep = names_sep) } - + # Join to get all rows, if we need to, then return if (all_rows) { cols = c(as.character(group_by), "time_value") @@ -413,7 +418,7 @@ epi_archive = } ) ) - + #' Convert to `epi_archive` format #' #' Converts a data frame, data table, or tibble into an `epi_archive` @@ -466,15 +471,15 @@ epi_archive = #' time_type = "day", #' other_keys = "county") as_epi_archive = function(x, geo_type, time_type, other_keys, - additional_metadata = list()) { - epi_archive$new(x, geo_type, time_type, other_keys, additional_metadata) + additional_metadata = list()) { + epi_archive$new(x, geo_type, time_type, other_keys, additional_metadata) } #' Test for `epi_archive` format #' #' @param x An object. #' @return `TRUE` if the object inherits from `epi_archive`. -#' +#' #' @export #' @examples #' is_epi_archive(jhu_csse_daily_subset) # FALSE (this is an epi_df, not epi_archive) diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R index 1bf1dfb5..11b02974 100644 --- a/R/methods-epi_df.R +++ b/R/methods-epi_df.R @@ -26,7 +26,8 @@ as_tsibble.epi_df = function(x, key, ...) { #' @method print epi_df #' @export print.epi_df = function(x, ...) { - cat("An `epi_df` object, with metadata:\n") + cat("An `epi_df` object,", prettyNum(nrow(x),","), "x", + prettyNum(ncol(x),","), "with metadata:\n") cat(sprintf("* %-9s = %s\n", "geo_type", attributes(x)$metadata$geo_type)) cat(sprintf("* %-9s = %s\n", "time_type", attributes(x)$metadata$time_type)) cat(sprintf("* %-9s = %s\n", "as_of", attributes(x)$metadata$as_of)) diff --git a/R/slide.R b/R/slide.R index 5847b130..2323107a 100644 --- a/R/slide.R +++ b/R/slide.R @@ -84,7 +84,27 @@ #' tidy evaluation (first example, above), then the name for the new column is #' inferred from the given expression and overrides any name passed explicitly #' through the `new_col_name` argument. -#' +#' +#' When `f` is a named function with arguments, if a tibble with an unnamed +#' grouping variable is passed in as the method argument to `f`, include a +#' parameter for the grouping-variable in `function()` just prior to +#' specifying the method to prevent that from being overridden. For example: +#' ``` +#' # Construct an tibble with an unnamed grouping variable +#' edf = bind_rows(tibble(geo_value = "ak", time_value = as.Date("2020-01-01") +#' + 1:10, x1=1:10, y=1:10 + rnorm(10L))) %>% +#' as_epi_df() +#' +#' # Now, include a row parameter for the grouping variable in the tibble, +#' # which we denote as g, just prior to method = "qr" +#' # Note that if g was not included below, then the method = "qr" would be +#' # overridden, as described above +#' edf %>% +#' group_by(geo_value) %>% +#' epi_slide(function(x, g, method="qr", ...) tibble(model=list( +#' lm(y ~ x1, x, method=method))), n=7L) +#' ``` +#' #' @importFrom lubridate days weeks #' @importFrom rlang .data .env !! enquo enquos sym #' @export @@ -121,7 +141,7 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, # intersect with observed time values if (missing(ref_time_values)) { ref_time_values = unique(x$time_value) - } + } else { ref_time_values = ref_time_values[ref_time_values %in% unique(x$time_value)] @@ -164,6 +184,10 @@ epi_slide = function(x, f, ..., n = 7, ref_time_values, time_range = range(unique(x$time_value)) starts = in_range(ref_time_values - before_num, time_range) stops = in_range(ref_time_values + after_num, time_range) + + if( length(starts) == 0 || length(stops) == 0 ) { + Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check your settings for ref_time_values and align (and before, if specified).") + } # Symbolize new column name new_col = sym(new_col_name) diff --git a/man/as_epi_archive.Rd b/man/as_epi_archive.Rd index a95550be..c7098f77 100644 --- a/man/as_epi_archive.Rd +++ b/man/as_epi_archive.Rd @@ -42,11 +42,15 @@ examples. } \details{ This simply a wrapper around the \code{new()} method of the \code{epi_archive} -class, so for example:\preformatted{x <- as_epi_archive(df, geo_type = "state", time_type = "day") -} +class, so for example: -would be equivalent to:\preformatted{x <- epi_archive$new(df, geo_type = "state", time_type = "day") -} +\if{html}{\out{
}}\preformatted{x <- as_epi_archive(df, geo_type = "state", time_type = "day") +}\if{html}{\out{
}} + +would be equivalent to: + +\if{html}{\out{
}}\preformatted{x <- epi_archive$new(df, geo_type = "state", time_type = "day") +}\if{html}{\out{
}} } \examples{ df <- data.frame (geo_value = c(replicate(2, "ca"), replicate(2, "fl")), diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd index fff4e714..5214b01d 100644 --- a/man/epi_archive.Rd +++ b/man/epi_archive.Rd @@ -84,17 +84,17 @@ are documented in the wrapper function \code{epix_slide()}. \section{Methods}{ \subsection{Public methods}{ \itemize{ -\item \href{#method-new}{\code{epi_archive$new()}} -\item \href{#method-print}{\code{epi_archive$print()}} -\item \href{#method-as_of}{\code{epi_archive$as_of()}} -\item \href{#method-merge}{\code{epi_archive$merge()}} -\item \href{#method-slide}{\code{epi_archive$slide()}} -\item \href{#method-clone}{\code{epi_archive$clone()}} +\item \href{#method-epi_archive-new}{\code{epi_archive$new()}} +\item \href{#method-epi_archive-print}{\code{epi_archive$print()}} +\item \href{#method-epi_archive-as_of}{\code{epi_archive$as_of()}} +\item \href{#method-epi_archive-merge}{\code{epi_archive$merge()}} +\item \href{#method-epi_archive-slide}{\code{epi_archive$slide()}} +\item \href{#method-epi_archive-clone}{\code{epi_archive$clone()}} } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-new}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-new}{}}} \subsection{Method \code{new()}}{ Creates a new \code{epi_archive} object. \subsection{Usage}{ @@ -130,8 +130,8 @@ An \code{epi_archive} object. } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-print}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-print}{}}} \subsection{Method \code{print()}}{ \subsection{Usage}{ \if{html}{\out{
}}\preformatted{epi_archive$print()}\if{html}{\out{
}} @@ -139,8 +139,8 @@ An \code{epi_archive} object. } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-as_of}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-as_of}{}}} \subsection{Method \code{as_of()}}{ Generates a snapshot in \code{epi_df} format as of a given version. See the documentation for the wrapper function \code{epix_as_of()} for details. @@ -150,8 +150,8 @@ See the documentation for the wrapper function \code{epix_as_of()} for details. } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-merge}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-merge}{}}} \subsection{Method \code{merge()}}{ Merges another \code{data.table} with the current one, and allows for a post-filling of \code{NA} values by last observation carried forward (LOCF). @@ -162,8 +162,8 @@ See the documentation for the wrapper function \code{epix_merge()} for details. } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-slide}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-slide}{}}} \subsection{Method \code{slide()}}{ Slides a given function over variables in an \code{epi_archive} object. See the documentation for the wrapper function \code{epix_as_of()} for @@ -185,8 +185,8 @@ details. } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-clone}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-epi_archive-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 2e737293..903cb017 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -104,17 +104,41 @@ incomplete windows) is therefore left up to the user, either through the specified function or formula \code{f}, or through post-processing. If \code{f} is missing, then an expression for tidy evaluation can be specified, -for example, as in:\preformatted{epi_slide(x, cases_7dav = mean(cases), n = 7) -} +for example, as in: + +\if{html}{\out{
}}\preformatted{epi_slide(x, cases_7dav = mean(cases), n = 7) +}\if{html}{\out{
}} -which would be equivalent to:\preformatted{epi_slide(x, function(x, ...) mean(x$cases), n = 7, +which would be equivalent to: + +\if{html}{\out{
}}\preformatted{epi_slide(x, function(x, ...) mean(x$cases), n = 7, new_col_name = "cases_7dav") -} +}\if{html}{\out{
}} Thus, to be clear, when the computation is specified via an expression for tidy evaluation (first example, above), then the name for the new column is inferred from the given expression and overrides any name passed explicitly through the \code{new_col_name} argument. + +When \code{f} is a named function with arguments, if a tibble with an unnamed +grouping variable is passed in as the method argument to \code{f}, include a +parameter for the grouping-variable in \verb{function()} just prior to +specifying the method to prevent that from being overridden. For example: + +\if{html}{\out{
}}\preformatted{# Construct an tibble with an unnamed grouping variable +edf = bind_rows(tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + + 1:10, x1=1:10, y=1:10 + rnorm(10L))) \%>\% + as_epi_df() + +# Now, include a row parameter for the grouping variable in the tibble, +# which we denote as g, just prior to method = "qr" +# Note that if g was not included below, then the method = "qr" would be +# overridden, as described above +edf \%>\% +group_by(geo_value) \%>\% +epi_slide(function(x, g, method="qr", ...) tibble(model=list( + lm(y ~ x1, x, method=method))), n=7L) +}\if{html}{\out{
}} } \examples{ # slide a 7-day trailing average formula on cases diff --git a/man/epix_as_of.Rd b/man/epix_as_of.Rd index 658e7169..b5d5969c 100644 --- a/man/epix_as_of.Rd +++ b/man/epix_as_of.Rd @@ -29,11 +29,15 @@ examples. } \details{ This is simply a wrapper around the \code{as_of()} method of the -\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then:\preformatted{epix_as_of(x, max_version = v) -} +\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then: -is equivalent to:\preformatted{x$as_of(max_version = v) -} +\if{html}{\out{
}}\preformatted{epix_as_of(x, max_version = v) +}\if{html}{\out{
}} + +is equivalent to: + +\if{html}{\out{
}}\preformatted{x$as_of(max_version = v) +}\if{html}{\out{
}} } \examples{ # warning message of data latency shown diff --git a/man/epix_merge.Rd b/man/epix_merge.Rd index 781ef6fe..3d1b2e1c 100644 --- a/man/epix_merge.Rd +++ b/man/epix_merge.Rd @@ -35,11 +35,15 @@ examples. } \details{ This is simply a wrapper around the \code{merge()} method of the -\code{epi_archive} class, so if \code{x} and \code{y} are an \code{epi_archive} objects, then:\preformatted{epix_merge(x, y) -} +\code{epi_archive} class, so if \code{x} and \code{y} are an \code{epi_archive} objects, then: -is equivalent to:\preformatted{x$merge(y) -} +\if{html}{\out{
}}\preformatted{epix_merge(x, y) +}\if{html}{\out{
}} + +is equivalent to: + +\if{html}{\out{
}}\preformatted{x$merge(y) +}\if{html}{\out{
}} } \examples{ # create two example epi_archive datasets diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index f01a0a71..b6f7a323 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -115,11 +115,15 @@ should never be used in place of \code{epi_slide()}, and only used when version-aware sliding is necessary (as it its purpose). Finally, this is simply a wrapper around the \code{slide()} method of the -\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then:\preformatted{epix_slide(x, new_var = comp(old_var), n = 120) -} +\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then: -is equivalent to:\preformatted{x$slide(x, new_var = comp(old_var), n = 120) -} +\if{html}{\out{
}}\preformatted{epix_slide(x, new_var = comp(old_var), n = 120) +}\if{html}{\out{
}} + +is equivalent to: + +\if{html}{\out{
}}\preformatted{x$slide(x, new_var = comp(old_var), n = 120) +}\if{html}{\out{
}} } \examples{ # these dates are reference time points for the 3 day average sliding window diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R new file mode 100644 index 00000000..71a180c4 --- /dev/null +++ b/tests/testthat/test-epi_slide.R @@ -0,0 +1,34 @@ +## Create an epi. df and a function to test epi_slide with + +edf = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = as.Date("2020-01-01") + 1:200, value=1:200), + dplyr::tibble(geo_value = "al", time_value=as.Date("2020-01-01") + 1:5, value=-(1:5)) +) %>% + as_epi_df() + +f = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) + +## --- These cases generate the error: --- +test_that("`ref_time_values` + `align` that result in no slide data, generate the error", { + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")), + "starting and/or stopping times for sliding are out of bounds") # before the first, no data in the slide windows + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+207L), + "starting and/or stopping times for sliding are out of bounds") # beyond the last, no data in window +}) + +test_that("`ref_time_values` + `align` that have some slide data, but generate the error due to ref. time being out of time range", { + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01"), align="left"), + "starting and/or stopping times for sliding are out of bounds") # before the first, but we'd expect there to be data in the window + expect_error(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+201L), + "starting and/or stopping times for sliding are out of bounds") # beyond the last, but still with data in window +}) + +## --- These cases doesn't generate the error: --- +test_that("these doesn't produce an error; the error appears only if the ref time values are out of the range for every group", { + expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-01")+200L) %>% + dplyr::select("geo_value","slide_value_value"), + dplyr::tibble(geo_value = "ak", slide_value_value = 199) %>% group_by(geo_value)) # out of range for one group + expect_identical(edf %>% group_by(geo_value) %>% epi_slide(f, n=3L, ref_time_values=as.Date("2020-01-04")) %>% + dplyr::select("geo_value","slide_value_value"), + dplyr::tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2)) %>% group_by(geo_value)) # not out of range for either group +}) \ No newline at end of file