Skip to content

Commit f898bcb

Browse files
committed
WIP on .prefix =, .suffix =, .new_col_names = args
1 parent ac12ec1 commit f898bcb

File tree

5 files changed

+79
-28
lines changed

5 files changed

+79
-28
lines changed

NAMESPACE

+3
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ importFrom(checkmate,assert_list)
114114
importFrom(checkmate,assert_logical)
115115
importFrom(checkmate,assert_numeric)
116116
importFrom(checkmate,assert_scalar)
117+
importFrom(checkmate,assert_string)
117118
importFrom(checkmate,checkInt)
118119
importFrom(checkmate,check_atomic)
119120
importFrom(checkmate,check_data_frame)
@@ -176,6 +177,7 @@ importFrom(dplyr,summarize)
176177
importFrom(dplyr,tibble)
177178
importFrom(dplyr,ungroup)
178179
importFrom(ggplot2,autoplot)
180+
importFrom(glue,glue)
179181
importFrom(lifecycle,deprecated)
180182
importFrom(lubridate,as.period)
181183
importFrom(lubridate,days)
@@ -206,6 +208,7 @@ importFrom(rlang,is_quosure)
206208
importFrom(rlang,list2)
207209
importFrom(rlang,missing_arg)
208210
importFrom(rlang,new_function)
211+
importFrom(rlang,quo_get_env)
209212
importFrom(rlang,quo_is_missing)
210213
importFrom(rlang,sym)
211214
importFrom(rlang,syms)

R/epi_df.R

+5-6
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,6 @@ as_epi_df.tbl_df <- function(
232232
as_of,
233233
other_keys = character(),
234234
...) {
235-
# possible standard substitutions for time_value
236235
x <- rename(x, ...)
237236
x <- guess_column_name(x, "time_value", time_column_names())
238237
x <- guess_column_name(x, "geo_value", geo_column_names())
@@ -282,11 +281,11 @@ as_epi_df.tbl_df <- function(
282281
cli_abort("as_epi_df: `other_keys` can't include \".time_value_counts\"")
283282
}
284283

285-
duplicated_time_values <- x %>%
286-
group_by(across(all_of(c("geo_value", "time_value", other_keys)))) %>%
287-
filter(dplyr::n() > 1) %>%
288-
ungroup()
289-
if (nrow(duplicated_time_values) > 0) {
284+
if (anyDuplicated(x[c("geo_value", "time_value", other_keys)])) {
285+
duplicated_time_values <- x %>%
286+
group_by(across(all_of(c("geo_value", "time_value", other_keys)))) %>%
287+
filter(dplyr::n() > 1) %>%
288+
ungroup()
290289
bad_data <- capture.output(duplicated_time_values)
291290
cli_abort(
292291
"as_epi_df: some groups in the data have duplicated time values. epi_df requires a unique time_value per group.",

R/epiprocess-package.R

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#' @importFrom checkmate anyInfinite anyMissing assert assert_character
77
#' @importFrom checkmate assert_class assert_data_frame assert_int assert_list
88
#' @importFrom checkmate assert_logical assert_numeric assert_scalar checkInt
9+
#' @importFrom checkmate assert_string
910
#' @importFrom checkmate check_atomic check_data_frame expect_class test_int
1011
#' @importFrom checkmate check_names
1112
#' @importFrom checkmate test_subset test_set_equal vname

R/slide.R

+67-22
Original file line numberDiff line numberDiff line change
@@ -564,8 +564,9 @@ get_before_after_from_window <- function(window_size, align, time_type) {
564564
#' functions).
565565
#'
566566
#' @importFrom dplyr bind_rows mutate %>% arrange tibble select all_of
567-
#' @importFrom rlang enquo expr_label caller_arg
567+
#' @importFrom rlang enquo expr_label caller_arg quo_get_env
568568
#' @importFrom tidyselect eval_select
569+
#' @importFrom glue glue
569570
#' @importFrom purrr map map_lgl
570571
#' @importFrom data.table frollmean frollsum frollapply
571572
#' @importFrom lubridate as.period
@@ -593,6 +594,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
593594
epi_slide_opt <- function(
594595
.x, .col_names, .f, ...,
595596
.window_size = NULL, .align = c("right", "center", "left"),
597+
.prefix = NULL, .suffix = NULL, .new_col_names = NULL,
596598
.ref_time_values = NULL, .all_rows = FALSE) {
597599
assert_class(.x, "epi_df")
598600

@@ -644,21 +646,37 @@ epi_slide_opt <- function(
644646
)
645647
}
646648

649+
# The position of a given column can be differ between input `.x` and
650+
# `.data_group` since the grouping step by default drops grouping columns.
651+
# To avoid rerunning `eval_select` for every `.data_group`, convert
652+
# positions of user-provided `col_names` into string column names. We avoid
653+
# using `names(pos)` directly for robustness and in case we later want to
654+
# allow users to rename fields via tidyselection.
655+
col_names_quo <- enquo(.col_names)
656+
pos <- eval_select(col_names_quo, data = .x, allow_rename = FALSE)
657+
col_names_chr <- names(.x)[pos]
658+
647659
# Check that slide function `.f` is one of those short-listed from
648660
# `data.table` and `slider` (or a function that has the exact same
649661
# definition, e.g. if the function has been reexported or defined
650662
# locally).
651-
if (any(map_lgl(
652-
list(frollmean, frollsum, frollapply),
653-
~ identical(.f, .x)
654-
))) {
655-
f_from_package <- "data.table"
656-
} else if (any(map_lgl(
657-
list(slide_sum, slide_prod, slide_mean, slide_min, slide_max, slide_all, slide_any),
658-
~ identical(.f, .x)
659-
))) {
660-
f_from_package <- "slider"
661-
} else {
663+
f_possibilities <-
664+
tibble::tribble(
665+
~f, ~package, ~abbr,
666+
frollmean, "data.table", "av",
667+
frollsum, "data.table", "sum",
668+
frollapply, "data.table", "slide",
669+
slide_sum, "slider", "sum",
670+
slide_prod, "slider", "prod",
671+
slide_mean, "slider", "av",
672+
slide_min, "slider", "min",
673+
slide_max, "slider", "max",
674+
slide_all, "slider", "all",
675+
slide_any, "slider", "any",
676+
)
677+
f_info <- f_possibilities %>%
678+
filter(map_lgl(.data$f, ~ identical(.f, .x)))
679+
if (nrow(f_info) == 0L) {
662680
# `f` is from somewhere else and not supported
663681
cli_abort(
664682
c(
@@ -672,6 +690,43 @@ epi_slide_opt <- function(
672690
epiprocess__f = .f
673691
)
674692
}
693+
f_from_package <- f_info$package
694+
695+
assert_string(.prefix, null.ok = TRUE)
696+
assert_string(.suffix, null.ok = TRUE)
697+
assert_character(.new_col_names, len = length(col_names_chr), null.ok = TRUE)
698+
if ((!is.null(.prefix) || !is.null(.suffix)) && !is.null(.new_col_names)) {
699+
cli_abort(
700+
"Can't use both .prefix/.suffix and .new_col_names at the same time."
701+
)
702+
}
703+
if (is.null(.prefix) && is.null(.suffix) && is.null(.new_col_names)) {
704+
.suffix <- "_{.window_size}{.time_unit}{.f_abbr}"
705+
}
706+
if (!is.null(.prefix) || !is.null(.suffix)) {
707+
.prefix <- .prefix %||% ""
708+
.suffix <- .suffix %||% ""
709+
glue_env <- rlang::env(
710+
.window_size = .window_size, # FIXME typing
711+
.time_unit = "d", # FIXME
712+
.f_abbr = f_info$abbr,
713+
quo_get_env(col_names_quo)
714+
)
715+
.new_col_names <- unclass(
716+
glue(.prefix, .envir = glue_env) +
717+
col_names_chr +
718+
glue(.suffix, .envir = glue_env)
719+
)
720+
} else {
721+
# `.new_col_names` was provided by user; we don't need to do anything.
722+
}
723+
if (any(.new_col_names %in% names(.x))) {
724+
cli_abort(c(
725+
"Naming conflict between new columns and existing columns",
726+
"x" = "Overlapping names: {format_varnames(intersect(.new_col_names, names(.x)))}"
727+
))
728+
}
729+
result_col_names <- .new_col_names
675730

676731
user_provided_rtvs <- !is.null(.ref_time_values)
677732
if (!user_provided_rtvs) {
@@ -708,16 +763,6 @@ epi_slide_opt <- function(
708763
pad_early_dates <- date_seq_list$pad_early_dates
709764
pad_late_dates <- date_seq_list$pad_late_dates
710765

711-
# The position of a given column can be differ between input `.x` and
712-
# `.data_group` since the grouping step by default drops grouping columns.
713-
# To avoid rerunning `eval_select` for every `.data_group`, convert
714-
# positions of user-provided `col_names` into string column names. We avoid
715-
# using `names(pos)` directly for robustness and in case we later want to
716-
# allow users to rename fields via tidyselection.
717-
pos <- eval_select(enquo(.col_names), data = .x, allow_rename = FALSE)
718-
col_names_chr <- names(.x)[pos]
719-
# Always rename results to "slide_value_<original column name>".
720-
result_col_names <- paste0("slide_value_", col_names_chr)
721766
slide_one_grp <- function(.data_group, .group_key, ...) {
722767
missing_times <- all_dates[!(all_dates %in% .data_group$time_value)]
723768
# `frollmean` requires a full window to compute a result. Add NA values

man/epi_slide_opt.Rd

+3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)