@@ -537,7 +537,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
537
537
# '
538
538
# ' @template basic-slide-params
539
539
# ' @param .col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column
540
- # ' name(e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
540
+ # ' name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
541
541
# ' [other tidy-select expression][tidyselect::language], or a vector of
542
542
# ' characters (e.g. `c("cases", "deaths")`). Variable names can be used as if
543
543
# ' they were positions in the data frame, so expressions like `x:y` can be
@@ -559,13 +559,40 @@ get_before_after_from_window <- function(window_size, align, time_type) {
559
559
# ' `epi_slide_mean` and `epi_slide_sum`) take care of window completion
560
560
# ' automatically to prevent associated errors.
561
561
# ' @param ... Additional arguments to pass to the slide computation `.f`, for
562
- # ' example, `algo` or `na.rm` in data.table functions. You don't need to
563
- # ' specify `.x`, `.window_size`, or `.align` (or `before`/`after` for slider
564
- # ' functions).
562
+ # ' example, `algo` or `na.rm` in data.table functions. You don't need to
563
+ # ' specify `.x`, `.window_size`, or `.align` (or `before`/`after` for slider
564
+ # ' functions).
565
+ # ' @param .prefix Optional [`glue::glue`] format string; name the slide result
566
+ # ' column(s) by attaching this prefix to the corresponding input column(s).
567
+ # ' Some shorthand is supported for basing the output names on `.window_size`
568
+ # ' or other arguments; see "Prefix and suffix shorthand" below.
569
+ # ' @param .suffix Optional [`glue::glue`] format string; like `.prefix`. The
570
+ # ' default naming behavior is equivalent to `.suffix =
571
+ # ' "_{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"`. Can be used in combination
572
+ # ' with `.prefix`.
573
+ # ' @param .new_col_names Optional character vector with length matching the
574
+ # ' number of input columns from `.col_names`; name the slide result column(s)
575
+ # ' with these names. Cannot be used in combination with `.prefix` and/or
576
+ # ' `.suffix`.
577
+ # '
578
+ # ' @section Prefix and suffix shorthand:
579
+ # '
580
+ # ' [`glue::glue`] format strings specially interpret content within curly
581
+ # ' braces. E.g., `glue::glue("ABC{2 + 2}")` evaluates to `"ABC4"`. For `.prefix`
582
+ # ' and `.suffix`, we provide `glue` with some additional variable bindings:
583
+ # '
584
+ # ' - `{.n}` will be the number of time steps in the computation
585
+ # ' corresponding to the `.window_size`.
586
+ # ' - `{.time_unit_abbr}` will be a lower-case letter corresponding to the
587
+ # ' `time_type` of `.x`
588
+ # ' - `{.align_abbr}` will be `""` if `.align` is the default of `"right"`;
589
+ # ' otherwise, it will be the first letter of `.align`
590
+ # ' - `{.f_abbr}` will be a short string based on what `.f`
565
591
# '
566
592
# ' @importFrom dplyr bind_rows mutate %>% arrange tibble select all_of
567
- # ' @importFrom rlang enquo expr_label caller_arg
593
+ # ' @importFrom rlang enquo expr_label caller_arg quo_get_env
568
594
# ' @importFrom tidyselect eval_select
595
+ # ' @importFrom glue glue
569
596
# ' @importFrom purrr map map_lgl
570
597
# ' @importFrom data.table frollmean frollsum frollapply
571
598
# ' @importFrom lubridate as.period
@@ -577,8 +604,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
577
604
# ' # Compute a 7-day trailing average on cases.
578
605
# ' cases_deaths_subset %>%
579
606
# ' group_by(geo_value) %>%
580
- # ' epi_slide_opt(cases, .f = data.table::frollmean, .window_size = 7) %>%
581
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases)
607
+ # ' epi_slide_opt(cases, .f = data.table::frollmean, .window_size = 7)
582
608
# '
583
609
# ' # Same as above, but adjust `frollmean` settings for speed, accuracy, and
584
610
# ' # to allow partially-missing windows.
@@ -588,11 +614,11 @@ get_before_after_from_window <- function(window_size, align, time_type) {
588
614
# ' cases,
589
615
# ' .f = data.table::frollmean, .window_size = 7,
590
616
# ' algo = "exact", hasNA = TRUE, na.rm = TRUE
591
- # ' ) %>%
592
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases)
617
+ # ' )
593
618
epi_slide_opt <- function (
594
619
.x , .col_names , .f , ... ,
595
620
.window_size = NULL , .align = c(" right" , " center" , " left" ),
621
+ .prefix = NULL , .suffix = NULL , .new_col_names = NULL ,
596
622
.ref_time_values = NULL , .all_rows = FALSE ) {
597
623
assert_class(.x , " epi_df" )
598
624
@@ -620,7 +646,7 @@ epi_slide_opt <- function(
620
646
if (" new_col_name" %in% provided_args || " .new_col_name" %in% provided_args ) {
621
647
cli :: cli_abort(
622
648
" epi_slide_opt: the argument `new_col_name` is not supported for `epi_slide_opt`. If you want to customize
623
- the output column names, use `dplyr::rename` after the slide ." ,
649
+ the output column names, use `.prefix =`, `.suffix =`, or `.new_col_**names** =` ." ,
624
650
class = " epiprocess__epi_slide_opt__new_name_not_supported"
625
651
)
626
652
}
@@ -644,21 +670,37 @@ epi_slide_opt <- function(
644
670
)
645
671
}
646
672
673
+ # The position of a given column can be differ between input `.x` and
674
+ # `.data_group` since the grouping step by default drops grouping columns.
675
+ # To avoid rerunning `eval_select` for every `.data_group`, convert
676
+ # positions of user-provided `col_names` into string column names. We avoid
677
+ # using `names(pos)` directly for robustness and in case we later want to
678
+ # allow users to rename fields via tidyselection.
679
+ col_names_quo <- enquo(.col_names )
680
+ pos <- eval_select(col_names_quo , data = .x , allow_rename = FALSE )
681
+ col_names_chr <- names(.x )[pos ]
682
+
647
683
# Check that slide function `.f` is one of those short-listed from
648
684
# `data.table` and `slider` (or a function that has the exact same
649
685
# definition, e.g. if the function has been reexported or defined
650
686
# locally).
651
- if (any(map_lgl(
652
- list (frollmean , frollsum , frollapply ),
653
- ~ identical(.f , .x )
654
- ))) {
655
- f_from_package <- " data.table"
656
- } else if (any(map_lgl(
657
- list (slide_sum , slide_prod , slide_mean , slide_min , slide_max , slide_all , slide_any ),
658
- ~ identical(.f , .x )
659
- ))) {
660
- f_from_package <- " slider"
661
- } else {
687
+ f_possibilities <-
688
+ tibble :: tribble(
689
+ ~ f , ~ package , ~ abbr ,
690
+ frollmean , " data.table" , " av" ,
691
+ frollsum , " data.table" , " sum" ,
692
+ frollapply , " data.table" , " slide" ,
693
+ slide_sum , " slider" , " sum" ,
694
+ slide_prod , " slider" , " prod" ,
695
+ slide_mean , " slider" , " av" ,
696
+ slide_min , " slider" , " min" ,
697
+ slide_max , " slider" , " max" ,
698
+ slide_all , " slider" , " all" ,
699
+ slide_any , " slider" , " any" ,
700
+ )
701
+ f_info <- f_possibilities %> %
702
+ filter(map_lgl(.data $ f , ~ identical(.f , .x )))
703
+ if (nrow(f_info ) == 0L ) {
662
704
# `f` is from somewhere else and not supported
663
705
cli_abort(
664
706
c(
@@ -672,6 +714,7 @@ epi_slide_opt <- function(
672
714
epiprocess__f = .f
673
715
)
674
716
}
717
+ f_from_package <- f_info $ package
675
718
676
719
user_provided_rtvs <- ! is.null(.ref_time_values )
677
720
if (! user_provided_rtvs ) {
@@ -702,22 +745,72 @@ epi_slide_opt <- function(
702
745
validate_slide_window_arg(.window_size , time_type )
703
746
window_args <- get_before_after_from_window(.window_size , .align , time_type )
704
747
748
+ # Handle output naming
749
+ if ((! is.null(.prefix ) || ! is.null(.suffix )) && ! is.null(.new_col_names )) {
750
+ cli_abort(
751
+ " Can't use both .prefix/.suffix and .new_col_names at the same time." ,
752
+ class = " epiprocess__epi_slide_opt_incompatible_naming_args"
753
+ )
754
+ }
755
+ assert_string(.prefix , null.ok = TRUE )
756
+ assert_string(.suffix , null.ok = TRUE )
757
+ assert_character(.new_col_names , len = length(col_names_chr ), null.ok = TRUE )
758
+ if (is.null(.prefix ) && is.null(.suffix ) && is.null(.new_col_names )) {
759
+ .suffix <- " _{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"
760
+ # ^ does not account for any arguments specified to underlying functions via
761
+ # `...` such as `na.rm =`, nor does it distinguish between functions from
762
+ # different packages accomplishing the same type of computation. Those are
763
+ # probably only set one way per task, so this probably produces cleaner
764
+ # names without clashes (though maybe some confusion if switching between
765
+ # code with different settings).
766
+ }
767
+ if (! is.null(.prefix ) || ! is.null(.suffix )) {
768
+ .prefix <- .prefix %|| % " "
769
+ .suffix <- .suffix %|| % " "
770
+ if (identical(.window_size , Inf )) {
771
+ n <- " running_"
772
+ time_unit_abbr <- " "
773
+ align_abbr <- " "
774
+ } else {
775
+ n <- time_delta_to_n_steps(.window_size , time_type )
776
+ time_unit_abbr <- time_type_unit_abbr(time_type )
777
+ align_abbr <- c(right = " " , center = " c" , left = " l" )[[.align ]]
778
+ }
779
+ glue_env <- rlang :: env(
780
+ .n = n ,
781
+ .time_unit_abbr = time_unit_abbr ,
782
+ .align_abbr = align_abbr ,
783
+ .f_abbr = f_info $ abbr ,
784
+ quo_get_env(col_names_quo )
785
+ )
786
+ .new_col_names <- unclass(
787
+ glue(.prefix , .envir = glue_env ) +
788
+ col_names_chr +
789
+ glue(.suffix , .envir = glue_env )
790
+ )
791
+ } else {
792
+ # `.new_col_names` was provided by user; we don't need to do anything.
793
+ }
794
+ if (any(.new_col_names %in% names(.x ))) {
795
+ cli_abort(c(
796
+ " Naming conflict between new columns and existing columns" ,
797
+ " x" = " Overlapping names: {format_varnames(intersect(.new_col_names, names(.x)))}"
798
+ ), class = " epiprocess__epi_slide_opt_old_new_name_conflict" )
799
+ }
800
+ if (anyDuplicated(.new_col_names )) {
801
+ cli_abort(c(
802
+ " New column names contain duplicates" ,
803
+ " x" = " Duplicated names: {format_varnames(unique(.new_col_names[duplicated(.new_col_names)]))}"
804
+ ), class = " epiprocess__epi_slide_opt_new_name_duplicated" )
805
+ }
806
+ result_col_names <- .new_col_names
807
+
705
808
# Make a complete date sequence between min(.x$time_value) and max(.x$time_value).
706
809
date_seq_list <- full_date_seq(.x , window_args $ before , window_args $ after , time_type )
707
810
all_dates <- date_seq_list $ all_dates
708
811
pad_early_dates <- date_seq_list $ pad_early_dates
709
812
pad_late_dates <- date_seq_list $ pad_late_dates
710
813
711
- # The position of a given column can be differ between input `.x` and
712
- # `.data_group` since the grouping step by default drops grouping columns.
713
- # To avoid rerunning `eval_select` for every `.data_group`, convert
714
- # positions of user-provided `col_names` into string column names. We avoid
715
- # using `names(pos)` directly for robustness and in case we later want to
716
- # allow users to rename fields via tidyselection.
717
- pos <- eval_select(enquo(.col_names ), data = .x , allow_rename = FALSE )
718
- col_names_chr <- names(.x )[pos ]
719
- # Always rename results to "slide_value_<original column name>".
720
- result_col_names <- paste0(" slide_value_" , col_names_chr )
721
814
slide_one_grp <- function (.data_group , .group_key , ... ) {
722
815
missing_times <- all_dates [! (all_dates %in% .data_group $ time_value )]
723
816
# `frollmean` requires a full window to compute a result. Add NA values
@@ -827,8 +920,7 @@ epi_slide_opt <- function(
827
920
# ' # Compute a 7-day trailing average on cases.
828
921
# ' cases_deaths_subset %>%
829
922
# ' group_by(geo_value) %>%
830
- # ' epi_slide_mean(cases, .window_size = 7) %>%
831
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases)
923
+ # ' epi_slide_mean(cases, .window_size = 7)
832
924
# '
833
925
# ' # Same as above, but adjust `frollmean` settings for speed, accuracy, and
834
926
# ' # to allow partially-missing windows.
@@ -838,11 +930,11 @@ epi_slide_opt <- function(
838
930
# ' cases,
839
931
# ' .window_size = 7,
840
932
# ' na.rm = TRUE, algo = "exact", hasNA = TRUE
841
- # ' ) %>%
842
- # ' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases)
933
+ # ' )
843
934
epi_slide_mean <- function (
844
935
.x , .col_names , ... ,
845
936
.window_size = NULL , .align = c(" right" , " center" , " left" ),
937
+ .prefix = NULL , .suffix = NULL , .new_col_names = NULL ,
846
938
.ref_time_values = NULL , .all_rows = FALSE ) {
847
939
# Deprecated argument handling
848
940
provided_args <- rlang :: call_args_names(rlang :: call_match())
@@ -885,6 +977,9 @@ epi_slide_mean <- function(
885
977
... ,
886
978
.window_size = .window_size ,
887
979
.align = .align ,
980
+ .prefix = .prefix ,
981
+ .suffix = .suffix ,
982
+ .new_col_names = .new_col_names ,
888
983
.ref_time_values = .ref_time_values ,
889
984
.all_rows = .all_rows
890
985
)
@@ -899,11 +994,11 @@ epi_slide_mean <- function(
899
994
# ' # Compute a 7-day trailing sum on cases.
900
995
# ' cases_deaths_subset %>%
901
996
# ' group_by(geo_value) %>%
902
- # ' epi_slide_sum(cases, .window_size = 7) %>%
903
- # ' dplyr::select(geo_value, time_value, cases, cases_7dsum = slide_value_cases)
997
+ # ' epi_slide_sum(cases, .window_size = 7)
904
998
epi_slide_sum <- function (
905
999
.x , .col_names , ... ,
906
1000
.window_size = NULL , .align = c(" right" , " center" , " left" ),
1001
+ .prefix = NULL , .suffix = NULL , .new_col_names = NULL ,
907
1002
.ref_time_values = NULL , .all_rows = FALSE ) {
908
1003
# Deprecated argument handling
909
1004
provided_args <- rlang :: call_args_names(rlang :: call_match())
@@ -945,6 +1040,9 @@ epi_slide_sum <- function(
945
1040
... ,
946
1041
.window_size = .window_size ,
947
1042
.align = .align ,
1043
+ .prefix = .prefix ,
1044
+ .suffix = .suffix ,
1045
+ .new_col_names = .new_col_names ,
948
1046
.ref_time_values = .ref_time_values ,
949
1047
.all_rows = .all_rows
950
1048
)
0 commit comments