Skip to content

Step through pre-calculated start times for each group using closure rather than using .real col in epi_slide #397

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 18 additions & 42 deletions R/slide.R
Original file line number Diff line number Diff line change
Expand Up @@ -230,37 +230,15 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
after <- time_step(after)
}

# Do set up to let us recover `ref_time_value`s later.
min_ref_time_values <- ref_time_values - before
min_ref_time_values_not_in_x <- min_ref_time_values[!(min_ref_time_values %in% unique(x$time_value))]

# Do set up to let us recover `ref_time_value`s later.
# A helper column marking real observations.
x$.real <- TRUE

# Create df containing phony data. Df has the same columns and attributes as
# `x`, but filled with `NA`s aside from grouping columns. Number of rows is
# equal to the number of `min_ref_time_values_not_in_x` we have * the
# number of unique levels seen in the grouping columns.
before_time_values_df <- data.frame(time_value = min_ref_time_values_not_in_x)
if (length(group_vars(x)) != 0) {
before_time_values_df <- dplyr::cross_join(
# Get unique combinations of grouping columns seen in real data.
unique(x[, group_vars(x)]),
before_time_values_df
)
}
# Automatically fill in all other columns from `x` with `NA`s, and carry
# attributes over to new df.
before_time_values_df <- bind_rows(x[0, ], before_time_values_df)
before_time_values_df$.real <- FALSE

x <- bind_rows(before_time_values_df, x)

# Arrange by increasing time_value
x <- arrange(x, time_value)

# Now set up starts and stops for sliding/hopping
time_range <- range(unique(x$time_value))
time_range <- range(unique(c(x$time_value, min_ref_time_values_not_in_x)))
starts <- in_range(ref_time_values - before, time_range)
stops <- in_range(ref_time_values + after, time_range)

Expand All @@ -273,7 +251,7 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,

# Computation for one group, all time values
slide_one_grp <- function(.data_group,
f, ...,
f_factory, ...,
starts,
stops,
time_values,
Expand All @@ -288,6 +266,8 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
stops <- stops[o]
time_values <- time_values[o]

f <- f_factory(starts)

# Compute the slide values
slide_values_list <- slider::hop_index(
.x = .data_group,
Expand Down Expand Up @@ -349,7 +329,6 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
# fills with NA equivalent.
vctrs::vec_slice(slide_values, o) <- orig_values
} else {
# This implicitly removes phony (`.real` == FALSE) observations.
.data_group <- filter(.data_group, o)
}
return(mutate(.data_group, !!new_col := slide_values))
Expand All @@ -372,15 +351,21 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,

f <- as_slide_computation(f, ...)
# Create a wrapper that calculates and passes `.ref_time_value` to the
# computation.
f_wrapper <- function(.x, .group_key, ...) {
.ref_time_value <- min(.x$time_value) + before
.x <- .x[.x$.real, ]
.x$.real <- NULL
f(.x, .group_key, .ref_time_value, ...)
# computation. `i` is contained in the `f_wrapper_factory` environment such
# that when called within `slide_one_grp` `i` is reset for every group.
f_wrapper_factory <- function(starts) {
# Use `i` to advance through list of start dates.
i <- 1L
starts <- starts + before
f_wrapper <- function(.x, .group_key, ...) {
.ref_time_value <- starts[[i]]
i <<- i + 1L
f(.x, .group_key, .ref_time_value, ...)
}
return(f_wrapper)
}
x <- group_modify(x, slide_one_grp,
f = f_wrapper, ...,
f_factory = f_wrapper_factory, ...,
starts = starts,
stops = stops,
time_values = ref_time_values,
Expand All @@ -394,14 +379,5 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
x <- unnest(x, !!new_col, names_sep = names_sep)
}

# Remove any remaining phony observations. When `all_rows` is TRUE, phony
# observations aren't necessarily removed in `slide_one_grp`.
if (all_rows) {
x <- x[x$.real, ]
}

# Drop helper column `.real`.
x$.real <- NULL

return(x)
}
56 changes: 34 additions & 22 deletions man/as_epi_archive.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 26 additions & 18 deletions man/as_epi_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 38 additions & 24 deletions man/detect_outlr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/detect_outlr_rm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/detect_outlr_stl.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 11 additions & 7 deletions man/epi_archive.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading