Skip to content

Commit 448d29a

Browse files
committed
simple compactify test, rename function, fix warn
1 parent c08760c commit 448d29a

File tree

8 files changed

+43
-27
lines changed

8 files changed

+43
-27
lines changed

R/archive.R

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -318,11 +318,13 @@ new_epi_archive <- function(
318318
nrow_before_compactify <- nrow(data_table)
319319
# Runs compactify on data frame
320320
if (is.null(compactify) || compactify == TRUE) {
321-
data_table <- compactify(data_table, key_vars, compactify_tol)
321+
compactified <- apply_compactify(data_table, key_vars, compactify_tol)
322+
} else{
323+
compactified <- data_table
322324
}
323325
# Warns about redundant rows if the number of rows decreased, and we didn't
324326
# explicitly say to compactify
325-
if (is.null(compactify) && nrow(data_table) < nrow_before_compactify) {
327+
if (is.null(compactify) && nrow(compactified) < nrow_before_compactify) {
326328
elim <- removed_by_compactify(data_table, key_vars, compactify_tol)
327329
warning_intro <- cli::format_inline(
328330
"Found rows that appear redundant based on
@@ -345,7 +347,7 @@ new_epi_archive <- function(
345347

346348
structure(
347349
list(
348-
DT = data_table,
350+
DT = compactified,
349351
geo_type = geo_type,
350352
time_type = time_type,
351353
additional_metadata = additional_metadata,
@@ -364,7 +366,7 @@ new_epi_archive <- function(
364366
#' changed, and so is kept.
365367
#' @keywords internal
366368
#' @importFrom dplyr filter
367-
compactify <- function(df, keys, tolerance = .Machine$double.eps^.5) {
369+
apply_compactify <- function(df, keys, tolerance = .Machine$double.eps^.5) {
368370
df %>%
369371
arrange(!!!keys) %>%
370372
filter(if_any(
@@ -379,7 +381,7 @@ compactify <- function(df, keys, tolerance = .Machine$double.eps^.5) {
379381
removed_by_compactify <- function(df, keys, tolerance) {
380382
df %>%
381383
arrange(!!!keys) %>%
382-
filter(if_any(
384+
filter(if_all(
383385
c(everything(), -version),
384386
~ is_locf(., tolerance)
385387
)) # nolint: object_usage_linter

R/revision_analysis.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ revision_summary <- function(epi_arch,
103103
if (should_compactify) {
104104
revision_behavior <- revision_behavior %>%
105105
arrange(across(c(geo_value, time_value, all_of(keys), version))) %>% # need to sort before compactifying
106-
compactify(c(keys, version), compactify_tol)
106+
apply_compactify(c(keys, version), compactify_tol)
107107
}
108108
revision_behavior <-
109109
revision_behavior %>%

man/apply_compactify.Rd

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/compactify.Rd

Lines changed: 0 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/revision_summary.Rd

Lines changed: 4 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/_snaps/revision-latency-functions.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,16 @@
5858
* 2 out of 19 (10.53%)
5959
Fraction of epi_key+time_values with
6060
No revisions:
61-
* 1 out of 7 (14.29%)
61+
* 2 out of 7 (28.57%)
6262
Quick revisions (last revision within 3 days of the `time_value`):
63-
* 3 out of 7 (42.86%)
63+
* 4 out of 7 (57.14%)
6464
Few revisions (At most 3 revisions for that `time_value`):
6565
* 6 out of 7 (85.71%)
6666
Fraction of revised epi_key+time_values which have:
6767
Less than 0.1 spread in relative value:
68-
* 3 out of 6 (50%)
68+
* 2 out of 5 (40%)
6969
Spread of more than 5.1 in actual value (when revised):
70-
* 3 out of 6 (50%)
70+
* 3 out of 5 (60%)
7171
days until within 20% of the latest value:
7272
Output
7373
min median mean max
@@ -76,10 +76,10 @@
7676
time_value geo_value n_revisions min_lag max_lag spread rel_spread
7777
<date> <chr> <dbl> <drtn> <drtn> <dbl> <dbl>
7878
1 2020-01-01 ak 6 2 days 19 days 101 0.990
79-
2 2020-01-01 al 2 0 days 19 days 99 0.99
79+
2 2020-01-01 al 1 0 days 19 days 99 0.99
8080
3 2020-01-02 ak 1 4 days 5 days 9 0.09
8181
4 2020-01-02 al 0 0 days 0 days 0 0
82-
5 2020-01-03 ak 1 3 days 4 days 0 NaN
82+
5 2020-01-03 ak 0 3 days 3 days 0 NaN
8383
6 2020-01-03 al 1 1 days 2 days 3 0.75
8484
7 2020-01-04 al 1 0 days 1 days 0 0
8585
time_near_latest

tests/testthat/test-archive.R

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ test_that("as_epi_archive custom name mapping works correctly", {
4848
)
4949
})
5050

51+
dumb_ex <- data.frame(
52+
geo_value = c("ca", "ca"),
53+
time_value = as.Date(c("2020-01-01", "2020-01-01")),
54+
value = c(1,1),
55+
version = as.Date(c("2020-01-01", "2020-01-02"))
56+
)
57+
test_that("new_epi_archive correctly detects and warns about compactification", {
58+
expect_snapshot(res <- dumb_ex %>% as_epi_archive())
59+
})
60+
5161
test_that("other_keys can only contain names of the data.frame columns", {
5262
expect_error(as_epi_archive(archive_data, other_keys = "xyz", compactify = FALSE),
5363
regexp = "`other_keys` must be contained in the column names of `x`."

tests/testthat/test-revision-latency-functions.R

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@ dummy_ex <- tibble::tribble(
3131
as_epi_archive(compactify = FALSE)
3232

3333
test_that("revision_summary works for a dummy dataset", {
34-
dummy_ex %>%
35-
revision_summary() %>%
36-
print(n = 10, width = 300)
3734
expect_snapshot(dummy_ex %>% revision_summary() %>% print(n = 10, width = 300))
3835
expect_snapshot(dummy_ex %>% revision_summary(drop_nas = FALSE) %>% print(n = 10, width = 300))
3936
})

0 commit comments

Comments
 (0)