Skip to content

Commit c5cfb5e

Browse files
authored
Merge pull request #196 from cmu-delphi/192-to-194-update_epi_df_indexing_fun
Addressed issues 192-194
2 parents 9259796 + c2ced25 commit c5cfb5e

File tree

3 files changed

+84
-24
lines changed

3 files changed

+84
-24
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ Imports:
3737
tidyr,
3838
tidyselect,
3939
tsibble,
40-
utils,
41-
vctrs
40+
utils
4241
Suggests:
4342
covidcast,
4443
epidatr,
@@ -47,6 +46,7 @@ Suggests:
4746
outbreaks,
4847
rmarkdown,
4948
testthat (>= 3.0.0),
49+
vctrs,
5050
waldo (>= 0.3.1),
5151
withr
5252
VignetteBuilder:

R/methods-epi_df.R

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,22 +71,37 @@ summary.epi_df = function(object, ...) {
7171

7272
if (missing(i)) {
7373
i <- NULL
74-
i_arg <- NULL
7574
}
7675

7776
if (missing(j)) {
7877
j <- NULL
79-
j_arg <- NULL
8078
}
8179

8280
cn <- names(res)
83-
nr <- vctrs::vec_size(x)
84-
not_epi_df <- (!("time_value" %in% cn) || !("geo_value" %in% cn) || vctrs::vec_size(res) > nr || any(i > nr))
8581

86-
if (not_epi_df) return(tibble::as_tibble(res))
82+
# Duplicate columns, Abort
83+
dup_col_names = cn[duplicated(cn)]
84+
if (length(dup_col_names) != 0) {
85+
Abort(paste0("Column name(s) ",
86+
paste(unique(dup_col_names),
87+
collapse = ", "), " must not be duplicated."))
88+
}
89+
90+
not_epi_df <- !("time_value" %in% cn) || !("geo_value" %in% cn)
91+
92+
if (not_epi_df) {
93+
attributes(res)$metadata <- NULL
94+
return(tibble::as_tibble(res))
95+
}
96+
97+
# Use reclass as safeguard (in case class &/or metadata are dropped)
98+
res <- reclass(res, attr(x, "metadata"))
99+
100+
# Amend additional metadata if some other_keys cols are dropped in the subset
101+
old_other_keys = attr(x, "metadata")$other_keys
102+
attr(res, "metadata")$other_keys <- old_other_keys[old_other_keys %in% cn]
87103

88-
# Use reclass as safeguard (in case class & metadata are dropped)
89-
reclass(res, attr(x, "metadata"))
104+
res
90105
}
91106

92107
#' `dplyr` verbs

tests/testthat/test-methods-epi_df.R

Lines changed: 60 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@ toy_epi_df <- tibble::tibble(
77
length.out = 5
88
), times = 2),
99
geo_value = rep(c("ca", "hi"), each = 5),
10-
indicator_var = as.factor(rep(1:2, times = 5)),
11-
) %>% as_epi_df(additional_metadata = list(other_keys = "indicator_var"))
10+
indic_var1 = as.factor(rep(1:2, times = 5)),
11+
indic_var2 = as.factor(rep(letters[1:5], times = 2))
12+
) %>% as_epi_df(additional_metadata =
13+
list(other_keys = c("indic_var1", "indic_var2")))
1214

1315
att_toy = attr(toy_epi_df, "metadata")
1416

15-
test_that("head and tail do not drop the epi_df class", {
17+
test_that("Head and tail do not drop the epi_df class", {
1618
att_head = attr(head(toy_epi_df), "metadata")
1719
att_tail = attr(tail(toy_epi_df), "metadata")
1820

@@ -29,35 +31,43 @@ test_that("head and tail do not drop the epi_df class", {
2931
})
3032

3133

32-
test_that("subsetting drops or does not drop the epi_df class appropriately", {
34+
test_that("Subsetting drops & does not drop the epi_df class appropriately", {
3335

3436
# Row subset - should be epi_df
3537
row_subset = toy_epi_df[1:2, ]
3638
att_row_subset = attr(row_subset, "metadata")
3739

3840
expect_true(is_epi_df(row_subset))
3941
expect_equal(nrow(row_subset), 2L)
40-
expect_equal(ncol(row_subset), 5L)
42+
expect_equal(ncol(row_subset), 6L)
4143
expect_identical(att_row_subset$geo_type, att_toy$geo_type)
4244
expect_identical(att_row_subset$time_type, att_toy$time_type)
4345
expect_identical(att_row_subset$as_of, att_toy$as_of)
4446
expect_identical(att_row_subset$other_keys, att_toy$other_keys)
4547

46-
# Col subset - shouldn't be an epi_df
47-
col_subset = toy_epi_df[, 2:3]
48-
49-
expect_false(is_epi_df(col_subset))
50-
expect_true(tibble::is_tibble(col_subset))
51-
expect_equal(nrow(col_subset), 10L)
52-
expect_equal(ncol(col_subset), 2L)
53-
5448
# Row and col single value - shouldn't be an epi_df
5549
row_col_subset1 = toy_epi_df[1,2]
5650
expect_false(is_epi_df(row_col_subset1))
5751
expect_true(tibble::is_tibble(row_col_subset1))
5852
expect_equal(nrow(row_col_subset1), 1L)
5953
expect_equal(ncol(row_col_subset1), 1L)
6054

55+
# Col subset with no time_value - shouldn't be an epi_df
56+
col_subset1 = toy_epi_df[, c(1,3)]
57+
58+
expect_false(is_epi_df(col_subset1))
59+
expect_true(tibble::is_tibble(col_subset1))
60+
expect_equal(nrow(col_subset1), 10L)
61+
expect_equal(ncol(col_subset1), 2L)
62+
63+
# Col subset with no geo_value - shouldn't be an epi_df
64+
col_subset2 = toy_epi_df[, 2:3]
65+
66+
expect_false(is_epi_df(col_subset2))
67+
expect_true(tibble::is_tibble(col_subset2))
68+
expect_equal(nrow(col_subset2), 10L)
69+
expect_equal(ncol(col_subset2), 2L)
70+
6171
# Row and col subset that contains geo_value and time_value - should be epi_df
6272
row_col_subset2 = toy_epi_df[2:3,1:3]
6373
att_row_col_subset2 = attr(row_col_subset2, "metadata")
@@ -68,6 +78,41 @@ test_that("subsetting drops or does not drop the epi_df class appropriately", {
6878
expect_identical(att_row_col_subset2$geo_type, att_toy$geo_type)
6979
expect_identical(att_row_col_subset2$time_type, att_toy$time_type)
7080
expect_identical(att_row_col_subset2$as_of, att_toy$as_of)
71-
expect_identical(att_row_col_subset2$other_keys, att_toy$other_keys)
81+
expect_identical(att_row_col_subset2$other_keys, character(0))
82+
})
83+
84+
test_that("When duplicate cols in subset should abort", {
85+
expect_error(toy_epi_df[, c(2,2:3,4,4,4)],
86+
"Column name(s) time_value, y must not be duplicated.", fixed = T)
87+
expect_error(toy_epi_df[1:4, c(1,2:4,1)],
88+
"Column name(s) geo_value must not be duplicated.", fixed = T)
89+
})
90+
91+
test_that("Correct metadata when subset includes some of other_keys", {
92+
# Only include other_var of indic_var1
93+
only_indic_var1 = toy_epi_df[, 1:5]
94+
att_only_indic_var1 = attr(only_indic_var1, "metadata")
95+
96+
expect_true(is_epi_df(only_indic_var1))
97+
expect_equal(nrow(only_indic_var1), 10L)
98+
expect_equal(ncol(only_indic_var1), 5L)
99+
expect_identical(att_only_indic_var1$geo_type, att_toy$geo_type)
100+
expect_identical(att_only_indic_var1$time_type, att_toy$time_type)
101+
expect_identical(att_only_indic_var1$as_of, att_toy$as_of)
102+
expect_identical(att_only_indic_var1$other_keys, att_toy$other_keys[-2])
103+
104+
# Only include other_var of indic_var2
105+
only_indic_var2 = toy_epi_df[, c(1:4,6)]
106+
att_only_indic_var2 = attr(only_indic_var2, "metadata")
72107

73-
})
108+
expect_true(is_epi_df(only_indic_var2))
109+
expect_equal(nrow(only_indic_var2), 10L)
110+
expect_equal(ncol(only_indic_var2), 5L)
111+
expect_identical(att_only_indic_var2$geo_type, att_toy$geo_type)
112+
expect_identical(att_only_indic_var2$time_type, att_toy$time_type)
113+
expect_identical(att_only_indic_var2$as_of, att_toy$as_of)
114+
expect_identical(att_only_indic_var2$other_keys, att_toy$other_keys[-1])
115+
116+
# Including both original other_keys was already tested above
117+
})
118+

0 commit comments

Comments
 (0)