Skip to content

Commit eb496c8

Browse files
authored
Merge pull request #277 from cmu-delphi/lcb/grouped_epi_archive
Add `group_by` for `epi_archive` + other interface improvements
2 parents 8b00b3b + 87f3989 commit eb496c8

35 files changed

+2673
-753
lines changed

DESCRIPTION

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Authors@R: c(
66
person("Jacob", "Bien", role = "ctb"),
77
person("Logan", "Brooks", role = "aut"),
88
person("Rafael", "Catoia", role = "ctb"),
9+
person("Nat", "DeFries", role = "ctb"),
910
person("Daniel", "McDonald", role = "aut"),
1011
person("Rachel", "Lobay", role = "ctb"),
1112
person("Ken", "Mawer", role = "ctb"),
@@ -22,11 +23,12 @@ Description: This package introduces a common data structure for epidemiological
2223
License: MIT + file LICENSE
2324
Imports:
2425
data.table,
25-
dplyr,
26+
dplyr (>= 1.0.0),
2627
fabletools,
2728
feasts,
2829
generics,
2930
genlasso,
31+
lifecycle (>= 1.0.1),
3032
lubridate,
3133
magrittr,
3234
purrr,
@@ -35,7 +37,7 @@ Imports:
3537
slider,
3638
tibble,
3739
tidyr,
38-
tidyselect,
40+
tidyselect (>= 1.2.0),
3941
tsibble,
4042
utils,
4143
vctrs
@@ -63,3 +65,18 @@ RoxygenNote: 7.2.1
6365
Depends:
6466
R (>= 2.10)
6567
URL: https://cmu-delphi.github.io/epiprocess/
68+
Collate:
69+
'archive.R'
70+
'correlation.R'
71+
'data.R'
72+
'epi_df.R'
73+
'epiprocess.R'
74+
'methods-epi_archive.R'
75+
'grouped_epi_archive.R'
76+
'growth_rate.R'
77+
'methods-epi_df.R'
78+
'outliers.R'
79+
'reexports.R'
80+
'slide.R'
81+
'utils.R'
82+
'utils_pipe.R'

NAMESPACE

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,29 @@
11
# Generated by roxygen2: do not edit by hand
22

33
S3method("[",epi_df)
4-
S3method(arrange,epi_df)
4+
S3method("names<-",epi_df)
55
S3method(as_epi_df,data.frame)
66
S3method(as_epi_df,epi_df)
77
S3method(as_epi_df,tbl_df)
88
S3method(as_epi_df,tbl_ts)
99
S3method(as_tsibble,epi_df)
10-
S3method(filter,epi_df)
10+
S3method(dplyr_col_modify,col_modify_recorder_df)
11+
S3method(dplyr_col_modify,epi_df)
12+
S3method(dplyr_reconstruct,epi_df)
13+
S3method(dplyr_row_slice,epi_df)
14+
S3method(epix_truncate_versions_after,epi_archive)
15+
S3method(epix_truncate_versions_after,grouped_epi_archive)
16+
S3method(group_by,epi_archive)
1117
S3method(group_by,epi_df)
12-
S3method(group_modify,epi_df)
13-
S3method(mutate,epi_df)
18+
S3method(group_by,grouped_epi_archive)
19+
S3method(group_by_drop_default,grouped_epi_archive)
20+
S3method(groups,grouped_epi_archive)
1421
S3method(next_after,Date)
1522
S3method(next_after,integer)
1623
S3method(print,epi_df)
17-
S3method(relocate,epi_df)
18-
S3method(rename,epi_df)
19-
S3method(slice,epi_df)
2024
S3method(summary,epi_df)
2125
S3method(ungroup,epi_df)
26+
S3method(ungroup,grouped_epi_archive)
2227
S3method(unnest,epi_df)
2328
export("%>%")
2429
export(archive_cases_dv_subset)
@@ -35,12 +40,14 @@ export(epi_slide)
3540
export(epix_as_of)
3641
export(epix_merge)
3742
export(epix_slide)
43+
export(epix_truncate_versions_after)
3844
export(filter)
3945
export(group_by)
4046
export(group_modify)
4147
export(growth_rate)
4248
export(is_epi_archive)
4349
export(is_epi_df)
50+
export(is_grouped_epi_archive)
4451
export(max_version_with_row_in)
4552
export(mutate)
4653
export(new_epi_df)
@@ -60,9 +67,14 @@ importFrom(data.table,key)
6067
importFrom(data.table,set)
6168
importFrom(data.table,setkeyv)
6269
importFrom(dplyr,arrange)
70+
importFrom(dplyr,dplyr_col_modify)
71+
importFrom(dplyr,dplyr_reconstruct)
72+
importFrom(dplyr,dplyr_row_slice)
6373
importFrom(dplyr,filter)
6474
importFrom(dplyr,group_by)
75+
importFrom(dplyr,group_by_drop_default)
6576
importFrom(dplyr,group_modify)
77+
importFrom(dplyr,groups)
6678
importFrom(dplyr,mutate)
6779
importFrom(dplyr,relocate)
6880
importFrom(dplyr,rename)

NEWS.md

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,83 @@ Note that `epiprocess` uses the [Semantic Versioning
44
("semver")](https://semver.org/) scheme for all release versions, but not for
55
development versions. A ".9999" suffix indicates a development version.
66

7+
## Breaking changes:
8+
9+
* Changes to both `epi_slide` and `epix_slide`:
10+
* The `n`, `align`, and `before` arguments have been replaced by new `before`
11+
and `after` arguments. To migrate to the new version, replace these
12+
arguments in every `epi_slide` and `epix_slide` call. If you were only using
13+
the `n` argument, then this means replacing `n = <n value>` with `before =
14+
<n value> - 1`.
15+
* `epi_slide`'s time windows now extend `before` time steps before and
16+
`after` time steps after the corresponding `ref_time_values`. See
17+
`?epi_slide` for details on matching old alignments.
18+
* `epix_slide`'s time windows now extend `before` time steps before the
19+
corresponding `ref_time_values` all the way through the latest data
20+
available at the corresponding `ref_time_values`.
21+
* Slide functions now keep any grouping of `x` in their results, like
22+
`mutate` and `group_modify`.
23+
* To obtain the old behavior, `dplyr::ungroup` the slide results immediately.
24+
* Additional `epix_slide` changes:
25+
* `epix_slide`'s `group_by` argument has been replaced by `dplyr::group_by` and
26+
`dplyr::ungroup` S3 methods. The `group_by` method uses "data masking" (also
27+
referred to as "tidy evaluation") rather than "tidy selection".
28+
* Old syntax:
29+
* `x %>% epix_slide(<other args>, group_by=c(col1, col2))`
30+
* `x %>% epix_slide(<other args>, group_by=all_of(colname_vector))`
31+
* New syntax:
32+
* `x %>% group_by(col1, col2) %>% epix_slide(<other args>)`
33+
* `x %>% group_by(across(all_of(colname_vector))) %>% epix_slide(<other args>)`
34+
* `epix_slide` no longer defaults to grouping by non-`time_value`, non-`version`
35+
key columns, instead considering all data to be in one big group.
36+
* To obtain the old behavior, precede each `epix_slide` call lacking a
37+
`group_by` argument with an appropriate `group_by` call.
38+
* `epix_slide` now guesses `ref_time_values` to be a regularly spaced sequence
39+
covering all the `DT$version` values and the `version_end`, rather than the
40+
distinct `DT$time_value`s. To obtain the old behavior, pass in
41+
`ref_time_values = unique(<ungrouped archive>$DT$time_value)`.
42+
* `epi_archive`'s `clobberable_versions_start`'s default is now `NA`, so there
43+
will be no warnings by default about potential nonreproducibility. To obtain
44+
the old behavior, pass in `clobberable_versions_start =
45+
max_version_with_row_in(x)`.
46+
47+
## Potentially-breaking changes:
48+
49+
* Fixed `[` on grouped `epi_df`s to maintain the grouping if possible when
50+
dropping the `epi_df` class (e.g., when removing the `time_value` column).
51+
* Fixed `epi_df` operations to be more consistent about decaying into
52+
non-`epi_df`s when the result of the operation doesn't make sense as an
53+
`epi_df` (e.g., when removing the `time_value` column).
54+
* Changed `bind_rows` on grouped `epi_df`s to not drop the `epi_df` class. Like
55+
with ungrouped `epi_df`s, the metadata of the result is still simply taken
56+
from the first result, and may be inappropriate
57+
([#242](https://github.com/cmu-delphi/epiprocess/issues/242)).
58+
* `epi_slide` and `epix_slide` now raise an error rather than silently filtering
59+
out `ref_time_values` that don't meet their expectations.
60+
61+
## New features:
62+
63+
* `epix_slide`, `<epi_archive>$slide` have a new parameter `all_versions`. With
64+
`all_versions=TRUE`, `epix_slide` will pass a filtered `epi_archive` to each
65+
computation rather than an `epi_df` snapshot. This enables, e.g., performing
66+
pseudoprospective forecasts with a revision-aware forecaster using nested
67+
`epix_slide` operations.
68+
69+
## Improvements:
70+
71+
* Added `dplyr::group_by` and `dplyr::ungroup` S3 methods for `epi_archive`
72+
objects, plus corresponding `$group_by` and `$ungroup` R6 methods. The
73+
`group_by` implementation supports the `.add` and `.drop` arguments, and
74+
`ungroup` supports partial ungrouping with `...`.
75+
* `as_epi_archive`, `epi_archive$new` now perform checks for the key uniqueness
76+
requirement (part of
77+
[#154](https://github.com/cmu-delphi/epiprocess/issues/154)).
78+
779
## Cleanup:
880

981
* Added a `NEWS.md` file to track changes to the package.
82+
* Implemented `?dplyr::dplyr_extending` for `epi_df`s
83+
([#223](https://github.com/cmu-delphi/epiprocess/issues/223)).
1084

1185
# epiprocess 0.5.0:
1286

0 commit comments

Comments
 (0)