-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathas_epi_df.Rd
148 lines (119 loc) · 5.58 KB
/
as_epi_df.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/epi_df.R
\name{as_epi_df}
\alias{as_epi_df}
\alias{as_epi_df.epi_df}
\alias{as_epi_df.tbl_df}
\alias{as_epi_df.data.frame}
\alias{as_epi_df.tbl_ts}
\title{Convert to \code{epi_df} format}
\usage{
as_epi_df(x, ...)
\method{as_epi_df}{epi_df}(x, ...)
\method{as_epi_df}{tbl_df}(x, geo_type, time_type, as_of, additional_metadata = list(), ...)
\method{as_epi_df}{data.frame}(x, geo_type, time_type, as_of, additional_metadata = list(), ...)
\method{as_epi_df}{tbl_ts}(x, geo_type, time_type, as_of, additional_metadata = list(), ...)
}
\arguments{
\item{x}{A data.frame, \link[tibble:tibble]{tibble::tibble}, or \link[tsibble:tsibble]{tsibble::tsibble} to be converted.}
\item{...}{Additional arguments passed to methods.}
\item{geo_type}{Type for the geo values. If missing, then the function will
attempt to infer it from the geo values present; if this fails, then it
will be set to "custom".}
\item{time_type}{Type for the time values. If missing, then the function will
attempt to infer it from the time values present; if this fails, then it
will be set to "custom".}
\item{as_of}{Time value representing the time at which the given data were
available. For example, if \code{as_of} is January 31, 2022, then the \code{epi_df}
object that is created would represent the most up-to-date version of the
data available as of January 31, 2022. If the \code{as_of} argument is missing,
then the current date-time will be used.}
\item{additional_metadata}{List of additional metadata to attach to the
\code{epi_df} object. The metadata will have \code{geo_type}, \code{time_type}, and
\code{as_of} fields; named entries from the passed list will be included as
well. If your tibble has additional keys, be sure to specify them as a
character vector in the \code{other_keys} component of \code{additional_metadata}.}
}
\value{
An \code{epi_df} object.
}
\description{
Converts a data frame, tibble, or tsibble into an \code{epi_df} object. See the \href{https://cmu-delphi.github.io/epiprocess/articles/epiprocess.html}{getting started guide} for
examples.
}
\section{Methods (by class)}{
\itemize{
\item \code{as_epi_df(epi_df)}: Simply returns the \code{epi_df} object unchanged.
\item \code{as_epi_df(tbl_df)}: The input tibble \code{x} must contain the columns
\code{geo_value} and \code{time_value}, or column names that uniquely map onto these
(e.g. \code{date} or \code{province}). Alternatively, you can specify the conversion
explicitly (\code{time_value = someWeirdColumnName}). All other columns not
specified as \code{other_keys} will be preserved as is, and treated as measured
variables.
If \code{as_of} is missing, then the function will try to guess it from an
\code{as_of}, \code{issue}, or \code{version} column of \code{x} (if any of these are present),
or from an \code{as_of} field in its metadata (stored in its attributes); if
this fails, then the current date-time will be used.
\item \code{as_epi_df(data.frame)}: Works analogously to \code{as_epi_df.tbl_df()}.
\item \code{as_epi_df(tbl_ts)}: Works analogously to \code{as_epi_df.tbl_df()}, except that
the \code{tbl_ts} class is dropped, and any key variables (other than
"geo_value") are added to the metadata of the returned object, under the
\code{other_keys} field.
}}
\examples{
# Convert a `tsibble` that has county code as an extra key
# Notice that county code should be a character string to preserve any leading zeroes
ex1_input <- tibble::tibble(
geo_value = rep(c("ca", "fl", "pa"), each = 3),
county_code = c(
"06059", "06061", "06067",
"12111", "12113", "12117",
"42101", "42103", "42105"
),
time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
by = "day"
), length.out = length(geo_value)),
value = 1:length(geo_value) + 0.01 * rnorm(length(geo_value))
) \%>\%
tsibble::as_tsibble(index = time_value, key = c(geo_value, county_code))
# The `other_keys` metadata (`"county_code"` in this case) is automatically
# inferred from the `tsibble`'s `key`:
ex1 <- as_epi_df(x = ex1_input, geo_type = "state", time_type = "day", as_of = "2020-06-03")
attr(ex1, "metadata")[["other_keys"]]
# Dealing with misspecified column names:
# Geographical and temporal information must be provided in columns named
# `geo_value` and `time_value`; if we start from a data frame with a
# different format, it must be converted to use `geo_value` and `time_value`
# before calling `as_epi_df`.
ex2_input <- tibble::tibble(
state = rep(c("ca", "fl", "pa"), each = 3), # misnamed
pol = rep(c("blue", "swing", "swing"), each = 3), # extra key
reported_date = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
by = "day"
), length.out = length(state)), # misnamed
value = 1:length(state) + 0.01 * rnorm(length(state))
)
print(ex2_input)
ex2 <- ex2_input \%>\%
dplyr::rename(geo_value = state, time_value = reported_date) \%>\%
as_epi_df(
geo_type = "state", as_of = "2020-06-03",
additional_metadata = list(other_keys = "pol")
)
attr(ex2, "metadata")
# Adding additional keys to an `epi_df` object
ex3_input <- jhu_csse_county_level_subset \%>\%
dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") \%>\%
dplyr::slice_tail(n = 6)
ex3 <- ex3_input \%>\%
tsibble::as_tsibble() \%>\% # needed to add the additional metadata
# add 2 extra keys
dplyr::mutate(
state = rep("MA", 6),
pol = rep(c("blue", "swing", "swing"), each = 2)
) \%>\%
# the 2 extra keys we added have to be specified in the other_keys
# component of additional_metadata.
as_epi_df(additional_metadata = list(other_keys = c("state", "pol")))
attr(ex3, "metadata")
}