8
8
from epiweeks import Week
9
9
10
10
11
def date_generator(
    startdate: date, enddate: date, time_type: str = "day"
) -> "Iterable[Union[str, Week]]":
    """
    Generate every time value between two dates, inclusive.

    Parameters
    ----------
    startdate: date
        First date of the range.
    enddate: date
        Last date of the range (inclusive).
    time_type: str
        Temporal resolution, case-insensitive. ``"day"`` (the default) yields
        one ``"%Y-%m-%d"`` string per calendar day. ``"week"`` yields one
        ``epiweeks.Week`` per epiweek, from the epiweek containing
        ``startdate`` through the epiweek containing ``enddate``.

    Returns
    -------
    generator of str (``"day"``) or of epiweeks.Week (``"week"``)

    Raises
    ------
    ValueError
        If ``time_type`` is neither ``"day"`` nor ``"week"``. Because this is
        a generator, the error surfaces when iteration starts.
    """
    resolution = time_type.lower()
    if resolution not in ("day", "week"):
        raise ValueError(
            f"Unsupported time_type {time_type!r}; expected 'day' or 'week'."
        )

    # An inverted range is empty for both resolutions. Checking it up front
    # matters for "week", where both dates may fall in the same epiweek.
    if startdate > enddate:
        return

    if resolution == "day":
        current = startdate
        while current <= enddate:
            yield current.strftime("%Y-%m-%d")
            current = current + timedelta(days=1)
        return

    # Step through whole epiweeks instead of adding 7 days to an unaligned
    # start date; otherwise a range such as Wednesday -> the following Monday
    # would miss the epiweek that contains the end date.
    epiweek = Week.fromdate(startdate)
    last_epiweek = Week.fromdate(enddate)
    while epiweek <= last_epiweek:
        yield epiweek
        epiweek = epiweek + 1
27
34
28
35
29
36
def _parse_datetimes (date_int : int , time_type : str , date_format : str = "%Y%m%d" ) -> Union [pd .Timestamp , None ]:
@@ -34,8 +41,11 @@ def _parse_datetimes(date_int: int, time_type: str, date_format: str = "%Y%m%d")
34
41
35
42
Epiweeks use the CDC format.
36
43
37
- :param date_int: Int representation of date.
38
- :param date_format: String of the date format to parse.
44
+ date_int: Int representation of date.
45
+ time_type: The temporal resolution to request this data. Most signals
46
+ are available at the "day" resolution (the default); some are only
47
+ available at the "week" resolution, representing an MMWR week ("epiweek").
48
+ date_format: String of the date format to parse.
39
49
:returns: Timestamp.
40
50
"""
41
51
date_str = str (date_int )
@@ -55,8 +65,7 @@ def metadata() -> Union[pd.DataFrame, None]:
55
65
-------
56
66
pd.DataFrame of covidcast metadata.
57
67
"""
58
- # pylint: disable=W0212
59
- response = Epidata ._request ("covidcast_meta" )
68
+ response = Epidata .covidcast_meta ()
60
69
61
70
if response ["result" ] != 1 :
62
71
# Something failed in the API and we did not get real metadata
@@ -80,145 +89,43 @@ def signal(
80
89
lag : int = None ,
81
90
time_type : str = "day" ,
82
91
) -> Union [pd .DataFrame , None ]:
83
- """Download a Pandas data frame for one signal.
84
-
85
- Obtains data for selected date ranges for all geographic regions of the
86
- United States. Available data sources and signals are documented in the
87
- `COVIDcast signal documentation
88
- <https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_.
89
- Most (but not all) data sources are available at the county level, but the
90
- API can also return data aggregated to metropolitan statistical areas,
91
- hospital referral regions, or states, as desired, by using the ``geo_type``
92
- argument.
93
-
94
- The COVIDcast API tracks updates and changes to its underlying data, and
95
- records the first date each observation became available. For example, a
96
- data source may report its estimate for a specific state on June 3rd on June
97
- 5th, once records become available. This data is considered "issued" on June
98
- 5th. Later, the data source may update its estimate for June 3rd based on
99
- revised data, creating a new issue on June 8th. By default, ``signal()``
100
- returns the most recent issue available for every observation. The
101
- ``as_of``, ``issues``, and ``lag`` parameters allow the user to select
102
- specific issues instead, or to see all updates to observations. These
103
- options are mutually exclusive; if you specify more than one, ``as_of`` will
104
- take priority over ``issues``, which will take priority over ``lag``.
105
-
106
- Note that the API only tracks the initial value of an estimate and *changes*
107
- to that value. If a value was first issued on June 5th and never updated,
108
- asking for data issued on June 6th (using ``issues`` or ``lag``) would *not*
109
- return that value, though asking for data ``as_of`` June 6th would.
110
-
111
- Note also that the API enforces a maximum result row limit; results beyond
112
- the maximum limit are truncated. This limit is sufficient to fetch
113
- observations in all counties in the United States on one day. This client
114
- automatically splits queries for multiple days across multiple API calls.
115
- However, if data for one day has been issued many times, using the
116
- ``issues`` argument may return more results than the query limit. A warning
117
- will be issued in this case. To see all results, split your query across
118
- multiple calls with different ``issues`` arguments.
119
-
120
- See the `COVIDcast API documentation
121
- <https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html>`_ for more
122
- information on available geography types, signals, and data formats, and
123
- further discussion of issue dates and data versioning.
124
-
125
- :param data_source: String identifying the data source to query, such as
92
+ """
93
+ Makes covidcast signal api call.
94
+
95
+ data_source: String identifying the data source to query, such as
126
96
``"fb-survey"``.
127
- :param signal: String identifying the signal from that source to query,
97
+ signal: String identifying the signal from that source to query,
128
98
such as ``"smoothed_cli"``.
129
- :param start_day: Query data beginning on this date. Provided as a
99
+ start_day: Query data beginning on this date. Provided as a
130
100
``datetime.date`` object. If ``start_day`` is ``None``, defaults to the
131
101
first day data is available for this signal. If ``time_type == "week"``, then
132
102
this is rounded to the epiweek containing the day (i.e. the previous Sunday).
133
- :param end_day: Query data up to this date, inclusive. Provided as a
103
+ end_day: Query data up to this date, inclusive. Provided as a
134
104
``datetime.date`` object. If ``end_day`` is ``None``, defaults to the most
135
105
recent day data is available for this signal. If ``time_type == "week"``, then
136
106
this is rounded to the epiweek containing the day (i.e. the previous Sunday).
137
- :param geo_type: The geography type for which to request this data, such as
107
+ geo_type: The geography type for which to request this data, such as
138
108
``"county"`` or ``"state"``. Available types are described in the
139
109
COVIDcast signal documentation. Defaults to ``"county"``.
140
- :param geo_values: The geographies to fetch data for. The default, ``"*"``,
110
+ geo_values: The geographies to fetch data for. The default, ``"*"``,
141
111
fetches all geographies. To fetch one geography, specify its ID as a
142
112
string; multiple geographies can be provided as an iterable (list, tuple,
143
113
...) of strings.
144
- :param as_of: Fetch only data that was available on or before this date,
114
+ as_of: Fetch only data that was available on or before this date,
145
115
provided as a ``datetime.date`` object. If ``None``, the default, return
146
116
the most recent available data. If ``time_type == "week"``, then
147
117
this is rounded to the epiweek containing the day (i.e. the previous Sunday).
148
- :param issues: Fetch only data that was published or updated ("issued") on
149
- these dates. Provided as either a single ``datetime.date`` object,
150
- indicating a single date to fetch data issued on, or a tuple or list
151
- specifying (start, end) dates. In this case, return all data issued in
152
- this range. There may be multiple rows for each observation, indicating
153
- several updates to its value. If ``None``, the default, return the most
154
- recently issued data. If ``time_type == "week"``, then these are rounded to
155
- the epiweek containing the day (i.e. the previous Sunday).
156
- :param lag: Integer. If, for example, ``lag=3``, fetch only data that was
118
+ lag: Integer. If, for example, ``lag=3``, fetch only data that was
157
119
published or updated exactly 3 days after the date. For example, a row
158
120
with ``time_value`` of June 3 will only be included in the results if its
159
121
data was issued or updated on June 6. If ``None``, the default, return the
160
122
most recently issued data regardless of its lag.
161
- :param time_type: The temporal resolution to request this data. Most signals
123
+ time_type: The temporal resolution to request this data. Most signals
162
124
are available at the "day" resolution (the default); some are only
163
125
available at the "week" resolution, representing an MMWR week ("epiweek").
164
126
:returns: A Pandas data frame with matching data, or ``None`` if no data is
165
127
returned. Each row is one observation on one day in one geographic location.
166
128
Contains the following columns:
167
-
168
- ``geo_value``
169
- Identifies the location, such as a state name or county FIPS code. The
170
- geographic coding used by COVIDcast is described in the `API
171
- documentation here
172
- <https://cmu-delphi.github.io/delphi-epidata/api/covidcast_geography.html>`_.
173
-
174
- ``signal``
175
- Name of the signal, same as the value of the ``signal`` input argument. Used for
176
- downstream functions to recognize where this signal is from.
177
-
178
- ``time_value``
179
- Contains a `pandas Timestamp object
180
- <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html>`_
181
- identifying the date this estimate is for. For data with ``time_type = "week"``, this
182
- is the first day of the corresponding epiweek.
183
-
184
- ``issue``
185
- Contains a `pandas Timestamp object
186
- <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html>`_
187
- identifying the date this estimate was issued. For example, an estimate
188
- with a ``time_value`` of June 3 might have been issued on June 5, after
189
- the data for June 3rd was collected and ingested into the API.
190
-
191
- ``lag``
192
- Integer giving the difference between ``issue`` and ``time_value``,
193
- in days.
194
-
195
- ``value``
196
- The signal quantity requested. For example, in a query for the
197
- ``confirmed_cumulative_num`` signal from the ``usa-facts`` source,
198
- this would be the cumulative number of confirmed cases in the area, as
199
- of the ``time_value``.
200
-
201
- ``stderr``
202
- The value's standard error, if available.
203
-
204
- ``sample_size``
205
- Indicates the sample size available in that geography on that day;
206
- sample size may not be available for all signals, due to privacy or
207
- other constraints.
208
-
209
- ``geo_type``
210
- Geography type for the signal, same as the value of the ``geo_type`` input argument.
211
- Used for downstream functions to parse ``geo_value`` correctly
212
-
213
- ``data_source``
214
- Name of the signal source, same as the value of the ``data_source`` input argument. Used for
215
- downstream functions to recognize where this signal is from.
216
-
217
- Consult the `signal documentation
218
- <https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_
219
- for more details on how values and standard errors are calculated for
220
- specific signals.
221
-
222
129
"""
223
130
if start_day > end_day :
224
131
raise ValueError (
@@ -239,7 +146,7 @@ def signal(
239
146
)
240
147
if response ["result" ] != 1 :
241
148
# Something failed in the API and we did not get real metadata
242
- raise RuntimeError ("Error when fetching metadata from the API" , response ["message" ])
149
+ raise RuntimeError ("Error when fetching signal data from the API" , response ["message" ])
243
150
244
151
api_df = pd .DataFrame .from_dict (response ["epidata" ])
245
152
api_df ["issue" ] = pd .to_datetime (api_df ["issue" ], format = "%Y%m%d" )
0 commit comments