88from epiweeks import Week
99
1010
def date_generator(startdate: date, enddate: date, time_type: str = "day") -> Iterable[Union[str, "Week"]]:
    """
    Generate every time value between two dates, inclusive.

    Parameters
    ----------
    startdate: date
        First date of the range.
    enddate: date
        Last date of the range (inclusive).
    time_type: str
        Temporal resolution, case-insensitive. ``"day"`` (the default) yields
        one ``"%Y-%m-%d"`` string per day; ``"week"`` yields one
        ``epiweeks.Week`` per epiweek that overlaps the range.

    Returns
    -------
    generator of str (``"day"``) or of epiweeks.Week (``"week"``)

    Raises
    ------
    ValueError
        If ``time_type`` is neither ``"day"`` nor ``"week"``. Because this is
        a generator, the error surfaces on first iteration.
    """
    resolution = time_type.lower()
    if resolution not in ("day", "week"):
        # Previously an unknown resolution silently produced an empty sequence.
        raise ValueError(f"time_type must be 'day' or 'week', got {time_type!r}")

    # An inverted range is empty for both resolutions.
    if startdate > enddate:
        return

    if resolution == "day":
        current_day = startdate
        while current_day <= enddate:
            yield current_day.strftime("%Y-%m-%d")
            current_day = current_day + timedelta(days=1)
    else:
        # Walk epiweek by epiweek rather than in 7-day strides from the raw
        # start date: striding from a mid-week start can overshoot enddate
        # and drop the final epiweek that contains it.
        current_week = Week.fromdate(startdate)
        final_week = Week.fromdate(enddate)
        while current_week <= final_week:
            yield current_week
            current_week = current_week + 1
2734
2835
2936def _parse_datetimes (date_int : int , time_type : str , date_format : str = "%Y%m%d" ) -> Union [pd .Timestamp , None ]:
@@ -34,8 +41,11 @@ def _parse_datetimes(date_int: int, time_type: str, date_format: str = "%Y%m%d")
3441
3542 Epiweeks use the CDC format.
3643
37- :param date_int: Int representation of date.
38- :param date_format: String of the date format to parse.
44+ date_int: Int representation of date.
45+ time_type: The temporal resolution to request this data. Most signals
46+ are available at the "day" resolution (the default); some are only
47+ available at the "week" resolution, representing an MMWR week ("epiweek").
48+ date_format: String of the date format to parse.
3949 :returns: Timestamp.
4050 """
4151 date_str = str (date_int )
@@ -55,8 +65,7 @@ def metadata() -> Union[pd.DataFrame, None]:
5565 -------
5666 pd.DataFrame of covidcast metadata.
5767 """
58- # pylint: disable=W0212
59- response = Epidata ._request ("covidcast_meta" )
68+ response = Epidata .covidcast_meta ()
6069
6170 if response ["result" ] != 1 :
6271 # Something failed in the API and we did not get real metadata
@@ -80,145 +89,43 @@ def signal(
8089 lag : int = None ,
8190 time_type : str = "day" ,
8291) -> Union [pd .DataFrame , None ]:
83- """Download a Pandas data frame for one signal.
84-
85- Obtains data for selected date ranges for all geographic regions of the
86- United States. Available data sources and signals are documented in the
87- `COVIDcast signal documentation
88- <https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_.
89- Most (but not all) data sources are available at the county level, but the
90- API can also return data aggregated to metropolitan statistical areas,
91- hospital referral regions, or states, as desired, by using the ``geo_type``
92- argument.
93-
94- The COVIDcast API tracks updates and changes to its underlying data, and
95- records the first date each observation became available. For example, a
96- data source may report its estimate for a specific state on June 3rd on June
97- 5th, once records become available. This data is considered "issued" on June
98- 5th. Later, the data source may update its estimate for June 3rd based on
99- revised data, creating a new issue on June 8th. By default, ``signal()``
100- returns the most recent issue available for every observation. The
101- ``as_of``, ``issues``, and ``lag`` parameters allow the user to select
102- specific issues instead, or to see all updates to observations. These
103- options are mutually exclusive; if you specify more than one, ``as_of`` will
104- take priority over ``issues``, which will take priority over ``lag``.
105-
106- Note that the API only tracks the initial value of an estimate and *changes*
107- to that value. If a value was first issued on June 5th and never updated,
108- asking for data issued on June 6th (using ``issues`` or ``lag``) would *not*
109- return that value, though asking for data ``as_of`` June 6th would.
110-
111- Note also that the API enforces a maximum result row limit; results beyond
112- the maximum limit are truncated. This limit is sufficient to fetch
113- observations in all counties in the United States on one day. This client
114- automatically splits queries for multiple days across multiple API calls.
115- However, if data for one day has been issued many times, using the
116- ``issues`` argument may return more results than the query limit. A warning
117- will be issued in this case. To see all results, split your query across
118- multiple calls with different ``issues`` arguments.
119-
120- See the `COVIDcast API documentation
121- <https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html>`_ for more
122- information on available geography types, signals, and data formats, and
123- further discussion of issue dates and data versioning.
124-
125- :param data_source: String identifying the data source to query, such as
92+ """
93+ Makes covidcast signal api call.
94+
95+ data_source: String identifying the data source to query, such as
12696 ``"fb-survey"``.
127- :param signal: String identifying the signal from that source to query,
97+ signal: String identifying the signal from that source to query,
12898 such as ``"smoothed_cli"``.
129- :param start_day: Query data beginning on this date. Provided as a
99+ start_day: Query data beginning on this date. Provided as a
130100 ``datetime.date`` object. If ``start_day`` is ``None``, defaults to the
131101 first day data is available for this signal. If ``time_type == "week"``, then
132102 this is rounded to the epiweek containing the day (i.e. the previous Sunday).
133- :param end_day: Query data up to this date, inclusive. Provided as a
103+ end_day: Query data up to this date, inclusive. Provided as a
134104 ``datetime.date`` object. If ``end_day`` is ``None``, defaults to the most
135105 recent day data is available for this signal. If ``time_type == "week"``, then
136106 this is rounded to the epiweek containing the day (i.e. the previous Sunday).
137- :param geo_type: The geography type for which to request this data, such as
107+ geo_type: The geography type for which to request this data, such as
138108 ``"county"`` or ``"state"``. Available types are described in the
139109 COVIDcast signal documentation. Defaults to ``"county"``.
140- :param geo_values: The geographies to fetch data for. The default, ``"*"``,
110+ geo_values: The geographies to fetch data for. The default, ``"*"``,
141111 fetches all geographies. To fetch one geography, specify its ID as a
142112 string; multiple geographies can be provided as an iterable (list, tuple,
143113 ...) of strings.
144- :param as_of: Fetch only data that was available on or before this date,
114+ as_of: Fetch only data that was available on or before this date,
145115 provided as a ``datetime.date`` object. If ``None``, the default, return
146116 the most recent available data. If ``time_type == "week"``, then
147117 this is rounded to the epiweek containing the day (i.e. the previous Sunday).
148- :param issues: Fetch only data that was published or updated ("issued") on
149- these dates. Provided as either a single ``datetime.date`` object,
150- indicating a single date to fetch data issued on, or a tuple or list
151- specifying (start, end) dates. In this case, return all data issued in
152- this range. There may be multiple rows for each observation, indicating
153- several updates to its value. If ``None``, the default, return the most
154- recently issued data. If ``time_type == "week"``, then these are rounded to
155- the epiweek containing the day (i.e. the previous Sunday).
156- :param lag: Integer. If, for example, ``lag=3``, fetch only data that was
118+ lag: Integer. If, for example, ``lag=3``, fetch only data that was
157119 published or updated exactly 3 days after the date. For example, a row
158120 with ``time_value`` of June 3 will only be included in the results if its
159121 data was issued or updated on June 6. If ``None``, the default, return the
160122 most recently issued data regardless of its lag.
161- :param time_type: The temporal resolution to request this data. Most signals
123+ time_type: The temporal resolution to request this data. Most signals
162124 are available at the "day" resolution (the default); some are only
163125 available at the "week" resolution, representing an MMWR week ("epiweek").
164126 :returns: A Pandas data frame with matching data, or ``None`` if no data is
165127 returned. Each row is one observation on one day in one geographic location.
166128 Contains the following columns:
167-
168- ``geo_value``
169- Identifies the location, such as a state name or county FIPS code. The
170- geographic coding used by COVIDcast is described in the `API
171- documentation here
172- <https://cmu-delphi.github.io/delphi-epidata/api/covidcast_geography.html>`_.
173-
174- ``signal``
175- Name of the signal, same as the value of the ``signal`` input argument. Used for
176- downstream functions to recognize where this signal is from.
177-
178- ``time_value``
179- Contains a `pandas Timestamp object
180- <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html>`_
181- identifying the date this estimate is for. For data with ``time_type = "week"``, this
182- is the first day of the corresponding epiweek.
183-
184- ``issue``
185- Contains a `pandas Timestamp object
186- <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html>`_
187- identifying the date this estimate was issued. For example, an estimate
188- with a ``time_value`` of June 3 might have been issued on June 5, after
189- the data for June 3rd was collected and ingested into the API.
190-
191- ``lag``
192- Integer giving the difference between ``issue`` and ``time_value``,
193- in days.
194-
195- ``value``
196- The signal quantity requested. For example, in a query for the
197- ``confirmed_cumulative_num`` signal from the ``usa-facts`` source,
198- this would be the cumulative number of confirmed cases in the area, as
199- of the ``time_value``.
200-
201- ``stderr``
202- The value's standard error, if available.
203-
204- ``sample_size``
205- Indicates the sample size available in that geography on that day;
206- sample size may not be available for all signals, due to privacy or
207- other constraints.
208-
209- ``geo_type``
210- Geography type for the signal, same as the value of the ``geo_type`` input argument.
211- Used for downstream functions to parse ``geo_value`` correctly
212-
213- ``data_source``
214- Name of the signal source, same as the value of the ``data_source`` input argument. Used for
215- downstream functions to recognize where this signal is from.
216-
217- Consult the `signal documentation
218- <https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_
219- for more details on how values and standard errors are calculated for
220- specific signals.
221-
222129 """
223130 if start_day > end_day :
224131 raise ValueError (
@@ -239,7 +146,7 @@ def signal(
239146 )
240147 if response ["result" ] != 1 :
241148 # Something failed in the API and we did not get real metadata
242- raise RuntimeError ("Error when fetching metadata from the API" , response ["message" ])
149+ raise RuntimeError ("Error when fetching signal data from the API" , response ["message" ])
243150
244151 api_df = pd .DataFrame .from_dict (response ["epidata" ])
245152 api_df ["issue" ] = pd .to_datetime (api_df ["issue" ], format = "%Y%m%d" )
0 commit comments