Skip to content

Commit a2a149f

Browse files
committed
implementing suggested changes
1 parent 6e22db8 commit a2a149f

22 files changed

+36
-128
lines changed

_delphi_utils_python/delphi_utils/covidcast_wrapper.py

Lines changed: 30 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,29 @@
88
from epiweeks import Week
99

1010

11-
def date_generator(startdate: date, enddate: date) -> Iterable[date]:
11+
def date_generator(startdate: date, enddate: date, time_type: str) -> Iterable[date]:
1212
"""
1313
Take a start date and an end date and generate the dates (or epiweeks) between them, inclusive.
1414
1515
Parameters
1616
----------
1717
startdate: date
1818
enddate: date
19+
time_type: str
1920
2021
Returns
2122
-------
2223
generator of str ("%Y-%m-%d") when time_type is "day", or of epiweeks.Week when time_type is "week"
2324
"""
24-
while startdate <= enddate:
25-
yield startdate.strftime("%Y-%m-%d")
26-
startdate = startdate + timedelta(days=1)
25+
if time_type.lower() == "day":
26+
while startdate <= enddate:
27+
yield startdate.strftime("%Y-%m-%d")
28+
startdate = startdate + timedelta(days=1)
29+
elif time_type.lower() == "week":
30+
while startdate <= enddate:
31+
epiweek = Week.fromdate(startdate)
32+
yield epiweek
33+
startdate = startdate + timedelta(days=7)
2734

2835

2936
def _parse_datetimes(date_int: int, time_type: str, date_format: str = "%Y%m%d") -> Union[pd.Timestamp, None]:
@@ -34,8 +41,11 @@ def _parse_datetimes(date_int: int, time_type: str, date_format: str = "%Y%m%d")
3441
3542
Epiweeks use the CDC format.
3643
37-
:param date_int: Int representation of date.
38-
:param date_format: String of the date format to parse.
44+
date_int: Int representation of date.
45+
time_type: The temporal resolution to request this data. Most signals
46+
are available at the "day" resolution (the default); some are only
47+
available at the "week" resolution, representing an MMWR week ("epiweek").
48+
date_format: String of the date format to parse.
3949
:returns: Timestamp.
4050
"""
4151
date_str = str(date_int)
@@ -55,8 +65,7 @@ def metadata() -> Union[pd.DataFrame, None]:
5565
-------
5666
pd.DataFrame of covidcast metadata.
5767
"""
58-
# pylint: disable=W0212
59-
response = Epidata._request("covidcast_meta")
68+
response = Epidata.covidcast_meta()
6069

6170
if response["result"] != 1:
6271
# Something failed in the API and we did not get real metadata
@@ -80,145 +89,43 @@ def signal(
8089
lag: int = None,
8190
time_type: str = "day",
8291
) -> Union[pd.DataFrame, None]:
83-
"""Download a Pandas data frame for one signal.
84-
85-
Obtains data for selected date ranges for all geographic regions of the
86-
United States. Available data sources and signals are documented in the
87-
`COVIDcast signal documentation
88-
<https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_.
89-
Most (but not all) data sources are available at the county level, but the
90-
API can also return data aggregated to metropolitan statistical areas,
91-
hospital referral regions, or states, as desired, by using the ``geo_type``
92-
argument.
93-
94-
The COVIDcast API tracks updates and changes to its underlying data, and
95-
records the first date each observation became available. For example, a
96-
data source may report its estimate for a specific state on June 3rd on June
97-
5th, once records become available. This data is considered "issued" on June
98-
5th. Later, the data source may update its estimate for June 3rd based on
99-
revised data, creating a new issue on June 8th. By default, ``signal()``
100-
returns the most recent issue available for every observation. The
101-
``as_of``, ``issues``, and ``lag`` parameters allow the user to select
102-
specific issues instead, or to see all updates to observations. These
103-
options are mutually exclusive; if you specify more than one, ``as_of`` will
104-
take priority over ``issues``, which will take priority over ``lag``.
105-
106-
Note that the API only tracks the initial value of an estimate and *changes*
107-
to that value. If a value was first issued on June 5th and never updated,
108-
asking for data issued on June 6th (using ``issues`` or ``lag``) would *not*
109-
return that value, though asking for data ``as_of`` June 6th would.
110-
111-
Note also that the API enforces a maximum result row limit; results beyond
112-
the maximum limit are truncated. This limit is sufficient to fetch
113-
observations in all counties in the United States on one day. This client
114-
automatically splits queries for multiple days across multiple API calls.
115-
However, if data for one day has been issued many times, using the
116-
``issues`` argument may return more results than the query limit. A warning
117-
will be issued in this case. To see all results, split your query across
118-
multiple calls with different ``issues`` arguments.
119-
120-
See the `COVIDcast API documentation
121-
<https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html>`_ for more
122-
information on available geography types, signals, and data formats, and
123-
further discussion of issue dates and data versioning.
124-
125-
:param data_source: String identifying the data source to query, such as
92+
"""
93+
Make a COVIDcast signal API call.
94+
95+
data_source: String identifying the data source to query, such as
12696
``"fb-survey"``.
127-
:param signal: String identifying the signal from that source to query,
97+
signal: String identifying the signal from that source to query,
12898
such as ``"smoothed_cli"``.
129-
:param start_day: Query data beginning on this date. Provided as a
99+
start_day: Query data beginning on this date. Provided as a
130100
``datetime.date`` object. If ``start_day`` is ``None``, defaults to the
131101
first day data is available for this signal. If ``time_type == "week"``, then
132102
this is rounded to the epiweek containing the day (i.e. the previous Sunday).
133-
:param end_day: Query data up to this date, inclusive. Provided as a
103+
end_day: Query data up to this date, inclusive. Provided as a
134104
``datetime.date`` object. If ``end_day`` is ``None``, defaults to the most
135105
recent day data is available for this signal. If ``time_type == "week"``, then
136106
this is rounded to the epiweek containing the day (i.e. the previous Sunday).
137-
:param geo_type: The geography type for which to request this data, such as
107+
geo_type: The geography type for which to request this data, such as
138108
``"county"`` or ``"state"``. Available types are described in the
139109
COVIDcast signal documentation. Defaults to ``"county"``.
140-
:param geo_values: The geographies to fetch data for. The default, ``"*"``,
110+
geo_values: The geographies to fetch data for. The default, ``"*"``,
141111
fetches all geographies. To fetch one geography, specify its ID as a
142112
string; multiple geographies can be provided as an iterable (list, tuple,
143113
...) of strings.
144-
:param as_of: Fetch only data that was available on or before this date,
114+
as_of: Fetch only data that was available on or before this date,
145115
provided as a ``datetime.date`` object. If ``None``, the default, return
146116
the most recent available data. If ``time_type == "week"``, then
147117
this is rounded to the epiweek containing the day (i.e. the previous Sunday).
148-
:param issues: Fetch only data that was published or updated ("issued") on
149-
these dates. Provided as either a single ``datetime.date`` object,
150-
indicating a single date to fetch data issued on, or a tuple or list
151-
specifying (start, end) dates. In this case, return all data issued in
152-
this range. There may be multiple rows for each observation, indicating
153-
several updates to its value. If ``None``, the default, return the most
154-
recently issued data. If ``time_type == "week"``, then these are rounded to
155-
the epiweek containing the day (i.e. the previous Sunday).
156-
:param lag: Integer. If, for example, ``lag=3``, fetch only data that was
118+
lag: Integer. If, for example, ``lag=3``, fetch only data that was
157119
published or updated exactly 3 days after the date. For example, a row
158120
with ``time_value`` of June 3 will only be included in the results if its
159121
data was issued or updated on June 6. If ``None``, the default, return the
160122
most recently issued data regardless of its lag.
161-
:param time_type: The temporal resolution to request this data. Most signals
123+
time_type: The temporal resolution to request this data. Most signals
162124
are available at the "day" resolution (the default); some are only
163125
available at the "week" resolution, representing an MMWR week ("epiweek").
164126
:returns: A Pandas data frame with matching data, or ``None`` if no data is
165127
returned. Each row is one observation on one day in one geographic location.
166128
Contains the following columns:
167-
168-
``geo_value``
169-
Identifies the location, such as a state name or county FIPS code. The
170-
geographic coding used by COVIDcast is described in the `API
171-
documentation here
172-
<https://cmu-delphi.github.io/delphi-epidata/api/covidcast_geography.html>`_.
173-
174-
``signal``
175-
Name of the signal, same as the value of the ``signal`` input argument. Used for
176-
downstream functions to recognize where this signal is from.
177-
178-
``time_value``
179-
Contains a `pandas Timestamp object
180-
<https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html>`_
181-
identifying the date this estimate is for. For data with ``time_type = "week"``, this
182-
is the first day of the corresponding epiweek.
183-
184-
``issue``
185-
Contains a `pandas Timestamp object
186-
<https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html>`_
187-
identifying the date this estimate was issued. For example, an estimate
188-
with a ``time_value`` of June 3 might have been issued on June 5, after
189-
the data for June 3rd was collected and ingested into the API.
190-
191-
``lag``
192-
Integer giving the difference between ``issue`` and ``time_value``,
193-
in days.
194-
195-
``value``
196-
The signal quantity requested. For example, in a query for the
197-
``confirmed_cumulative_num`` signal from the ``usa-facts`` source,
198-
this would be the cumulative number of confirmed cases in the area, as
199-
of the ``time_value``.
200-
201-
``stderr``
202-
The value's standard error, if available.
203-
204-
``sample_size``
205-
Indicates the sample size available in that geography on that day;
206-
sample size may not be available for all signals, due to privacy or
207-
other constraints.
208-
209-
``geo_type``
210-
Geography type for the signal, same as the value of the ``geo_type`` input argument.
211-
Used for downstream functions to parse ``geo_value`` correctly
212-
213-
``data_source``
214-
Name of the signal source, same as the value of the ``data_source`` input argument. Used for
215-
downstream functions to recognize where this signal is from.
216-
217-
Consult the `signal documentation
218-
<https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_
219-
for more details on how values and standard errors are calculated for
220-
specific signals.
221-
222129
"""
223130
if start_day > end_day:
224131
raise ValueError(
@@ -239,7 +146,7 @@ def signal(
239146
)
240147
if response["result"] != 1:
241148
# Something failed in the API and we did not get real signal data
242-
raise RuntimeError("Error when fetching metadata from the API", response["message"])
149+
raise RuntimeError("Error when fetching signal data from the API", response["message"])
243150

244151
api_df = pd.DataFrame.from_dict(response["epidata"])
245152
api_df["issue"] = pd.to_datetime(api_df["issue"], format="%Y%m%d")

_delphi_utils_python/delphi_utils/validator/dynamic.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,6 @@ def validate(self, all_frames, report):
8080
# Get 14 days prior to the earliest list date
8181
outlier_lookbehind = timedelta(days=14)
8282

83-
# Authenticate API
84-
# Epidata.auth = ("epidata", api)
8583

8684
# Get all expected combinations of geo_type and signal.
8785
geo_signal_combos = get_geo_signal_combos(self.params.data_source,

_delphi_utils_python/tests/test_covidcast_wrapper.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,22 @@
44
from delphi_utils import covidcast_wrapper
55
import covidcast
66
from freezegun import freeze_time
7+
from delphi_epidata import Epidata
78
from pandas.testing import assert_frame_equal
89

910
TEST_DIR = Path(__file__).parent
1011
class TestCovidcastWrapper:
12+
Epidata.debug = True
1113
def test_metadata(self):
1214
expected_df = covidcast.metadata()
1315
df = covidcast_wrapper.metadata()
1416
assert_frame_equal(expected_df, df)
1517

16-
@freeze_time("2024-07-29")
18+
@freeze_time("2022-01-29")
1719
def test_signal(self):
18-
meta_df = covidcast_wrapper.metadata()
19-
data_filter = ((meta_df["max_time"] >= datetime(year=2024, month=6, day=1)) & (meta_df["time_type"] == "day"))
20+
meta_df = pd.read_pickle(f"{TEST_DIR}/test_data/covidcast_metadata.pkl")
21+
22+
data_filter = (meta_df["max_time"] >= datetime(year=2024, month=6, day=1))
2023
signal_df = meta_df[data_filter].groupby("data_source")["signal"].agg(['unique'])
2124
enddate = datetime.today()
2225
startdate = enddate - timedelta(days=15)
360 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)