Skip to content

Commit a2a149f

Browse files
committed
implementing suggested changes
1 parent 6e22db8 commit a2a149f

22 files changed

+36
-128
lines changed

_delphi_utils_python/delphi_utils/covidcast_wrapper.py

+30-123
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,29 @@
88
from epiweeks import Week
99

1010

11-
def date_generator(startdate: date, enddate: date) -> Iterable[date]:
11+
def date_generator(startdate: date, enddate: date, time_type: str) -> Iterable[date]:
1212
"""
1313
Take a start date and an end date and generate each date string (or epiweek) in that range.
1414
1515
Parameters
1616
----------
1717
startdate: date
1818
enddate: date
19+
time_type: str
1920
2021
Returns
2122
-------
2223
generator of str
2324
"""
24-
while startdate <= enddate:
25-
yield startdate.strftime("%Y-%m-%d")
26-
startdate = startdate + timedelta(days=1)
25+
if time_type.lower() == "day":
26+
while startdate <= enddate:
27+
yield startdate.strftime("%Y-%m-%d")
28+
startdate = startdate + timedelta(days=1)
29+
elif time_type.lower() == "week":
30+
while startdate <= enddate:
31+
epiweek = Week.fromdate(startdate)
32+
yield epiweek
33+
startdate = startdate + timedelta(days=7)
2734

2835

2936
def _parse_datetimes(date_int: int, time_type: str, date_format: str = "%Y%m%d") -> Union[pd.Timestamp, None]:
@@ -34,8 +41,11 @@ def _parse_datetimes(date_int: int, time_type: str, date_format: str = "%Y%m%d")
3441
3542
Epiweeks use the CDC format.
3643
37-
:param date_int: Int representation of date.
38-
:param date_format: String of the date format to parse.
44+
date_int: Int representation of date.
45+
time_type: The temporal resolution to request this data. Most signals
46+
are available at the "day" resolution (the default); some are only
47+
available at the "week" resolution, representing an MMWR week ("epiweek").
48+
date_format: String of the date format to parse.
3949
:returns: Timestamp.
4050
"""
4151
date_str = str(date_int)
@@ -55,8 +65,7 @@ def metadata() -> Union[pd.DataFrame, None]:
5565
-------
5666
pd.DataFrame of covidcast metadata.
5767
"""
58-
# pylint: disable=W0212
59-
response = Epidata._request("covidcast_meta")
68+
response = Epidata.covidcast_meta()
6069

6170
if response["result"] != 1:
6271
# Something failed in the API and we did not get real metadata
@@ -80,145 +89,43 @@ def signal(
8089
lag: int = None,
8190
time_type: str = "day",
8291
) -> Union[pd.DataFrame, None]:
83-
"""Download a Pandas data frame for one signal.
84-
85-
Obtains data for selected date ranges for all geographic regions of the
86-
United States. Available data sources and signals are documented in the
87-
`COVIDcast signal documentation
88-
<https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_.
89-
Most (but not all) data sources are available at the county level, but the
90-
API can also return data aggregated to metropolitan statistical areas,
91-
hospital referral regions, or states, as desired, by using the ``geo_type``
92-
argument.
93-
94-
The COVIDcast API tracks updates and changes to its underlying data, and
95-
records the first date each observation became available. For example, a
96-
data source may report its estimate for a specific state on June 3rd on June
97-
5th, once records become available. This data is considered "issued" on June
98-
5th. Later, the data source may update its estimate for June 3rd based on
99-
revised data, creating a new issue on June 8th. By default, ``signal()``
100-
returns the most recent issue available for every observation. The
101-
``as_of``, ``issues``, and ``lag`` parameters allow the user to select
102-
specific issues instead, or to see all updates to observations. These
103-
options are mutually exclusive; if you specify more than one, ``as_of`` will
104-
take priority over ``issues``, which will take priority over ``lag``.
105-
106-
Note that the API only tracks the initial value of an estimate and *changes*
107-
to that value. If a value was first issued on June 5th and never updated,
108-
asking for data issued on June 6th (using ``issues`` or ``lag``) would *not*
109-
return that value, though asking for data ``as_of`` June 6th would.
110-
111-
Note also that the API enforces a maximum result row limit; results beyond
112-
the maximum limit are truncated. This limit is sufficient to fetch
113-
observations in all counties in the United States on one day. This client
114-
automatically splits queries for multiple days across multiple API calls.
115-
However, if data for one day has been issued many times, using the
116-
``issues`` argument may return more results than the query limit. A warning
117-
will be issued in this case. To see all results, split your query across
118-
multiple calls with different ``issues`` arguments.
119-
120-
See the `COVIDcast API documentation
121-
<https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html>`_ for more
122-
information on available geography types, signals, and data formats, and
123-
further discussion of issue dates and data versioning.
124-
125-
:param data_source: String identifying the data source to query, such as
92+
"""
93+
Make a COVIDcast signal API call.
94+
95+
data_source: String identifying the data source to query, such as
12696
``"fb-survey"``.
127-
:param signal: String identifying the signal from that source to query,
97+
signal: String identifying the signal from that source to query,
12898
such as ``"smoothed_cli"``.
129-
:param start_day: Query data beginning on this date. Provided as a
99+
start_day: Query data beginning on this date. Provided as a
130100
``datetime.date`` object. If ``start_day`` is ``None``, defaults to the
131101
first day data is available for this signal. If ``time_type == "week"``, then
132102
this is rounded to the epiweek containing the day (i.e. the previous Sunday).
133-
:param end_day: Query data up to this date, inclusive. Provided as a
103+
end_day: Query data up to this date, inclusive. Provided as a
134104
``datetime.date`` object. If ``end_day`` is ``None``, defaults to the most
135105
recent day data is available for this signal. If ``time_type == "week"``, then
136106
this is rounded to the epiweek containing the day (i.e. the previous Sunday).
137-
:param geo_type: The geography type for which to request this data, such as
107+
geo_type: The geography type for which to request this data, such as
138108
``"county"`` or ``"state"``. Available types are described in the
139109
COVIDcast signal documentation. Defaults to ``"county"``.
140-
:param geo_values: The geographies to fetch data for. The default, ``"*"``,
110+
geo_values: The geographies to fetch data for. The default, ``"*"``,
141111
fetches all geographies. To fetch one geography, specify its ID as a
142112
string; multiple geographies can be provided as an iterable (list, tuple,
143113
...) of strings.
144-
:param as_of: Fetch only data that was available on or before this date,
114+
as_of: Fetch only data that was available on or before this date,
145115
provided as a ``datetime.date`` object. If ``None``, the default, return
146116
the most recent available data. If ``time_type == "week"``, then
147117
this is rounded to the epiweek containing the day (i.e. the previous Sunday).
148-
:param issues: Fetch only data that was published or updated ("issued") on
149-
these dates. Provided as either a single ``datetime.date`` object,
150-
indicating a single date to fetch data issued on, or a tuple or list
151-
specifying (start, end) dates. In this case, return all data issued in
152-
this range. There may be multiple rows for each observation, indicating
153-
several updates to its value. If ``None``, the default, return the most
154-
recently issued data. If ``time_type == "week"``, then these are rounded to
155-
the epiweek containing the day (i.e. the previous Sunday).
156-
:param lag: Integer. If, for example, ``lag=3``, fetch only data that was
118+
lag: Integer. If, for example, ``lag=3``, fetch only data that was
157119
published or updated exactly 3 days after the date. For example, a row
158120
with ``time_value`` of June 3 will only be included in the results if its
159121
data was issued or updated on June 6. If ``None``, the default, return the
160122
most recently issued data regardless of its lag.
161-
:param time_type: The temporal resolution to request this data. Most signals
123+
time_type: The temporal resolution to request this data. Most signals
162124
are available at the "day" resolution (the default); some are only
163125
available at the "week" resolution, representing an MMWR week ("epiweek").
164126
:returns: A Pandas data frame with matching data, or ``None`` if no data is
165127
returned. Each row is one observation on one day in one geographic location.
166128
Contains the following columns:
167-
168-
``geo_value``
169-
Identifies the location, such as a state name or county FIPS code. The
170-
geographic coding used by COVIDcast is described in the `API
171-
documentation here
172-
<https://cmu-delphi.github.io/delphi-epidata/api/covidcast_geography.html>`_.
173-
174-
``signal``
175-
Name of the signal, same as the value of the ``signal`` input argument. Used for
176-
downstream functions to recognize where this signal is from.
177-
178-
``time_value``
179-
Contains a `pandas Timestamp object
180-
<https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html>`_
181-
identifying the date this estimate is for. For data with ``time_type = "week"``, this
182-
is the first day of the corresponding epiweek.
183-
184-
``issue``
185-
Contains a `pandas Timestamp object
186-
<https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html>`_
187-
identifying the date this estimate was issued. For example, an estimate
188-
with a ``time_value`` of June 3 might have been issued on June 5, after
189-
the data for June 3rd was collected and ingested into the API.
190-
191-
``lag``
192-
Integer giving the difference between ``issue`` and ``time_value``,
193-
in days.
194-
195-
``value``
196-
The signal quantity requested. For example, in a query for the
197-
``confirmed_cumulative_num`` signal from the ``usa-facts`` source,
198-
this would be the cumulative number of confirmed cases in the area, as
199-
of the ``time_value``.
200-
201-
``stderr``
202-
The value's standard error, if available.
203-
204-
``sample_size``
205-
Indicates the sample size available in that geography on that day;
206-
sample size may not be available for all signals, due to privacy or
207-
other constraints.
208-
209-
``geo_type``
210-
Geography type for the signal, same as the value of the ``geo_type`` input argument.
211-
Used for downstream functions to parse ``geo_value`` correctly
212-
213-
``data_source``
214-
Name of the signal source, same as the value of the ``data_source`` input argument. Used for
215-
downstream functions to recognize where this signal is from.
216-
217-
Consult the `signal documentation
218-
<https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_
219-
for more details on how values and standard errors are calculated for
220-
specific signals.
221-
222129
"""
223130
if start_day > end_day:
224131
raise ValueError(
@@ -239,7 +146,7 @@ def signal(
239146
)
240147
if response["result"] != 1:
241148
# Something failed in the API and we did not get real signal data
242-
raise RuntimeError("Error when fetching metadata from the API", response["message"])
149+
raise RuntimeError("Error when fetching signal data from the API", response["message"])
243150

244151
api_df = pd.DataFrame.from_dict(response["epidata"])
245152
api_df["issue"] = pd.to_datetime(api_df["issue"], format="%Y%m%d")

_delphi_utils_python/delphi_utils/validator/dynamic.py

-2
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,6 @@ def validate(self, all_frames, report):
8080
# Get 14 days prior to the earliest list date
8181
outlier_lookbehind = timedelta(days=14)
8282

83-
# Authenticate API
84-
# Epidata.auth = ("epidata", api)
8583

8684
# Get all expected combinations of geo_type and signal.
8785
geo_signal_combos = get_geo_signal_combos(self.params.data_source,

_delphi_utils_python/tests/test_covidcast_wrapper.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,22 @@
44
from delphi_utils import covidcast_wrapper
55
import covidcast
66
from freezegun import freeze_time
7+
from delphi_epidata import Epidata
78
from pandas.testing import assert_frame_equal
89

910
TEST_DIR = Path(__file__).parent
1011
class TestCovidcastWrapper:
12+
Epidata.debug = True
1113
def test_metadata(self):
1214
expected_df = covidcast.metadata()
1315
df = covidcast_wrapper.metadata()
1416
assert_frame_equal(expected_df, df)
1517

16-
@freeze_time("2024-07-29")
18+
@freeze_time("2022-01-29")
1719
def test_signal(self):
18-
meta_df = covidcast_wrapper.metadata()
19-
data_filter = ((meta_df["max_time"] >= datetime(year=2024, month=6, day=1)) & (meta_df["time_type"] == "day"))
20+
meta_df = pd.read_pickle(f"{TEST_DIR}/test_data/covidcast_metadata.pkl")
21+
22+
data_filter = (meta_df["max_time"] >= datetime(year=2024, month=6, day=1))
2023
signal_df = meta_df[data_filter].groupby("data_source")["signal"].agg(['unique'])
2124
enddate = datetime.today()
2225
startdate = enddate - timedelta(days=15)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)