Skip to content

Commit c941982

Browse files
committed
post process for metadata
1 parent e4f2679 commit c941982

File tree

1 file changed

+28
-2
lines changed

1 file changed

+28
-2
lines changed

_delphi_utils_python/delphi_utils/covidcast_wrapper.py

+28-2
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,39 @@
1-
from datetime import datetime, date, timedelta
1+
from datetime import date, timedelta
22
from typing import List, Tuple, Union, Iterable
33

4+
import numpy as np
45
import pandas as pd
56

67
from delphi_epidata import Epidata
8+
from epiweeks import Week
79

810
def date_generator(startdate, enddate):
    """Yield every day from startdate to enddate, inclusive, as a string.

    Each value is formatted as 'YYYY-MM-DD'. Nothing is yielded when
    startdate is later than enddate.

    :param startdate: First day to emit; must support strftime and adding a timedelta.
    :param enddate: Last day to emit (inclusive).
    """
    current = startdate
    one_day = timedelta(days=1)
    while current <= enddate:
        yield current.strftime('%Y-%m-%d')
        current = current + one_day
1214

1315

16+
def _parse_datetimes(date_int: int,
                     time_type: str,
                     date_format: str = "%Y%m%d") -> Union[pd.Timestamp]:  # annotating nan errors
    """Convert an integer-encoded day or epiweek into a timestamp.

    The conversion is selected by `time_type` (not by the length of the value):

    * "day": the value is parsed with `date_format`.
    * "week": the first four characters are read as the year and the last two
      as the week number; the result is the start date of that epiweek.
    * anything else: nan is returned.

    Epiweeks use the CDC format.

    :param date_int: Int representation of date (e.g. 20200315) or epiweek (e.g. 202011).
    :param time_type: Either "day" or "week".
    :param date_format: String of the date format to parse when time_type is "day".
    :returns: Timestamp, or nan when time_type is not recognised.
    """
    text = str(date_int)
    # Guard clause: unsupported time types fall through to nan, as before.
    if time_type not in ("day", "week"):
        return np.nan
    if time_type == "day":
        return pd.to_datetime(text, format=date_format)
    year = int(text[:4])
    week_number = int(text[-2:])
    return pd.to_datetime(Week(year, week_number).startdate())
1437

1538
def metadata():
1639
response = Epidata._request("covidcast_meta")
@@ -21,6 +44,9 @@ def metadata():
2144
response["message"])
2245

2346
df = pd.DataFrame.from_dict(response["epidata"])
47+
df["min_time"] = df.apply(lambda x: _parse_datetimes(x.min_time, x.time_type), axis=1)
48+
df["max_time"] = df.apply(lambda x: _parse_datetimes(x.max_time, x.time_type), axis=1)
49+
df["last_update"] = pd.to_datetime(df["last_update"], unit="s")
2450
return df
2551

2652

@@ -182,7 +208,7 @@ def signal(
182208
)
183209

184210
time_values = list(date_generator(start_day, end_day))
185-
issues = list(date_generator(start_day, end_day)) #TODO placesholder
211+
issues = list(date_generator(start_day, end_day)) #TODO placesholder need to see how the issues params are coming in
186212
response = Epidata.covidcast(data_source, signal, time_type=time_type,
187213
geo_type=geo_type, time_values=time_values,
188214
geo_value=geo_values, as_of=as_of,

0 commit comments

Comments
 (0)