Skip to content

Commit c9ab090

Browse files
committed
NANs Quidel:
* missingness columns * insufficient data check * tests
1 parent 2111d73 commit c9ab090

File tree

6 files changed

+241
-101
lines changed

6 files changed

+241
-101
lines changed

quidel/delphi_quidel/data_tools.py

+25-6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pandas as pd
55

6+
from delphi_utils import Nans
7+
68
def _prop_var(p, n):
79
"""
810
Calculate variance of proportion.
@@ -117,7 +119,7 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs, max_borrow_obs
117119
return borrow_prop
118120

119121

120-
def raw_positive_prop(positives, tests, min_obs):
122+
def raw_positive_prop(positives, tests, min_obs, missing_val, missing_se, missing_sample_size):
121123
"""
122124
Calculate proportion of positive tests for a single location with no temporal smoothing.
123125
@@ -166,10 +168,15 @@ def raw_positive_prop(positives, tests, min_obs):
166168
positive_prop = positives / tests
167169
se = np.sqrt(_prop_var(positive_prop, tests))
168170
sample_size = tests
169-
return positive_prop, se, sample_size
171+
missing_val[np.isnan(tests) | (tests < min_obs) | np.isnan(positive_prop)] = Nans.PRIVACY
172+
missing_se[np.isnan(se)] = Nans.PRIVACY
173+
missing_sample_size[np.isnan(tests) | (tests < min_obs)] = Nans.PRIVACY
174+
175+
return positive_prop, se, sample_size, missing_val, missing_se, missing_sample_size
170176

171177

172178
def smoothed_positive_prop(positives, tests, min_obs, max_borrow_obs, pool_days,
179+
missing_val, missing_se, missing_sample_size,
173180
parent_positives=None, parent_tests=None):
174181
"""
175182
Calculate the proportion of positive tests for a single location with temporal smoothing.
@@ -259,10 +266,13 @@ def smoothed_positive_prop(positives, tests, min_obs, max_borrow_obs, pool_days,
259266
pooled_positives = tpooled_positives
260267
pooled_tests = tpooled_tests
261268
## STEP 2: CALCULATE AS THOUGH THEY'RE RAW
262-
return raw_positive_prop(pooled_positives, pooled_tests, min_obs)
269+
return raw_positive_prop(
270+
pooled_positives, pooled_tests, min_obs,
271+
missing_val, missing_se, missing_sample_size
272+
)
263273

264274

265-
def raw_tests_per_device(devices, tests, min_obs):
275+
def raw_tests_per_device(devices, tests, min_obs, missing_val, missing_se, missing_sample_size):
266276
"""
267277
Calculate the tests per device for a single geographic location, without any temporal smoothing.
268278
@@ -297,14 +307,20 @@ def raw_tests_per_device(devices, tests, min_obs):
297307
'with no np.nan')
298308
if min_obs <= 0:
299309
raise ValueError('min_obs should be positive')
310+
300311
tests[tests < min_obs] = np.nan
301312
tests_per_device = tests / devices
302313
se = np.repeat(np.nan, len(devices))
303314
sample_size = tests
304315

305-
return tests_per_device, se, sample_size
316+
missing_val[np.isnan(tests) | (tests < min_obs)] = Nans.PRIVACY
317+
missing_se = np.repeat(Nans.NOT_APPLICABLE, len(devices))
318+
missing_sample_size[np.isnan(tests) | (tests < min_obs)] = Nans.PRIVACY
319+
320+
return tests_per_device, se, sample_size, missing_val, missing_se, missing_sample_size
306321

307322
def smoothed_tests_per_device(devices, tests, min_obs, max_borrow_obs, pool_days,
323+
missing_val, missing_se, missing_sample_size,
308324
parent_devices=None, parent_tests=None):
309325
"""
310326
Calculate the ratio of tests per device for a single location with temporal smoothing.
@@ -383,4 +399,7 @@ def smoothed_tests_per_device(devices, tests, min_obs, max_borrow_obs, pool_days
383399
pooled_devices = tpooled_devices
384400
pooled_tests = tpooled_tests
385401
## STEP 2: CALCULATE AS THOUGH THEY'RE RAW
386-
return raw_tests_per_device(pooled_devices, pooled_tests, min_obs)
402+
return raw_tests_per_device(
403+
pooled_devices, pooled_tests, min_obs,
404+
missing_val, missing_se, missing_sample_size
405+
)

quidel/delphi_quidel/generate_sensor.py

+108-52
Original file line numberDiff line numberDiff line change
@@ -31,39 +31,62 @@ def generate_sensor_for_states(state_groups, smooth, device, first_date, last_da
3131

3232
# smoothed tests per device
3333
if device & smooth:
34-
stat, se, sample_size = smoothed_tests_per_device(
35-
devices=state_group["numUniqueDevices"].values,
36-
tests=state_group['totalTest'].values,
37-
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
38-
pool_days=POOL_DAYS)
34+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = (
35+
smoothed_tests_per_device(
36+
devices=state_group["numUniqueDevices"].values,
37+
tests=state_group['totalTest'].values,
38+
missing_val=state_group['missing_val'].values,
39+
missing_se=state_group['missing_se'].values,
40+
missing_sample_size=state_group['missing_sample_size'].values,
41+
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
42+
pool_days=POOL_DAYS)
43+
)
3944
# raw tests per device
4045
elif device & (not smooth):
41-
stat, se, sample_size = raw_tests_per_device(
42-
devices=state_group["numUniqueDevices"].values,
43-
tests=state_group['totalTest'].values,
44-
min_obs=MIN_OBS)
46+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = (
47+
raw_tests_per_device(
48+
devices=state_group["numUniqueDevices"].values,
49+
tests=state_group['totalTest'].values,
50+
missing_val=state_group['missing_val'].values,
51+
missing_se=state_group['missing_se'].values,
52+
missing_sample_size=state_group['missing_sample_size'].values,
53+
min_obs=MIN_OBS)
54+
)
4555
# smoothed pct positive
4656
elif (not device) & smooth:
47-
stat, se, sample_size = smoothed_positive_prop(
48-
tests=state_group['totalTest'].values,
49-
positives=state_group['positiveTest'].values,
50-
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
51-
pool_days=POOL_DAYS)
57+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = (
58+
smoothed_positive_prop(
59+
tests=state_group['totalTest'].values,
60+
positives=state_group['positiveTest'].values,
61+
missing_val=state_group['missing_val'].values,
62+
missing_se=state_group['missing_se'].values,
63+
missing_sample_size=state_group['missing_sample_size'].values,
64+
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
65+
pool_days=POOL_DAYS)
66+
)
5267
stat = stat * 100
5368
# raw pct positive
5469
else:
55-
stat, se, sample_size = raw_positive_prop(
56-
tests=state_group['totalTest'].values,
57-
positives=state_group['positiveTest'].values,
58-
min_obs=MIN_OBS)
70+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = (
71+
raw_positive_prop(
72+
tests=state_group['totalTest'].values,
73+
positives=state_group['positiveTest'].values,
74+
missing_val=state_group['missing_val'].values,
75+
missing_se=state_group['missing_se'].values,
76+
missing_sample_size=state_group['missing_sample_size'].values,
77+
min_obs=MIN_OBS)
78+
)
5979
stat = stat * 100
6080

6181
se = se * 100
6282
state_df = state_df.append(pd.DataFrame({"geo_id": state,
6383
"timestamp": state_group.index,
6484
"val": stat,
6585
"se": se,
66-
"sample_size": sample_size}))
86+
"sample_size": sample_size,
87+
"missing_val": missing_val,
88+
"missing_se": missing_se,
89+
"missing_sample_size": missing_sample_size}))
6790
return state_df
6891

6992
def generate_sensor_for_other_geores(state_groups, data, res_key, smooth,
@@ -102,53 +125,86 @@ def generate_sensor_for_other_geores(state_groups, data, res_key, smooth,
102125
if smooth:
103126
if has_parent:
104127
if device:
105-
stat, se, sample_size = smoothed_tests_per_device(
106-
devices=res_group["numUniqueDevices"].values,
107-
tests=res_group['totalTest'].values,
108-
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
109-
pool_days=POOL_DAYS,
110-
parent_devices=res_group["numUniqueDevices_parent"].values,
111-
parent_tests=res_group["totalTest_parent"].values)
128+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = (
129+
smoothed_tests_per_device(
130+
devices=res_group["numUniqueDevices"].values,
131+
tests=res_group['totalTest'].values,
132+
missing_val=res_group['missing_val'].values,
133+
missing_se=res_group['missing_se'].values,
134+
missing_sample_size=res_group['missing_sample_size'].values,
135+
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
136+
pool_days=POOL_DAYS,
137+
parent_devices=res_group["numUniqueDevices_parent"].values,
138+
parent_tests=res_group["totalTest_parent"].values)
139+
)
112140
else:
113-
stat, se, sample_size = smoothed_positive_prop(
114-
tests=res_group['totalTest'].values,
115-
positives=res_group['positiveTest'].values,
116-
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
117-
pool_days=POOL_DAYS,
118-
parent_tests=res_group["totalTest_parent"].values,
119-
parent_positives=res_group['positiveTest_parent'].values)
141+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = (
142+
smoothed_positive_prop(
143+
tests=res_group['totalTest'].values,
144+
positives=res_group['positiveTest'].values,
145+
missing_val=res_group['missing_val'].values,
146+
missing_se=res_group['missing_se'].values,
147+
missing_sample_size=res_group['missing_sample_size'].values,
148+
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
149+
pool_days=POOL_DAYS,
150+
parent_tests=res_group["totalTest_parent"].values,
151+
parent_positives=res_group['positiveTest_parent'].values)
152+
)
120153
stat = stat * 100
121154
else:
122155
if device:
123-
stat, se, sample_size = smoothed_tests_per_device(
124-
devices=res_group["numUniqueDevices"].values,
125-
tests=res_group['totalTest'].values,
126-
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
127-
pool_days=POOL_DAYS)
156+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = (
157+
smoothed_tests_per_device(
158+
devices=res_group["numUniqueDevices"].values,
159+
tests=res_group['totalTest'].values,
160+
missing_val=res_group['missing_val'].values,
161+
missing_se=res_group['missing_se'].values,
162+
missing_sample_size=res_group['missing_sample_size'].values,
163+
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
164+
pool_days=POOL_DAYS)
165+
)
128166
else:
129-
stat, se, sample_size = smoothed_positive_prop(
130-
tests=res_group['totalTest'].values,
131-
positives=res_group['positiveTest'].values,
132-
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
133-
pool_days=POOL_DAYS)
167+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = (
168+
smoothed_positive_prop(
169+
tests=res_group['totalTest'].values,
170+
positives=res_group['positiveTest'].values,
171+
missing_val=res_group['missing_val'].values,
172+
missing_se=res_group['missing_se'].values,
173+
missing_sample_size=res_group['missing_sample_size'].values,
174+
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
175+
pool_days=POOL_DAYS)
176+
)
134177
stat = stat * 100
135178
else:
136179
if device:
137-
stat, se, sample_size = raw_tests_per_device(
138-
devices=res_group["numUniqueDevices"].values,
139-
tests=res_group['totalTest'].values,
140-
min_obs=MIN_OBS)
180+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = (
181+
raw_tests_per_device(
182+
devices=res_group["numUniqueDevices"].values,
183+
tests=res_group['totalTest'].values,
184+
missing_val=res_group['missing_val'].values,
185+
missing_se=res_group['missing_se'].values,
186+
missing_sample_size=res_group['missing_sample_size'].values,
187+
min_obs=MIN_OBS)
188+
)
141189
else:
142-
stat, se, sample_size = raw_positive_prop(
143-
tests=res_group['totalTest'].values,
144-
positives=res_group['positiveTest'].values,
145-
min_obs=MIN_OBS)
190+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = (
191+
raw_positive_prop(
192+
tests=res_group['totalTest'].values,
193+
positives=res_group['positiveTest'].values,
194+
missing_val=res_group['missing_val'].values,
195+
missing_se=res_group['missing_se'].values,
196+
missing_sample_size=res_group['missing_sample_size'].values,
197+
min_obs=MIN_OBS)
198+
)
146199
stat = stat * 100
147200

148201
se = se * 100
149202
res_df = res_df.append(pd.DataFrame({"geo_id": loc,
150203
"timestamp": res_group.index,
151204
"val": stat,
152205
"se": se,
153-
"sample_size": sample_size}))
206+
"sample_size": sample_size,
207+
"missing_val": missing_val,
208+
"missing_se": missing_se,
209+
"missing_sample_size": missing_sample_size}))
154210
return res_df

quidel/delphi_quidel/run.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from delphi_utils import (
1313
add_prefix,
1414
create_export_csv,
15-
get_structured_logger
15+
get_structured_logger,
16+
Nans
1617
)
1718

1819
from .constants import (END_FROM_TODAY_MINUS, EXPORT_DAY_RANGE,
@@ -83,6 +84,12 @@ def run_module(params: Dict[str, Any]):
8384
test_type = "covid_ag" if "covid_ag" in sensor else "flu_ag"
8485
print("state", sensor)
8586
data = dfs[test_type].copy()
87+
88+
# Default missingness values
89+
data["missing_val"] = Nans.NOT_MISSING
90+
data["missing_se"] = Nans.NOT_MISSING
91+
data["missing_sample_size"] = Nans.NOT_MISSING
92+
8693
state_groups = geo_map("state", data, map_df).groupby("state_id")
8794
first_date, last_date = data["timestamp"].min(), data["timestamp"].max()
8895

0 commit comments

Comments
 (0)