Skip to content

Commit 3c31a43

Browse files
committed
NANs Quidel:
* missingness columns * insufficient data check * tests
1 parent 2111d73 commit 3c31a43

File tree

6 files changed

+175
-61
lines changed

6 files changed

+175
-61
lines changed

quidel/delphi_quidel/data_tools.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pandas as pd
55

6+
from delphi_utils import Nans
7+
68
def _prop_var(p, n):
79
"""
810
Calculate variance of proportion.
@@ -117,7 +119,7 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs, max_borrow_obs
117119
return borrow_prop
118120

119121

120-
def raw_positive_prop(positives, tests, min_obs):
122+
def raw_positive_prop(positives, tests, min_obs, missing_val, missing_se, missing_sample_size):
121123
"""
122124
Calculate proportion of positive tests for a single location with no temporal smoothing.
123125
@@ -166,10 +168,15 @@ def raw_positive_prop(positives, tests, min_obs):
166168
positive_prop = positives / tests
167169
se = np.sqrt(_prop_var(positive_prop, tests))
168170
sample_size = tests
169-
return positive_prop, se, sample_size
171+
missing_val[np.isnan(tests) | (tests < min_obs) | np.isnan(positive_prop)] = Nans.DATA_INSUFFICIENT
172+
missing_se[np.isnan(se)] = Nans.DATA_INSUFFICIENT
173+
missing_sample_size[np.isnan(tests) | (tests < min_obs)] = Nans.DATA_INSUFFICIENT
174+
175+
return positive_prop, se, sample_size, missing_val, missing_se, missing_sample_size
170176

171177

172178
def smoothed_positive_prop(positives, tests, min_obs, max_borrow_obs, pool_days,
179+
missing_val, missing_se, missing_sample_size,
173180
parent_positives=None, parent_tests=None):
174181
"""
175182
Calculate the proportion of negative tests for a single location with temporal smoothing.
@@ -259,10 +266,10 @@ def smoothed_positive_prop(positives, tests, min_obs, max_borrow_obs, pool_days,
259266
pooled_positives = tpooled_positives
260267
pooled_tests = tpooled_tests
261268
## STEP 2: CALCULATE AS THOUGH THEY'RE RAW
262-
return raw_positive_prop(pooled_positives, pooled_tests, min_obs)
269+
return raw_positive_prop(pooled_positives, pooled_tests, min_obs, missing_val, missing_se, missing_sample_size)
263270

264271

265-
def raw_tests_per_device(devices, tests, min_obs):
272+
def raw_tests_per_device(devices, tests, min_obs, missing_val, missing_se, missing_sample_size):
266273
"""
267274
Calculate the tests per device for a single geographic location, without any temporal smoothing.
268275
@@ -297,14 +304,20 @@ def raw_tests_per_device(devices, tests, min_obs):
297304
'with no np.nan')
298305
if min_obs <= 0:
299306
raise ValueError('min_obs should be positive')
307+
300308
tests[tests < min_obs] = np.nan
301309
tests_per_device = tests / devices
302310
se = np.repeat(np.nan, len(devices))
303311
sample_size = tests
304312

305-
return tests_per_device, se, sample_size
313+
missing_val[np.isnan(tests) | (tests < min_obs)] = Nans.DATA_INSUFFICIENT
314+
missing_se = np.repeat(Nans.NOT_APPLICABLE, len(devices))
315+
missing_sample_size[np.isnan(tests) | (tests < min_obs)] = Nans.DATA_INSUFFICIENT
316+
317+
return tests_per_device, se, sample_size, missing_val, missing_se, missing_sample_size
306318

307319
def smoothed_tests_per_device(devices, tests, min_obs, max_borrow_obs, pool_days,
320+
missing_val, missing_se, missing_sample_size,
308321
parent_devices=None, parent_tests=None):
309322
"""
310323
Calculate the ratio of tests per device for a single location with temporal smoothing.
@@ -383,4 +396,4 @@ def smoothed_tests_per_device(devices, tests, min_obs, max_borrow_obs, pool_days
383396
pooled_devices = tpooled_devices
384397
pooled_tests = tpooled_tests
385398
## STEP 2: CALCULATE AS THOUGH THEY'RE RAW
386-
return raw_tests_per_device(pooled_devices, pooled_tests, min_obs)
399+
return raw_tests_per_device(pooled_devices, pooled_tests, min_obs, missing_val, missing_se, missing_sample_size)

quidel/delphi_quidel/generate_sensor.py

+48-12
Original file line numberDiff line numberDiff line change
@@ -31,30 +31,42 @@ def generate_sensor_for_states(state_groups, smooth, device, first_date, last_da
3131

3232
# smoothed test per device
3333
if device & smooth:
34-
stat, se, sample_size = smoothed_tests_per_device(
34+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = smoothed_tests_per_device(
3535
devices=state_group["numUniqueDevices"].values,
3636
tests=state_group['totalTest'].values,
37+
missing_val=state_group['missing_val'].values,
38+
missing_se=state_group['missing_se'].values,
39+
missing_sample_size=state_group['missing_sample_size'].values,
3740
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
3841
pool_days=POOL_DAYS)
3942
# raw test per device
4043
elif device & (not smooth):
41-
stat, se, sample_size = raw_tests_per_device(
44+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = raw_tests_per_device(
4245
devices=state_group["numUniqueDevices"].values,
4346
tests=state_group['totalTest'].values,
47+
missing_val=state_group['missing_val'].values,
48+
missing_se=state_group['missing_se'].values,
49+
missing_sample_size=state_group['missing_sample_size'].values,
4450
min_obs=MIN_OBS)
4551
# smoothed pct positive
4652
elif (not device) & smooth:
47-
stat, se, sample_size = smoothed_positive_prop(
53+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = smoothed_positive_prop(
4854
tests=state_group['totalTest'].values,
4955
positives=state_group['positiveTest'].values,
56+
missing_val=state_group['missing_val'].values,
57+
missing_se=state_group['missing_se'].values,
58+
missing_sample_size=state_group['missing_sample_size'].values,
5059
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
5160
pool_days=POOL_DAYS)
5261
stat = stat * 100
5362
# raw pct positive
5463
else:
55-
stat, se, sample_size = raw_positive_prop(
64+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = raw_positive_prop(
5665
tests=state_group['totalTest'].values,
5766
positives=state_group['positiveTest'].values,
67+
missing_val=state_group['missing_val'].values,
68+
missing_se=state_group['missing_se'].values,
69+
missing_sample_size=state_group['missing_sample_size'].values,
5870
min_obs=MIN_OBS)
5971
stat = stat * 100
6072

@@ -63,7 +75,10 @@ def generate_sensor_for_states(state_groups, smooth, device, first_date, last_da
6375
"timestamp": state_group.index,
6476
"val": stat,
6577
"se": se,
66-
"sample_size": sample_size}))
78+
"sample_size": sample_size,
79+
"missing_val": missing_val,
80+
"missing_se": missing_se,
81+
"missing_sample_size": missing_sample_size}))
6782
return state_df
6883

6984
def generate_sensor_for_other_geores(state_groups, data, res_key, smooth,
@@ -102,46 +117,64 @@ def generate_sensor_for_other_geores(state_groups, data, res_key, smooth,
102117
if smooth:
103118
if has_parent:
104119
if device:
105-
stat, se, sample_size = smoothed_tests_per_device(
120+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = smoothed_tests_per_device(
106121
devices=res_group["numUniqueDevices"].values,
107122
tests=res_group['totalTest'].values,
123+
missing_val=res_group['missing_val'].values,
124+
missing_se=res_group['missing_se'].values,
125+
missing_sample_size=res_group['missing_sample_size'].values,
108126
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
109127
pool_days=POOL_DAYS,
110128
parent_devices=res_group["numUniqueDevices_parent"].values,
111129
parent_tests=res_group["totalTest_parent"].values)
112130
else:
113-
stat, se, sample_size = smoothed_positive_prop(
131+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = smoothed_positive_prop(
114132
tests=res_group['totalTest'].values,
115133
positives=res_group['positiveTest'].values,
134+
missing_val=res_group['missing_val'].values,
135+
missing_se=res_group['missing_se'].values,
136+
missing_sample_size=res_group['missing_sample_size'].values,
116137
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
117138
pool_days=POOL_DAYS,
118139
parent_tests=res_group["totalTest_parent"].values,
119140
parent_positives=res_group['positiveTest_parent'].values)
120141
stat = stat * 100
121142
else:
122143
if device:
123-
stat, se, sample_size = smoothed_tests_per_device(
144+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = smoothed_tests_per_device(
124145
devices=res_group["numUniqueDevices"].values,
125146
tests=res_group['totalTest'].values,
147+
missing_val=res_group['missing_val'].values,
148+
missing_se=res_group['missing_se'].values,
149+
missing_sample_size=res_group['missing_sample_size'].values,
126150
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
127151
pool_days=POOL_DAYS)
128152
else:
129-
stat, se, sample_size = smoothed_positive_prop(
153+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = smoothed_positive_prop(
130154
tests=res_group['totalTest'].values,
131155
positives=res_group['positiveTest'].values,
156+
missing_val=res_group['missing_val'].values,
157+
missing_se=res_group['missing_se'].values,
158+
missing_sample_size=res_group['missing_sample_size'].values,
132159
min_obs=MIN_OBS, max_borrow_obs=MAX_BORROW_OBS,
133160
pool_days=POOL_DAYS)
134161
stat = stat * 100
135162
else:
136163
if device:
137-
stat, se, sample_size = raw_tests_per_device(
164+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = raw_tests_per_device(
138165
devices=res_group["numUniqueDevices"].values,
139166
tests=res_group['totalTest'].values,
167+
missing_val=res_group['missing_val'].values,
168+
missing_se=res_group['missing_se'].values,
169+
missing_sample_size=res_group['missing_sample_size'].values,
140170
min_obs=MIN_OBS)
141171
else:
142-
stat, se, sample_size = raw_positive_prop(
172+
stat, se, sample_size, missing_val, missing_se, missing_sample_size = raw_positive_prop(
143173
tests=res_group['totalTest'].values,
144174
positives=res_group['positiveTest'].values,
175+
missing_val=res_group['missing_val'].values,
176+
missing_se=res_group['missing_se'].values,
177+
missing_sample_size=res_group['missing_sample_size'].values,
145178
min_obs=MIN_OBS)
146179
stat = stat * 100
147180

@@ -150,5 +183,8 @@ def generate_sensor_for_other_geores(state_groups, data, res_key, smooth,
150183
"timestamp": res_group.index,
151184
"val": stat,
152185
"se": se,
153-
"sample_size": sample_size}))
186+
"sample_size": sample_size,
187+
"missing_val": missing_val,
188+
"missing_se": missing_se,
189+
"missing_sample_size": missing_sample_size}))
154190
return res_df

quidel/delphi_quidel/run.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from delphi_utils import (
1313
add_prefix,
1414
create_export_csv,
15-
get_structured_logger
15+
get_structured_logger,
16+
Nans
1617
)
1718

1819
from .constants import (END_FROM_TODAY_MINUS, EXPORT_DAY_RANGE,
@@ -83,6 +84,12 @@ def run_module(params: Dict[str, Any]):
8384
test_type = "covid_ag" if "covid_ag" in sensor else "flu_ag"
8485
print("state", sensor)
8586
data = dfs[test_type].copy()
87+
88+
# Default missingness values
89+
data["missing_val"] = Nans.NOT_MISSING
90+
data["missing_se"] = Nans.NOT_MISSING
91+
data["missing_sample_size"] = Nans.NOT_MISSING
92+
8693
state_groups = geo_map("state", data, map_df).groupby("state_id")
8794
first_date, last_date = data["timestamp"].min(), data["timestamp"].max()
8895

0 commit comments

Comments
 (0)