
Commit b2485d2

Merge branch 'main' of github.com:cmu-delphi/covidcast-indicators into indicator_runner
2 parents 0847754 + f06c3c8

File tree: 9 files changed, +140 −55 lines changed

ansible/templates/sir_complainsalot-params-prod.json.j2

Lines changed: 1 addition & 1 deletion

@@ -39,7 +39,7 @@
     "fb-survey": {
       "max_age": 3,
       "maintainers": ["U01069KCRS7"],
-      "retired-signals": ["smoothed_wearing_mask"]
+      "retired-signals": ["smoothed_wearing_mask", "smoothed_wwearing_mask"]
     },
     "indicator-combination": {
       "max_age": 4,

changehc/delphi_changehc/sensor.py

Lines changed: 5 additions & 35 deletions

@@ -26,19 +26,6 @@ class CHCSensor:
         poly_fit_degree=1,
         gaussian_bandwidth=Config.SMOOTHER_BANDWIDTH)
 
-    @staticmethod
-    def gauss_smooth(count,total):
-        """Smooth using the left_gauss_linear.
-
-        Args:
-            count, total: array
-        """
-        count_smooth = CHCSensor.smoother.smooth(count)
-        total_smooth = CHCSensor.smoother.smooth(total)
-        total_clip = np.clip(total_smooth, 0, None)
-        count_clip = np.clip(count_smooth, 0, total_clip)
-        return count_clip, total_clip
-
     @staticmethod
     def backfill(
         num,
@@ -120,30 +107,13 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"):
 
         # calculate smoothed counts and jeffreys rate
        # the left_gauss_linear smoother is not guaranteed to return values greater than 0
-
-        smoothed_total_counts, smoothed_total_visits = CHCSensor.gauss_smooth(
-            total_counts.flatten(), total_visits
-        )
-
-        # in smoothing, the numerator may have become more than the denominator
-        # simple fix is to clip the max values elementwise to the denominator (note that
-        # this has only been observed in synthetic data)
-        # smoothed_total_counts = np.clip(smoothed_total_counts, 0, smoothed_total_visits)
-
-        smoothed_total_rates = (
-            (smoothed_total_counts + 0.5) / (smoothed_total_visits + 1)
-        )
-
-        # checks - due to the smoother, the first value will be NA
-        assert (
-            np.sum(np.isnan(smoothed_total_rates[1:])) == 0
-        ), "NAs in rate calculation"
-        assert (
-            np.sum(smoothed_total_rates[1:] <= 0) == 0
-        ), f"0 or negative value, {geo_id}"
+        rates = total_counts.flatten() / total_visits
+        smoothed_rate = CHCSensor.smoother.smooth(rates)
+        clipped_smoothed_rate = np.clip(smoothed_rate, 0, 1)
+        jeffreys_rate = (clipped_smoothed_rate * total_visits + 0.5) / (total_visits + 1)
 
         # cut off at sensor indexes
-        rate_data = pd.DataFrame({'rate':smoothed_total_rates, 'den': smoothed_total_visits},
+        rate_data = pd.DataFrame({'rate': jeffreys_rate, 'den': total_visits},
                                  index=y_data.index)
         rate_data = rate_data[first_sensor_date:]
         include = rate_data['den'] >= Config.MIN_DEN
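
The refactor changes the order of operations: instead of smoothing counts and visits separately and then forming a rate, the new code smooths the raw rate, clips it into [0, 1], and applies a Jeffreys-style correction. A minimal sketch of that pipeline, using a stand-in moving-average smoother in place of CHCSensor.smoother (the function and variable names below are illustrative, not the indicator's API):

import numpy as np

def jeffreys_smoothed_rate(counts, visits, smooth):
    # counts and visits are equal-length 1-D arrays with visits > 0;
    # `smooth` stands in for CHCSensor.smoother.smooth.
    rates = counts / visits                 # raw daily rates
    clipped = np.clip(smooth(rates), 0, 1)  # smoother may leave [0, 1]
    # Jeffreys correction: add half a pseudo-count, so the result stays
    # strictly positive even where the clipped rate is 0.
    return (clipped * visits + 0.5) / (visits + 1)

# Example with a trivial 3-day moving average as the smoother:
counts = np.array([0., 2., 3., 1., 4.])
visits = np.full(5, 10.)
print(jeffreys_smoothed_rate(counts, visits,
                             lambda x: np.convolve(x, np.ones(3) / 3, "same")))

Because the Jeffreys correction bounds the rate away from 0 and 1, the old NA/non-positive assertions become unnecessary, which is why the diff drops them.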

changehc/tests/test_update_sensor.py

Lines changed: 29 additions & 6 deletions

@@ -81,29 +81,52 @@ def test_geo_reindex(self):
            ""
        )
        su_inst.shift_dates()
-       data_frame = su_inst.geo_reindex(self.small_test_data.reset_index())
+       test_data = pd.DataFrame({
+           "num": [0, 100, 200, 300, 400, 500, 600, 100, 200, 300, 400, 500, 600],
+           "fips": ['01001'] * 7 + ['04007'] * 6,
+           "den": [1000] * 7 + [2000] * 6,
+           "date": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]})
+       data_frame = su_inst.geo_reindex(test_data)
        assert data_frame.shape[0] == multiple*len(su_inst.fit_dates)
        assert (data_frame.sum() == (4200,19000)).all()
 
    def test_update_sensor(self):
        """Tests that the sensors are properly updated."""
-       for geo in ["state","hrr"]:
+       outputs = {}
+       for geo in ["county", "state", "hhs", "nation"]:
            td = TemporaryDirectory()
            su_inst = CHCSensorUpdator(
-               "02-01-2020",
-               "06-01-2020",
-               "06-12-2020",
+               "03-01-2020",
+               "03-22-2020",
+               "03-27-2020",
                geo,
                self.parallel,
                self.weekday,
                self.numtype,
                self.se,
                ""
            )
-           su_inst.update_sensor(self.small_test_data, td.name)
+           # As of 3/3/21 (40c258a), this set of data has county outputting data, state and hhs not
+           # outputting data, and nation outputting data, which is undesirable. Ideal behaviour
+           # should be all output or a subregion only outputting if its parent has output,
+           # which is what is being tested here.
+           small_test_data = pd.DataFrame({
+               "num": [0, 100, 200, 300, 400, 500, 600, 100, 200, 300, 400, 500, 600] * 2,
+               "fips": ["01001"] * 13 + ["42003"] * 13,
+               "den": [30, 50, 50, 10, 1, 5, 5, 50, 50, 50, 0, 0, 0] * 2,
+               "date": list(pd.date_range("20200301", "20200313")) * 2}).set_index(
+               ["fips", "date"])
+           su_inst.update_sensor(small_test_data, td.name)
+           for f in os.listdir(td.name):
+               outputs[f] = pd.read_csv(os.path.join(td.name, f))
            assert len(os.listdir(td.name)) == len(su_inst.sensor_dates),\
                f"failed {geo} update sensor test"
            td.cleanup()
+       assert outputs["20200319_county_smoothed_outpatient_covid.csv"].empty
+       assert outputs["20200319_state_smoothed_outpatient_covid.csv"].empty
+       assert outputs["20200319_hhs_smoothed_outpatient_covid.csv"].empty
+       assert outputs["20200319_nation_smoothed_outpatient_covid.csv"].empty
+
 
 class TestWriteToCsv:
    """Tests for writing output files to CSV."""

facebook/delphiFacebook/R/binary.R

Lines changed: 5 additions & 2 deletions

@@ -51,12 +51,15 @@ get_binary_indicators <- function() {
    "smoothed_wtravel_outside_state_7d", "weight", "c_travel_state_7d", 6, compute_binary_response, jeffreys_binary,
 
    # work outside home
-   # pre-wave-4
+   # pre-wave 4
    "wip_smoothed_work_outside_home_5d", "weight_unif", "c_work_outside_5d", 6, compute_binary_response, jeffreys_binary,
    "wip_smoothed_wwork_outside_home_5d", "weight", "c_work_outside_5d", 6, compute_binary_response, jeffreys_binary,
-   # wave 4+
+   # wave 4+, pre-wave 10
    "smoothed_work_outside_home_1d", "weight_unif", "a_work_outside_home_1d", 6, compute_binary_response, jeffreys_binary,
    "smoothed_wwork_outside_home_1d", "weight", "a_work_outside_home_1d", 6, compute_binary_response, jeffreys_binary,
+   # wave 10+
+   "smoothed_work_outside_home_indoors_1d", "weight_unif", "a_work_outside_home_indoors_1d", 6, compute_binary_response, jeffreys_binary,
+   "smoothed_wwork_outside_home_indoors_1d", "weight", "a_work_outside_home_indoors_1d", 6, compute_binary_response, jeffreys_binary,
 
    # activities
    # pre-Wave 10

facebook/delphiFacebook/R/responses.R

Lines changed: 17 additions & 6 deletions

@@ -332,19 +332,30 @@ bodge_v4_translation <- function(input_data) {
   affected <- c("V4_1", "V4_2", "V4_3", "V4_4", "V4_5")
   corrected <- c("V4a_1", "V4a_2", "V4a_3", "V4a_4", "V4a_5")
 
-  # Step 1: For any non-English results, null out V4 responses. There are NAs
-  # because of filtering earlier in the pipeline that incorrectly handles NA, so
-  # also remove these.
-  non_english <- is.na(input_data$UserLanguage) | input_data$UserLanguage != "EN"
-  for (col in affected) {
-    input_data[non_english, col] <- NA
+  if (any(affected %in% names(input_data))) {
+    # This wave is affected by the problem. Step 1: For any non-English results,
+    # null out V4 responses. There are NAs because of filtering earlier in the
+    # pipeline that incorrectly handles NA, so also remove these.
+    non_english <- is.na(input_data$UserLanguage) | input_data$UserLanguage != "EN"
+    for (col in affected) {
+      input_data[non_english, col] <- NA
+    }
+  } else {
+    # This wave does not have V4, only V4a. We will move V4a's responses into V4
+    # below, so users do not need to know about our goof. Ensure the columns
+    # exist so the later code can move data into them.
+    for (col in affected) {
+      input_data[[col]] <- NA
+    }
   }
 
   # Step 2: If this data does not have V4a, stop.
   if (!("V4a_1" %in% names(input_data))) {
     return(input_data)
   }
 
+  # Step 3: Wherever there are values in the new columns, move them to the old
+  # columns.
   for (ii in seq_along(affected)) {
     bad <- affected[ii]
     good <- corrected[ii]
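
In words: waves that still carry the mistranslated V4 columns get their non-English V4 answers censored, waves that only have V4a get empty V4 columns created, and then any V4a values are copied into the V4 columns. A rough pandas paraphrase of that flow (illustrative only; the real implementation is the R code above):

import numpy as np
import pandas as pd

AFFECTED = [f"V4_{i}" for i in range(1, 6)]    # mistranslated columns
CORRECTED = [f"V4a_{i}" for i in range(1, 6)]  # corrected replacements

def bodge_v4(df: pd.DataFrame) -> pd.DataFrame:
    if any(col in df.columns for col in AFFECTED):
        # Affected wave: null out V4 answers from non-English responses.
        non_english = df["UserLanguage"].isna() | (df["UserLanguage"] != "EN")
        df.loc[non_english, [c for c in AFFECTED if c in df.columns]] = np.nan
    else:
        # V4a-only wave: create empty V4 columns to receive the values.
        for col in AFFECTED:
            df[col] = np.nan
    if "V4a_1" not in df.columns:
        return df  # no corrected data to move
    for bad, good in zip(AFFECTED, CORRECTED):
        # Wherever V4a has a value, move it into the V4 column.
        df[bad] = df[bad].where(df[good].isna(), df[good])
    return df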

facebook/delphiFacebook/python/delphi_facebook/qualtrics.py

Lines changed: 1 addition & 1 deletion

@@ -94,7 +94,7 @@ def get(fetch,post,params):
            print()
            wait,waitt = progress(t)
        if progressStatus=="failed":
-           return r
+           raise Exception(f"ERROR: could not download \"{surv['name']}\"\n{json.dumps(r.json(),sort_keys=True,indent=2)}")
        fileId = r.json()['result']['fileId']
        r = fetch(f"{base}{fileId}/file")
        if not r.ok: return r
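
The fix swaps a silent return of the failed response for an exception that embeds the export-status payload, so a failed Qualtrics export stops the pipeline with diagnostics instead of handing a bad response downstream. A generic sketch of the pattern (the helper and its signature are ours, not the module's):

import json

def require_export_success(response, status, survey_name):
    # Fail fast, carrying the API's own error payload in the message.
    if status == "failed":
        raise Exception(
            f'ERROR: could not download "{survey_name}"\n'
            f"{json.dumps(response.json(), sort_keys=True, indent=2)}")
    return response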

facebook/delphiFacebook/tests/testthat/test-responses.R

Lines changed: 70 additions & 3 deletions

@@ -116,7 +116,7 @@ test_that("filter_responses works correctly", {
               expected)
 })
 
-test_that("filter_data_for_aggregatation works correctly", {
+test_that("filter_data_for_aggregation works correctly", {
  params <- list(start_date=as.Date("2021-01-05"), static_dir=test_path("static"))
 
  input <- tibble(
@@ -135,7 +135,7 @@ test_that("filter_data_for_aggregatation works correctly", {
    date = c("2021-01-01", "2021-01-01", "2021-01-01", "2021-01-01")
  )
 
-  expect_equal(filter_data_for_aggregatation(input, params),
+  expect_equal(filter_data_for_aggregation(input, params),
               expected)
 })
 
@@ -170,6 +170,73 @@ test_that("V4 bodge works correctly", {
 
  expect_equal(bodge_v4_translation(foo),
               expected)
+
+  # Ensure the bodge works when all responses are in English to V4a; see PR #888
+  foo <- tibble(
+    UserLanguage = "EN",
+    V4a_1 = c(2, NA, NA, NA, 3),
+    V4a_2 = c(2, NA, NA, NA, 3),
+    V4a_3 = c(2, NA, NA, NA, 3),
+    V4a_4 = c(1, NA, NA, NA, 3),
+    V4a_5 = c(2, NA, NA, NA, 3)
+  )
+
+  expected <- tibble(
+    UserLanguage = foo$UserLanguage,
+    V4a_1 = foo$V4a_1,
+    V4a_2 = foo$V4a_2,
+    V4a_3 = foo$V4a_3,
+    V4a_4 = foo$V4a_4,
+    V4a_5 = foo$V4a_5,
+    V4_1 = c(2, NA, NA, NA, 3),
+    V4_2 = c(2, NA, NA, NA, 3),
+    V4_3 = c(2, NA, NA, NA, 3),
+    V4_4 = c(1, NA, NA, NA, 3),
+    V4_5 = c(2, NA, NA, NA, 3)
+  )
+
+  expect_equal(bodge_v4_translation(foo),
+               expected)
+
+  # Ensure the bodge works when *no* responses are in English, but they're to
+  # V4a, which needs no censoring
+  foo <- tibble(
+    UserLanguage = "ES",
+    V4a_1 = c(2, NA, NA, NA, 3),
+    V4a_2 = c(2, NA, NA, NA, 3),
+    V4a_3 = c(2, NA, NA, NA, 3),
+    V4a_4 = c(1, NA, NA, NA, 3),
+    V4a_5 = c(2, NA, NA, NA, 3)
+  )
+
+  expected <- tibble(
+    UserLanguage = foo$UserLanguage,
+    V4a_1 = foo$V4a_1,
+    V4a_2 = foo$V4a_2,
+    V4a_3 = foo$V4a_3,
+    V4a_4 = foo$V4a_4,
+    V4a_5 = foo$V4a_5,
+    V4_1 = c(2, NA, NA, NA, 3),
+    V4_2 = c(2, NA, NA, NA, 3),
+    V4_3 = c(2, NA, NA, NA, 3),
+    V4_4 = c(1, NA, NA, NA, 3),
+    V4_5 = c(2, NA, NA, NA, 3)
+  )
+
+  expect_equal(bodge_v4_translation(foo),
+               expected)
+
+  # Ensure functioning on earlier waves before V4a happened
+  foo <- tibble(
+    UserLanguage = "EN",
+    V4_1 = c(2, NA, NA, NA, 3),
+    V4_2 = c(2, NA, 4, NA, 3),
+    V4_3 = c(2, NA, NA, NA, 3),
+    V4_4 = c(1, NA, NA, NA, 3),
+    V4_5 = c(2, NA, NA, NA, 3)
+  )
+
+  expect_equal(bodge_v4_translation(foo), foo)
 })
 
 
@@ -182,7 +249,7 @@ test_that("C6/8 bodge works correctly", {
    C8_2 = c(1, 2, 3, 4),
    C8_3 = c(1, 2, 3, 4)
  )
-
+
  expect_equal(bodge_C6_C8(input),
              input)
 

sir_complainsalot/delphi_sir_complainsalot/check_source.py

Lines changed: 11 additions & 0 deletions

@@ -119,6 +119,17 @@ def check_source(data_source, meta, params, grace, logger):
 
        gap_days = [(day - prev_day).days
                    for day, prev_day in zip(unique_dates[1:], unique_dates[:-1])]
+
+       # If we only have a single day of data available then gap days will be
+       # empty.
+       if not gap_days:
+           logger.info(
+               "Not enough data to calculate gap days.",
+               data_source=data_source,
+               signal=row["signal"],
+               geo_type=row["geo_type"])
+           continue
+
        gap = max(gap_days) - 1
        logger.info("Detecting days with data present",
                    data_source = data_source,
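
The guard is needed because the gap computation zips the date list against itself shifted by one; with a single unique date, both slices are empty and max(gap_days) would raise a ValueError. A small illustration (the helper name is ours, not the module's):

from datetime import date

def gaps(unique_dates):
    # Day differences between consecutive dates; empty if fewer than 2 dates.
    return [(day - prev).days
            for day, prev in zip(unique_dates[1:], unique_dates[:-1])]

print(gaps([date(2021, 3, 1)]))                    # [] -> log and continue
print(gaps([date(2021, 3, 1), date(2021, 3, 5)]))  # [4] -> gap of 3 days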

sir_complainsalot/params.json.template

Lines changed: 1 addition & 1 deletion

@@ -39,7 +39,7 @@
     "fb-survey": {
       "max_age": 3,
       "maintainers": ["U01069KCRS7"],
-      "retired-signals": ["smoothed_wearing_mask"]
+      "retired-signals": ["smoothed_wearing_mask", "smoothed_wwearing_mask"]
     },
     "indicator-combination": {
       "max_age": 3,
