|
| 1 | +# Tianchen Qian |
| 2 | +# 2019/2/5 |
| 3 | + |
| 4 | +# This code is to test the day-shifting part for primary hypothesis 2 |
| 5 | + |
| 6 | + |
| 7 | +find_change_location <- function(v){ |
| 8 | + n <- length(v) |
| 9 | + if (n <= 1) { |
| 10 | + stop("The vector need to have length > 1.") |
| 11 | + } |
| 12 | + return(c(1, 1 + which(v[1:(n-1)] != v[2:n]))) |
| 13 | +} |
| 14 | + |
| 15 | + |
| 16 | +shift_day <- function( |
| 17 | + dta, |
| 18 | + id_var = "userid", |
| 19 | + day_var = "Day", |
| 20 | + trt_var = "A", |
| 21 | + survey_completion_var, |
| 22 | + outcome_var = "Y", |
| 23 | + avail_var = NULL |
| 24 | +) { |
| 25 | + # make sure survey_completion_var is binary |
| 26 | + stopifnot(all(dta[, survey_completion_var] %in% c(0, 1))) |
| 27 | + |
| 28 | + # make sure dta is sorted by id_var then day_var |
| 29 | + dta <- dta[order(dta[, id_var], dta[, day_var]), ] |
| 30 | + |
| 31 | + # create new_dta with shifted outcome (for day t, the outcome is Y_{t+1}) |
| 32 | + new_dta <- dta |
| 33 | + new_dta$survey_completion_internal <- new_dta[, survey_completion_var] |
| 34 | + new_dta[1:(nrow(new_dta)-1), outcome_var] <- dta[2:nrow(dta), outcome_var] |
| 35 | + |
| 36 | + # remove the last observation for each individual (since there is no Y_{t+1} for the last day) |
| 37 | + person_first_index <- find_change_location(dta[, id_var]) |
| 38 | + if (length(person_first_index) != length(unique(dta[, id_var]))) { |
| 39 | + stop("The length of person_first_index doesn't equal the number of unique id_var's.") |
| 40 | + } |
| 41 | + person_last_index <- c(person_first_index[-1] - 1, nrow(dta)) |
| 42 | + new_dta <- new_dta[-person_last_index, ] |
| 43 | + |
| 44 | + # create avail_new, which is the product of both the original availability indicator |
| 45 | + # and the survey_completion_var |
| 46 | + if (is.null(avail_var)) { |
| 47 | + avail <- rep(1, nrow(new_dta)) |
| 48 | + } else { |
| 49 | + avail <- new_dta[, avail_var] |
| 50 | + } |
| 51 | + new_dta$avail_new <- avail * new_dta$survey_completion_internal |
| 52 | + |
| 53 | + return(new_dta) |
| 54 | +} |
| 55 | + |
| 56 | + |
| 57 | +user1 <- data.frame( |
| 58 | + userid = rep(1, 5), |
| 59 | + Day = 1:5, |
| 60 | + Y = c(0,1,1,0,1) |
| 61 | +) |
| 62 | + |
| 63 | +user2 <- data.frame( |
| 64 | + userid = rep(2, 3), |
| 65 | + Day = 1:3, |
| 66 | + Y = c(1,1,1) |
| 67 | +) |
| 68 | + |
| 69 | +user3 <- data.frame( |
| 70 | + userid = rep(3, 3), |
| 71 | + Day = 1:3, |
| 72 | + Y = c(0,0,0) |
| 73 | +) |
| 74 | + |
| 75 | +user4 <- data.frame( |
| 76 | + userid = rep(4, 4), |
| 77 | + Day = 1:4, |
| 78 | + Y = c(1,1,0,0) |
| 79 | +) |
| 80 | + |
| 81 | +dta <- rbind(user1, user2, user3, user4) |
| 82 | + |
| 83 | +new_dta <- shift_day(dta, survey_completion_var = "Y") |
| 84 | + |
| 85 | + |
| 86 | + |
| 87 | +print(dta) |
| 88 | + |
| 89 | +print(new_dta) |
| 90 | + |
| 91 | +# check manually that all the following tests are passed for new_dta: |
| 92 | +# (1) The last day of each individual is removed |
| 93 | +# (2) Y_t equals Y_{t+1} in dta |
| 94 | +# (3) avail_new is 1 only if the survey_completion_internal is 1 |
| 95 | +# (because the estimand is defined conditional on "completed survey on current day") |
0 commit comments