-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsim.R
161 lines (161 loc) · 7.31 KB
/
sim.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
sim <- function(n = 30, tmax = 30, M = 1000,
## response regression models
y.formula = list(w = y ~ I(a - pn) * (base + state),
u = y ~ a * (base + state)),
## names for each regression model
y.names = c(w = "Weighted and centered",
u = "GEE AR(1)"),
## labels for regression terms of the treatment effect
y.label = list(w = "I(a - pn)"),
## names of the treatment probability models or variables used
## for the weight numerator ('wn') or denominator ('wd') and
## arguments for the estimation routine
y.args = list(w = list(wn = "pn", wd = "pd"),
u = list(corstr = "ar1")),
## treatment probability models named in 'y.args'
a.formula = list(pn = a ~ lag1a,
pd = a ~ lag1a + state),
## names for each treatment probability model
a.names = c(pn = "Last treatment",
pd = "Last treatment and current state"),
## proximal (0) or delayed (1) treatment effect?
lag = 0,
## print generative and analysis model details
verbose = TRUE,
## control parameters for 'rsnmm'
control, ...) {
control <- if (missing(control)) rsnmm.control(...)
else control <- do.call("rsnmm.control", control)
## times to use in the model fit
runin.fita <- control$lag
runin.fity <- control$lag + lag
## retrieve causal control parameter values
## nb: if the regression models 'y.formula' average over an underlying
## moderator these will not represent the true causal effect unless this
## moderator has conditional mean zero
y.coef <- mapply(which.terms, x = y.formula, label = y.label,
stripnames = TRUE, SIMPLIFY = FALSE)
truth <- control[[paste0("beta", lag)]]
truth <- truth[Reduce("intersect", lapply(y.coef, names))]
y.coef <- lapply(y.coef, function(x) x[names(truth)])
## corresponding treatment probability models
## nb: we avoid delayed evaluation in 'y.args' (e.g. passing a 'weights'
## argument directly) to avoid scoping issues in 'foreach'
if (!is.null(a.formula)) {
y.prob <- lapply(y.args, function(x) do.call("c", x[c("wn", "wd")]))
y.prob <- lapply(y.prob, function(x) x[x %in% names(a.formula)])
}
else y.prob <- lapply(y.formula, function(x) list())
## print generative and analysis model properties
if (verbose) {
cat("\nGenerative model attributes\n\n")
print(control)
cat("Analysis models\n\n")
mapply(function(f, nm) write.table(cbind(" ", nm, ": y ~ ",
as.character(f)[3]), sep = "",
row.names = FALSE, col.names = FALSE,
quote = FALSE, eol = "\n\n"),
f = y.formula, nm = y.names)
cat("Treatment probability models\n\n")
mapply(function(f, nm) write.table(cbind(" ", nm, ": a ~ ",
as.character(f)[3]), sep = "",
row.names = FALSE, col.names = FALSE,
quote = FALSE, eol = "\n\n"),
f = a.formula, nm = a.names)
}
## general model fitter
## nb: d is the data frame for the replicate
fitter <- function(formula, args, prob, coef, label, response = "y",
addvar = NULL) {
if (response == "a") {
args$family <- binomial()
runin <- runin.fita
}
else runin <- runin.fity
r <- which(d$time >= runin)
l <- list(x = model.matrix(formula, data = d[r, ]), y = d[r, response])
if (is.null(args$wn) & is.null(args$wn)) l$w <- rep(1, nrow(d))
else {
l$w <- ifelse(d[, "a"] == 1, d[, args$wn] / d[, args$wd],
(1 - d[, args$wn]) / (1 - d[, args$wd]))
args[c("wn", "wd")] <- NULL
}
l$w <- l$w * d$avail
if (lag) l$w <- delay(d$id, d$time, l$w, lag)
l$w <- l$w[r]
if (!is.null(args$corstr)) {
fun <- "geese.glm"
l$id <- d$id[r]
}
else if (!is.null(args$family)) fun <- "glm.fit"
else fun <- "lm.wfit"
fit <- do.call(fun, c(l, args))
if (!inherits(fit, "geeglm")) {
fit <- glm2gee(fit, d$id[r])
fit$terms <- terms(formula)
fit$geese$X <- l$x
fit$y <- l$y
}
if (!is.null(addvar)) {
newvar <- paste0(c("", "lag1"), addvar)
d[, newvar] <<- NA
d[r, newvar[1]] <<- fit$fitted.values
d[, newvar[2]] <<- delay(d$id, d$time, d[, newvar[1]])
}
else {
## usual variance sandwich estimator
fit$vcov <- vcov.geeglm(fit)
est <- estimate(fit)[coef, 1:4, drop = FALSE]
## correction for any estimates in weights
l <- if (length(prob)) setNames(fita[prob], gsub("^w", "p", names(prob)))
else NULL
fit$vcov <- NULL
fit$vcov <- do.call("vcov.geeglm", c(list(x = fit, label = label), l))
estc <- estimate(fit)[coef, 1:4, drop = FALSE]
fit <- data.frame(moderator = names(coef), truth = truth,
est = est[, "Estimate"], se = est[, "SE"],
lcl = est[, "95% LCL"], ucl = est[, "95% UCL"],
sec = estc[, "SE"], lclc = estc[, "95% LCL"],
uclc = estc[, "95% UCL"], row.names = NULL)
}
fit
}
fita <- list()
## for each replicate...
out <- foreach(m = 1:M, .combine = "rbind") %dopar% {
## ... generate data
d <- rsnmm(n, tmax, control = control)
d$pn <- d$pd <- d$prob
## ... fit treatment probability models
if (!is.null(a.formula))
fita <- mapply(fitter, formula = a.formula, addvar = names(a.formula),
MoreArgs = list(args = list(), prob = list(),
coef = list(), label = list(),
response = "a"), SIMPLIFY = FALSE)
## ... fit response models
fity <- mapply(fitter, formula = y.formula, args = y.args, prob = y.prob,
coef = y.coef, label = y.label, SIMPLIFY = FALSE)
fity <- mapply(function(nm, d) data.frame(iter = m, method = nm, d,
row.names = NULL),
nm = y.names[names(fity)], d = fity, SIMPLIFY = FALSE)
out <- do.call("rbind", setNames(fity, NULL))
}
out <- data.frame(n, tmax, out)
## 95% CI coverage probability using uncorrected SEs
out$cp <- with(out, lcl <= truth & truth <= ucl)
## coverage probability using SEs corrected for estimates in weights
out$cpc <- with(out, lclc <= truth & truth <= uclc)
## root MSE
out$rmse <- with(out, (est - truth)^2)
## mean and SD estimate, number of replicates
out <- cbind(aggregate(cbind(est, se, sec, cp, cpc, rmse) ~
method + moderator + truth + n + tmax,
data = out, FUN = mean),
sd = aggregate(est ~ method + moderator + truth + n + tmax,
data = out, FUN = sd)$est,
iter = aggregate(iter ~ method + moderator + truth + n + tmax,
data = out,
FUN = function(x) length(unique(x)))$iter)
out$rmse <- sqrt(out$rmse)
out
}