-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathDevelopment_SDH_minimal.R
175 lines (130 loc) · 4.46 KB
/
Development_SDH_minimal.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# Load libraries and data -------------------------------------------------
# General packages
pkgs <- c("survival", "mstate", "rms", "riskRegression")
vapply(pkgs, function(pkg) {
if (!require(pkg, character.only = TRUE)) install.packages(pkg)
require(pkg, character.only = TRUE, quietly = TRUE)
}, FUN.VALUE = logical(length = 1L))
# Load datasets
rdata <- readRDS("Data/rdata.rds")
# Expand data to prepare for fitting the model using mstate::crprep()
rdata.w <- crprep(
Tstop = "time",
status = "status_num",
trans = c(1, 2),
id = "id",
keep = c("age", "size", "ncat", "hr_status"),
data = rdata
)
# Save extended data with weights for recurrence (failcode=1)
primary_event <- 1 # breast cancer recurrence is the primary event
rdata.w1 <- rdata.w[rdata.w$failcode == primary_event, ]
# Cumulative incidence function ------------
mfit_rdata <- survfit(
Surv(Tstart, Tstop, status == 1) ~ 1,
data = rdata.w1, weights = weight.cens
)
par(xaxs = "i", yaxs = "i", las = 1)
plot(mfit_rdata,
col = 1, lwd = 2,
xlab = "Years since BC diagnosis",
ylab = "Cumulative incidence", bty = "n",
ylim = c(0, 0.25), xlim = c(0, 5), fun = "event", conf.int = TRUE
)
title("Development data")
# Annual cumulative incidences -------------------
smfit_rdata <- summary(mfit_rdata, times = c(1, 2, 3, 4, 5))
CIF <- cbind.data.frame(
"time" = smfit_rdata$time,
"CI" = 1 - smfit_rdata$surv,
"2.5 %" = 1 - smfit_rdata$upper,
"97.5 %" = 1 - smfit_rdata$lower
)
CIF
### Check non-linearity of continuous predictors ----------------
# Defining knots of the restricted cubic splines ------------------
# Extract knots position of the restricted cubic spline based on the
# original distribution of the data
# Age at breast cancer diagnosis
rcs3_age <- rcspline.eval(rdata$age, nk = 3)
attr(rcs3_age, "dim") <- NULL
attr(rcs3_age, "knots") <- NULL
pos_knots_age <- attributes(rcspline.eval(rdata$age, nk = 3))$knots
rdata$age3 <- rcs3_age
# Size of breast cancer
rcs3_size <- rcspline.eval(rdata$size, nk = 3)
attr(rcs3_size, "dim") <- NULL
attr(rcs3_size, "knots") <- NULL
pos_knots_size <- attributes(rcspline.eval(rdata$size, nk = 3))$knots
rdata$size3 <- rcs3_size
# FG model
dd <- datadist(rdata)
options(datadist = "dd")
fit_fg_rcs <- cph(Surv(Tstart, Tstop, status == 1) ~
rcs(age, pos_knots_age) + rcs(size, pos_knots_size) +
ncat + hr_status,
weights = weight.cens,
x = T,
y = T,
surv = T,
data = rdata.w1
)
P_fg_age_rcs <- Predict(fit_fg_rcs, "age")
P_fg_size_rcs <- Predict(fit_fg_rcs, "size")
plot(P_fg_age_rcs)
plot(P_fg_size_rcs)
options(datadist = NULL)
# Fit the Fine and Gray model assuming linear relation of
# the continuous predictors
dd <- datadist(rdata, adjto.cat = "first")
options(datadist = "dd")
fit_fg <- cph(Surv(Tstart, Tstop, status == 1) ~ age + size +
ncat + hr_status,
weights = weight.cens,
x = T,
y = T,
surv = T,
data = rdata.w1
)
options(datadist = NULL)
res_AIC <- c("Without splines" = AIC(fit_fg),
"With splines" = AIC(fit_fg_rcs))
res_AIC
# The AIC and the graphical check suggested
# a potential linear relation
# between the continuous predictors (age and size)
# and the event of interest (breast cancer recurrence).
### Checking proportional subdistribution hazards assumption ---------
zp_fg <- cox.zph(fit_fg, transform = "identity")
par(las = 1, xaxs = "i", yaxs = "i")
# c(bottom, left, top, right)
oldpar <- par(mfrow = c(2, 2), mar = c(2, 2, 3, 2))
sub_title <- c("Age", "Size", "Lymph node status", "HR status")
for (i in 1:4) {
plot(zp_fg[i],
resid = F,
bty = "n",
xlim = c(0, 5))
abline(0, 0, lty = 3)
title(sub_title[i])
}
mtext("Fine and Gray",
side = 3,
line = -1,
outer = TRUE,
font = 2)
par(oldpar)
zp_fg$table
# The statistical tests showed
# a potential violation of the proportional hazards assumption
# for size of breast cancer.
# For simplicity we ignore this violation in the remainder.
### Model development - fit the risk prediction models ---------------------
fit_fg <- coxph(Surv(Tstart, Tstop, status == 1) ~
age + size + ncat + hr_status,
weights = weight.cens,
x = T,
y = T,
data = rdata.w1
)
# NOTE: rms:cph() can also be used as before