-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRcodes_NimbleGDR.R
474 lines (335 loc) · 10.9 KB
/
Rcodes_NimbleGDR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
# ---- R codes associated with NIMBLE workshop for GDR Ecostat 2022
## Olivier Gimenez, Maud Quéroué, Valentin Lauret
# slides at https://oliviergimenez.github.io/nimble-workshop/
# ---- load packages ----
library(tidyverse)
library(nimble)
theme_set(theme_light())
# ---- Let's start with a Capture Recapture Example ----
## we capture, mark and release n=57 animals
## we recapture y=19 animals alive
y <- 19 # nb of success
n <- 57 # nb of attempts
# ---- 1. Build model ----
## Specify binomial likelihood and uniform prior on survival probability theta
model <- nimbleCode({
# likelihood
survived ~ dbinom(theta, released)
# prior
theta ~ dunif(0, 1)
# derived quantity
lifespan <- -1/log(theta) # expected lifespan
# w/ constant survival
})
## check the model R object
model
# ---- 2. Read in data ----
## Constants are values that do not change
my.constants <- list(released = 57)
## Data are values that change, i.e. left of a ~
my.data <- list(survived = 19)
# ---- 3. Specify parameters ----
parameters.to.save <- c("theta", "lifespan")
# ---- 4. Initial values ----
init1 <- list(theta = 0.1)
init2 <- list(theta = 0.5)
initial.values <- list(init1, init2)
initial.values
## Alternative intitalization
initial.values <- function() list(theta = runif(1,0,1))
initial.values()
# ---- 5. Provide MCMC details ----
n.iter <- 5000
n.burnin <- 1000
n.chains <- 2
# ---- RUN NIMBLE ----
## Do not focus on the warnings for now
# Running time : +/- 30 sec
mcmc.output <- nimbleMCMC(code = model,
data = my.data,
constants = my.constants,
inits = initial.values,
monitors = parameters.to.save,
niter = n.iter,
nburnin = n.burnin,
nchains = n.chains)
# ---- INSPECT MCMC OUTPUT ----
str(mcmc.output)
## Dimensions
dim(mcmc.output$chain1)
## Return values
head(mcmc.output$chain1)
## Compute posterior summaries
mean(mcmc.output$chain1[,'theta'])
## 95% credible interval
quantile(mcmc.output$chain1[,'theta'], probs = c(2.5, 97.5)/100)
## Visualize posterior distribution with a histogram
mcmc.output %>%
as_tibble() %>%
ggplot() +
geom_histogram(aes(x = chain1[,"theta"]), color = "white") +
labs(x = "survival probability")
# ---- MCMC VISUALIZATION ----
## load package
library(MCMCvis)
## Numerical summaries
MCMCsummary(object = mcmc.output, round = 2)
## Caterpillar plot for posterior distribution
MCMCplot(object = mcmc.output, params = 'theta')
## Trace and posterior density
MCMCtrace(object = mcmc.output,
pdf = FALSE, # no export to PDF
ind = TRUE, # separate density lines per chain
params = "theta")
## Add convergence diagnostic
MCMCtrace(object = mcmc.output,
pdf = FALSE,
ind = TRUE,
Rhat = TRUE, # add Rhat
n.eff = TRUE, # add eff sample size
params = "theta")
# ---- DERIVED QUANTIITES -----
## pool theta samples between the three chains
theta_samples <- c(mcmc.output$chain1[,'theta'],
mcmc.output$chain2[,'theta'],
mcmc.output$chain3[,'theta'])
## calculate lifespan from all theta samples
lifespan <- -1/log(theta_samples)
## obtain numerical summaries
mean(lifespan)
quantile(lifespan, probs = c(2.5, 97.5)/100)
## visualize lifespan posterior distribution
lifespan %>%
as_tibble() %>%
ggplot() +
geom_histogram(aes(x = value), color = "white") +
labs(x = "lifespan")
# ---- Programming NIMBLE FUNCTIONS ----
## `run` section gives the function to be executed.
## `theta = double(0)` and `returnType(double(0))` arguments tell that input and output
## are single numeric values (scalars).
## double(1) and double(2) would mean vectors and matrices
## write a function to compute lifespan = - 1/log(theta) as in the CR example
computeLifespan <- nimbleFunction(
run = function(theta = double(0)) { # type declarations
ans <- -1/log(theta)
return(ans)
returnType(double(0)) # return type declaration
} )
## try the function
computeLifespan(0.8)
## compile in C++
CcomputeLifespan <- compileNimble(computeLifespan)
CcomputeLifespan(0.8)
## Use Nimble function in a model
model <- nimbleCode({
# likelihood
survived ~ dbinom(theta, released)
# prior
theta ~ dunif(0, 1)
# derived quantity
lifespan <- computeLifespan(theta)
})
## The rest of the workflow remain the same
my.data <- list(survived = 19, released = 57)
parameters.to.save <- c("theta", "lifespan")
initial.values <- function() list(theta = runif(1,0,1))
n.iter <- 5000
n.burnin <- 1000
n.chains <- 2
mcmc.output <- nimbleMCMC(code = model,
data = my.data,
inits = initial.values,
monitors = parameters.to.save,
niter = n.iter,
nburnin = n.burnin,
nchains = n.chains)
MCMCsummary(object = mcmc.output, round = 2)
# ---- Programming: CALLING R/C++ FUNCTIONS ----
## an example R function
myfunction <- function(x) {
-1/log(x)
}
## to use the function in Nimble
## use nimbleExternallCall() for a C/C++ function
Rmyfunction <-
nimbleRcall(prototype = function(x = double(0)){},
Rfun = 'myfunction',
returnType = double(0))
## call the function in a model
model <- nimbleCode({
# likelihood
survived ~ dbinom(theta, released)
# prior
theta ~ dunif(0, 1)
lifespan <- Rmyfunction(theta)
})
## The rest of the workflow remain the same
my.data <- list(survived = 19, released = 57)
parameters.to.save <- c("theta", "lifespan")
initial.values <- function() list(theta = runif(1,0,1))
n.iter <- 5000
n.burnin <- 1000
n.chains <- 2
mcmc.output <- nimbleMCMC(code = model,
data = my.data,
inits = initial.values,
monitors = parameters.to.save,
niter = n.iter,
nburnin = n.burnin,
nchains = n.chains)
MCMCsummary(object = mcmc.output, round = 2)
# ---- USER DEFINED DISTRIBUTIONS ----
## write a binomial distribution
# density
dmybinom <- nimbleFunction(
run = function(x = double(0),
size = double(0),
prob = double(0),
log = integer(0, default = 1)) {
returnType(double(0))
# compute binomial coefficient
lchoose <- lfactorial(size) - lfactorial(x) - lfactorial(size - x)
# binomial density function
logProb <- lchoose + x * log(prob) + (size - x) * log(1 - prob)
if(log) return(logProb)
else return(exp(logProb))
})
# simulation using the coin flip method (p. 524 in Devroye 1986)
rmybinom <- nimbleFunction(
run = function(n = integer(0, default = 1),
size = double(0),
prob = double(0)) {
returnType(double(0))
x <- 0
y <- runif(n = size, min = 0, max = 1)
for (j in 1:size){
if (y[j] < prob){
x <- x + 1
}else{
x <- x
}
}
return(x)
})
# define the nimbleFunctions in R global environment
assign('dmybinom', dmybinom, .GlobalEnv)
assign('rmybinom', rmybinom, .GlobalEnv)
## try your function
rmybinom(n = 1, size = 5, prob = 0.1)
rmybinom(n = 1, size = 5, prob = 0.8)
## usual workflow
model <- nimbleCode({
# likelihood
survived ~ dmybinom(prob = theta, size = released)
# prior
theta ~ dunif(0, 1)
})
mcmc.output <- nimbleMCMC(code = model,
data = my.data,
inits = initial.values,
niter = n.iter,
nburnin = n.burnin,
nchains = n.chains)
MCMCsummary(mcmc.output)
# ---- DETAILED NIMBLE WORKFLOW ----
## As before: model code, read in data and pick initial values
model <- nimbleCode({
# likelihood
survived ~ dbinom(theta, released)
# prior
theta ~ dunif(0, 1)
# derived quantity
lifespan <- -1/log(theta)
})
my.data <- list(survived = 19, released = 57)
initial.values <- list(theta = 0.5)
## 1. Create the model as an R object
survival <- nimbleModel(code = model,
data = my.data,
inits = initial.values)
## Look at the values stored at each node
survival$getNodeNames()
survival$theta
survival$survived
survival$lifespan
# this is
-1/log(0.5)
## calculate the log-likelihood at the initial value for theta
survival$calculate()
# this is
dbinom(x = 19, size = 57, prob = 0.5, log = TRUE)
## 2. Compile Nimble
# generate C++ code that can be used in R
Csurvival <- compileNimble(survival)
Csurvival$theta
## Performing Maximum Likelihood Estimation with NIMBLE
# using either `survival` or `Csurvival`
# function for negative log-likelihood to minimize
f <- function(par) {
Csurvival[['theta']] <- par # assign par to theta
ll <- Csurvival$calculate() # update log-likelihood with par value
return(-ll) # return negative log-likelihood
}
# evaluate function at 0.5 and 0.9
f(0.5)
f(0.9)
## MLE
# minimize function
out <- optimize(f, interval = c(0,1))
round(out$minimum, 2)
## 3. MCMC configuration
survivalConf <- configureMCMC(survival)
# add a parameter to monitor
survivalConf$addMonitors(c("lifespan"))
survivalConf
## 4. Create MCMC function
# build MCMC function
survivalMCMC <- buildMCMC(survivalConf)
# compile MCMC function
CsurvivalMCMC <- compileNimble(survivalMCMC, project = survival)
## 5. Run Nimble
n.iter <- 5000
n.burnin <- 1000
# use runMCMC()
samples <- runMCMC(mcmc = CsurvivalMCMC,
niter = n.iter,
nburnin = n.burnin)
head(samples)
## obtain numerical summaries
samplesSummary(samples)
# ---- MANAGE MCMC SAMPLERS ----
## print samplers
# survivalConf <- configureMCMC(survival)
survivalConf$printSamplers()
# to see Nimble available samplers
?samplers
## change samplers
# removing default sampler
survivalConf$removeSamplers(c('theta'))
survivalConf$printSamplers()
# change it for a slice sampler
survivalConf$addSampler(target = c('theta'),
type = 'slice')
survivalConf$printSamplers()
## resume with the Nimble workflow
# build and compile MCMC
survivalMCMC2 <- buildMCMC(survivalConf)
CsurvivalMCMC2 <- compileNimble(survivalMCMC2,
project = survival,
resetFunctions = TRUE)
# run NIMBLE
samples2 <- runMCMC(mcmc = CsurvivalMCMC2,
niter = n.iter,
nburnin = n.burnin)
# obtain numerical summaries:
samplesSummary(samples2)
# ---- TIPS AND TRICKS ----
## Updating MCMC chains
niter_ad <- 6000
CsurvivalMCMC$run(niter_ad, reset = FALSE)
# extract previous MCMC samples augmented with new samples
more_samples <- as.matrix(CsurvivalMCMC$mvSamples)
samplesSummary(more_samples)
# 4000 first iterations + 6000 added iterations
dim(more_samples)