Skip to content

Commit a7e762e

Browse files
authored
Merge pull request #418 from spsanderson/development
Fixes #417
2 parents eed9d67 + 81aeea4 commit a7e762e

File tree

3 files changed

+113
-0
lines changed

3 files changed

+113
-0
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ export(tidy_inverse_weibull)
6161
export(tidy_kurtosis_vec)
6262
export(tidy_logistic)
6363
export(tidy_lognormal)
64+
export(tidy_mcmc_sampling)
6465
export(tidy_mixture_density)
6566
export(tidy_multi_dist_autoplot)
6667
export(tidy_multi_single_dist)

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ None
77
1. Fix #405 - Add function `quantile_normalization()` to normalize data using quantiles.
88
2. Fix #409 - Add function `check_duplilcate_rows()` to check for duplicate rows in a data frame.
99
3. Fix #414 - Add function `util_chisquare_param_estimate()` to estimate the parameters of the chi-square distribution.
10+
4. Fix #417 - Add function `tidy_mcmc_sampling()` to sample from a distribution using MCMC.
11+
This outputs the function sampled data and a diagnostic plot.
1012

1113
## Minor Fixes and Improvements
1214
1. Fix #401 - Update `tidy_multi_single_dist()` to respect the `.return_tibble` parameter

R/utils-mcmc-sampling.R

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#' Tidy MCMC Sampling
2+
#'
3+
#' Perform MCMC sampling and return tidy data and a plot.
4+
#'
5+
#' @family Utility
6+
#'
7+
#' @author Steven P. Sanderson II, MPH
8+
#'
9+
#' @description
10+
#' This function performs Markov Chain Monte Carlo (MCMC) sampling on the input
11+
#' data and returns tidy data and a plot representing the results.
12+
#'
13+
#' @details
14+
#' The function takes a data vector as input and performs MCMC sampling with the
15+
#' specified number of simulations. It applies user-defined functions to each
16+
#' MCMC sample and to the cumulative MCMC samples. The resulting data is
17+
#' formatted in a tidy format, suitable for further analysis. Additionally, a
18+
#' plot is generated to visualize the MCMC samples and cumulative statistics.
19+
#'
20+
#' @param .x The data vector for MCMC sampling.
21+
#' @param .fns The function(s) to apply to each MCMC sample. Default is "mean".
22+
#' @param .cum_fns The function(s) to apply to the cumulative MCMC samples. Default is "cmean".
23+
#' @param .num_sims The number of simulations. Default is 2000.
24+
#'
25+
#' @return A list containing tidy data and a plot.
26+
#'
27+
#' @examples
28+
#' # Generate MCMC samples
29+
#' set.seed(123)
30+
#' data <- rnorm(100)
31+
#' result <- tidy_mcmc_sampling(data, "median", "cmedian", 500)
32+
#' result
33+
#'
34+
#' @rdname tidy_mcmc_sampling
35+
NULL
36+
37+
#' @name tidy_mcmc_sampling
38+
#' @export
39+
tidy_mcmc_sampling <- function(.x, .fns = "mean", .cum_fns = "cmean",
40+
.num_sims = 2000) {
41+
42+
# Error handling for data argument
43+
if (!is.vector(.x)) {
44+
rlang::abort(
45+
message = "Error: '.x' argument must be a vector.",
46+
use_cli_format = TRUE
47+
)
48+
}
49+
50+
# Error handling for function arguments
51+
if (!exists(.fns)) {
52+
rlang::abort(
53+
message = "Error: '.fns' argument must be a valid function name. Make sure
54+
any necessary libraries are loaded.",
55+
use_cli_format = TRUE
56+
)
57+
}
58+
59+
if (!exists(.cum_fns)) {
60+
rlang::abort(
61+
message = "Error: '.cum_fns' argument must be a valid function name. Make sure
62+
any necessary libraries are loaded.",
63+
use_cli_format = TRUE
64+
)
65+
}
66+
67+
# Validate number of simulations
68+
nsims <- ifelse(.num_sims > 0, as.integer(.num_sims), 1L)
69+
70+
fns <- match.fun(.fns)
71+
fns_name <- as.character(.fns)
72+
cum_fns <- match.fun(.cum_fns)
73+
cum_fns_name <- as.character(.cum_fns)
74+
nsims <- as.integer(.num_sims)
75+
fctr_lvl_nms <- c(
76+
paste0(".sample_", fns_name),
77+
paste0(".cum_stat_", cum_fns_name)
78+
)
79+
80+
df <- TidyDensity::tidy_bootstrap(.x = .x, .num_sims = nsims) |>
81+
dplyr::mutate(.sample = purrr::map(bootstrap_samples, \(x) fns(x))) |>
82+
dplyr::select(sim_number, .sample) |>
83+
tidyr::unnest(cols = .sample) |>
84+
dplyr::rename_with(~paste0(., "_", fns_name), .cols = .sample)
85+
86+
mcmc_data <- df |>
87+
dplyr::mutate(.cum_stat = cum_fns(df[[2]])) |>
88+
dplyr::rename_with(~paste0(., "_", cum_fns_name), .cols = .cum_stat) |>
89+
tidyr::pivot_longer(-sim_number) |>
90+
dplyr::mutate(name = factor(name, levels = fctr_lvl_nms))
91+
92+
plt <- mcmc_data |>
93+
ggplot2::ggplot(ggplot2::aes(x = as.numeric(sim_number), y = value)) +
94+
ggplot2::facet_wrap(~name, scales = "free") +
95+
ggplot2::geom_line() +
96+
ggplot2::labs(
97+
y = "Value",
98+
x = "Simulation Number",
99+
title = "MCMC Sampling"
100+
) +
101+
ggplot2::theme_minimal()
102+
103+
# Return
104+
mcmc_list <- list(
105+
mcmc_data = mcmc_data,
106+
plt = plt
107+
)
108+
109+
return(mcmc_list)
110+
}

0 commit comments

Comments
 (0)