Skip to content

Commit 07bda16

Browse files
authored
Merge pull request #484 from spsanderson/development
Fixes #470
2 parents 626eac0 + 8803be0 commit 07bda16

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+484
-34
lines changed

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ export(util_pareto_stats_tbl)
133133
export(util_poisson_aic)
134134
export(util_poisson_param_estimate)
135135
export(util_poisson_stats_tbl)
136+
export(util_rztnbinom_aic)
136137
export(util_t_stats_tbl)
137138
export(util_triangular_param_estimate)
138139
export(util_triangular_stats_tbl)
@@ -142,6 +143,7 @@ export(util_uniform_stats_tbl)
142143
export(util_weibull_aic)
143144
export(util_weibull_param_estimate)
144145
export(util_weibull_stats_tbl)
146+
export(util_ztn_binomial_param_estimate)
145147
importFrom(data.table,.SD)
146148
importFrom(data.table,as.data.table)
147149
importFrom(data.table,melt)

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ None
55

66
## New Features
77
1. #468 - Add function `util_negative_binomial_aic()` to calculate the AIC for the negative binomial distribution.
8+
2. #470 - Add function `util_ztn_binomial_param_estimate()` and `util_rztnbinom_aic()` to estimate the parameters and calculate the AIC for the zero-truncated negative binomial distribution.
89

910
## Minor Improvements and Fixes
1011
1. Fix #468 - Update `util_negative_binomial_param_estimate()` to add the use of

R/est-param-ztn-binmoial.R

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
#' Estimate Zero Truncated Negative Binomial Parameters
2+
#'
3+
#' @family Parameter Estimation
4+
#' @family Binomial
5+
#' @family Zero Truncated Negative Distribution
6+
#'
7+
#' @author Steven P. Sanderson II, MPH
8+
#'
9+
#' @details This function will attempt to estimate the zero truncated negative
10+
#' binomial size and prob parameters given some vector of values.
11+
#'
12+
#' @description The function will return a list output by default, and if the parameter
13+
#' `.auto_gen_empirical` is set to `TRUE` then the empirical data given to the
14+
#' parameter `.x` will be run through the `tidy_empirical()` function and combined
15+
#' with the estimated negative binomial data.
16+
#'
17+
#' One method of estimating the parameters is done via:
18+
#' - MLE via \code{\link[stats]{optim}} function.
19+
#'
20+
#' @param .x The vector of data to be passed to the function.
21+
#' @param .auto_gen_empirical This is a boolean value of TRUE/FALSE with default
22+
#' set to TRUE. This will automatically create the `tidy_empirical()` output
23+
#' for the `.x` parameter and use the `tidy_combine_distributions()`. The user
24+
#' can then plot out the data using `$combined_data_tbl` from the function output.
25+
#'
26+
#' @examples
27+
#' library(dplyr)
28+
#' library(ggplot2)
29+
#' library(actuar)
30+
#'
31+
#' x <- as.integer(mtcars$mpg)
32+
#' output <- util_ztn_binomial_param_estimate(x)
33+
#'
34+
#' output$parameter_tbl
35+
#'
36+
#' output$combined_data_tbl |>
37+
#' tidy_combined_autoplot()
38+
#'
39+
#' set.seed(123)
40+
#' t <- rztnbinom(100, 10, .1)
41+
#' util_ztn_binomial_param_estimate(t)$parameter_tbl
42+
#'
43+
#' @return
44+
#' A tibble/list
45+
#'
46+
#' @export
47+
#'
48+
49+
util_ztn_binomial_param_estimate <- function(.x, .auto_gen_empirical = TRUE) {
50+
51+
# Check if actuar library is installed
52+
if (!requireNamespace("actuar", quietly = TRUE)) {
53+
stop("The 'actuar' package is needed for this function. Please install it with: install.packages('actuar')")
54+
}
55+
56+
# Tidyeval ----
57+
x_term <- as.numeric(.x)
58+
sum_x <- sum(x_term, na.rm = TRUE)
59+
minx <- min(x_term)
60+
maxx <- max(x_term)
61+
m <- mean(x_term, na.rm = TRUE)
62+
n <- length(x_term)
63+
64+
# Negative log-likelihood function for optimization
65+
nll_func <- function(params) {
66+
size <- params[1]
67+
prob <- params[2]
68+
-sum(actuar::dztnbinom(x_term, size = size, prob = prob, log = TRUE))
69+
}
70+
71+
# Initial parameter guesses
72+
initial_params <- c(size = 1, prob = 0.5) # Adjust based on your data
73+
74+
# Optimization using optim()
75+
optim_result <- optim(initial_params, nll_func) |>
76+
suppressWarnings()
77+
78+
# Extract estimated parameters
79+
mle_size <- optim_result$par[1]
80+
mle_prob <- optim_result$par[2]
81+
82+
# Create output tibble
83+
ret <- tibble::tibble(
84+
dist_type = "Zero-Truncated Negative Binomial",
85+
samp_size = n,
86+
min = minx,
87+
max = maxx,
88+
mean = m,
89+
method = "MLE_Optim",
90+
size = mle_size,
91+
prob = mle_prob
92+
)
93+
94+
# Attach attributes
95+
attr(ret, "tibble_type") <- "parameter_estimation"
96+
attr(ret, "family") <- "zero_truncated_negative_binomial"
97+
attr(ret, "x_term") <- .x
98+
attr(ret, "n") <- n
99+
100+
if (.auto_gen_empirical) {
101+
# Generate empirical data
102+
# Assuming tidy_empirical and tidy_combine_distributions functions exist
103+
te <- tidy_empirical(.x = x_term)
104+
td <- tidy_zero_truncated_negative_binomial(
105+
.n = n,
106+
.size = round(mle_size, 3),
107+
.prob = round(mle_prob, 3)
108+
)
109+
combined_tbl <- tidy_combine_distributions(te, td)
110+
111+
output <- list(
112+
combined_data_tbl = combined_tbl,
113+
parameter_tbl = ret
114+
)
115+
} else {
116+
output <- list(
117+
parameter_tbl = ret
118+
)
119+
}
120+
121+
return(output)
122+
}

R/utils-aic-ztn-binomial.R

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#' Calculate Akaike Information Criterion (AIC) for Zero-Truncated Negative Binomial Distribution
2+
#'
3+
#' This function calculates the Akaike Information Criterion (AIC) for a
4+
#' zero-truncated negative binomial (ZTNB) distribution fitted to the provided data.
5+
#'
6+
#' @family Utility
7+
#' @author Steven P. Sanderson II, MPH
8+
#'
9+
#' @description
10+
#' This function estimates the parameters (`size` and `prob`) of a ZTNB
11+
#' distribution from the provided data using maximum likelihood estimation
12+
#' (via the `optim()` function), and then calculates the AIC value based on the
13+
#' fitted distribution.
14+
#'
15+
#' @param .x A numeric vector containing the data (non-zero counts) to be
16+
#' fitted to a ZTNB distribution.
17+
#'
18+
#' @details
19+
#' **Initial parameter estimates:** The choice of initial values for `size`
20+
#' and `prob` can impact the convergence of the optimization. Consider using
21+
#' prior knowledge or method of moments estimates to obtain reasonable starting
22+
#' values.
23+
#'
24+
#' **Optimization method:** The default optimization method used is
25+
#' "Nelder-Mead". You might explore other optimization methods available in
26+
#' `optim()` for potentially better performance or different constraint
27+
#' requirements.
28+
#'
29+
#' **Data requirements:** The input data `.x` should consist of non-zero counts,
30+
#' as the ZTNB distribution does not include zero values.
31+
#'
32+
#' **Goodness-of-fit:** While AIC is a useful metric for model comparison, it's
33+
#' recommended to also assess the goodness-of-fit of the chosen ZTNB model using
34+
#' visualization (e.g., probability plots, histograms) and other statistical
35+
#' tests (e.g., chi-square goodness-of-fit test) to ensure it adequately
36+
#' describes the data.
37+
#'
38+
#' @examples
39+
#' library(actuar)
40+
#'
41+
#' # Example data
42+
#' set.seed(123)
43+
#' x <- rztnbinom(30, size = 2, prob = 0.4)
44+
#'
45+
#' # Calculate AIC
46+
#' util_rztnbinom_aic(x)
47+
#'
48+
#' @return The AIC value calculated based on the fitted ZTNB distribution to
49+
#' the provided data.
50+
#'
51+
#' @name util_rztnbinom_aic
52+
NULL
53+
54+
#' @export
55+
#' @rdname util_rztnbinom_aic
56+
57+
util_rztnbinom_aic <- function(.x) {
58+
# Check if actuar library is installed
59+
if (!requireNamespace("actuar", quietly = TRUE)) {
60+
stop("The 'actuar' package is needed for this function. Please install it with: install.packages('actuar')")
61+
}
62+
63+
# Tidyeval
64+
x <- as.numeric(.x)
65+
66+
# Get parameters
67+
pe <- util_ztn_binomial_param_estimate(x)$parameter_tbl
68+
69+
# Negative log-likelihood function for zero-truncated negative binomial distribution
70+
neg_log_lik_rztnbinom <- function(par, data) {
71+
size <- par[1]
72+
prob <- par[2]
73+
-sum(actuar::dztnbinom(data, size = size, prob = prob, log = TRUE))
74+
}
75+
76+
# Fit zero-truncated negative binomial distribution to data
77+
fit_rztnbinom <- optim(
78+
par = c(size = round(pe$size, 3), prob = round(pe$prob, 3)),
79+
fn = neg_log_lik_rztnbinom,
80+
data = x
81+
) |>
82+
suppressWarnings()
83+
84+
# Extract log-likelihood and number of parameters
85+
logLik_rztnbinom <- -fit_rztnbinom$value
86+
k_rztnbinom <- 2 # Number of parameters (size and prob)
87+
88+
# Calculate AIC
89+
AIC_rztnbinom <- 2 * k_rztnbinom - 2 * logLik_rztnbinom
90+
91+
# Return AIC value
92+
return(AIC_rztnbinom)
93+
}

man/check_duplicate_rows.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/convert_to_ts.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/quantile_normalize.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/tidy_binomial.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/tidy_mcmc_sampling.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/tidy_negative_binomial.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/tidy_zero_truncated_binomial.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/tidy_zero_truncated_negative_binomial.Rd

Lines changed: 5 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/util_bernoulli_param_estimate.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/util_beta_aic.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/util_beta_param_estimate.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/util_binomial_aic.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/util_binomial_param_estimate.Rd

Lines changed: 4 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/util_binomial_stats_tbl.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/util_burr_param_estimate.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/util_cauchy_aic.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)