Skip to content

Commit c7f0198

Browse files
committed
dedicated lm function
1 parent 5535f88 commit c7f0198

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+670
-720
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ Package: capybara
22
Type: Package
33
Title: Fast and Memory Efficient Fitting of Linear Models With High-Dimensional
44
Fixed Effects
5-
Version: 0.7.0
5+
Version: 0.8.0
66
Authors@R: c(
77
person(
88
given = "Mauricio",

NEWS.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# capybara 0.8.0
2+
3+
* Dedicated functions for linear models to avoid the overhead of running
4+
the GLM function with a Gaussian link.
5+
16
# capybara 0.7.0
27

38
* The predict method now allows to pass new data to predict the outcome.

R/autoplot.r renamed to R/autoplot.R

+62-3
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ autoplot.feglm <- function(object, ...) {
5656
}
5757

5858
# stop if the object is not of class feglm or felm
59-
if (!inherits(object, "feglm") && !inherits(object, "felm")) {
60-
stop("The object must be of class 'feglm' or 'felm'")
59+
if (!inherits(object, "feglm")) {
60+
stop("The object must be of class 'feglm'")
6161
}
6262

6363
# if conf_level is not provided, set it to 0.95
@@ -137,5 +137,64 @@ autoplot.feglm <- function(object, ...) {
137137
#'
138138
#' @export
139139
autoplot.felm <- function(object, ...) {
140-
autoplot.feglm(object, ...)
140+
# stop if ggplot2 is not installed
141+
if (!requireNamespace("ggplot2", quietly = TRUE)) {
142+
stop("The 'ggplot2' package is required to use this function")
143+
}
144+
145+
# stop if the object is not of class feglm or felm
146+
if (!inherits(object, "felm")) {
147+
stop("The object must be of class 'felm'")
148+
}
149+
150+
# if conf_level is not provided, set it to 0.95
151+
if (!"conf_level" %in% names(list(...))) {
152+
conf_level <- 0.95
153+
} else {
154+
conf_level <- list(...)$conf_level
155+
}
156+
157+
# check that conf_level is between 0 and 1
158+
if (conf_level <= 0 || conf_level >= 1) {
159+
stop("The confidence level must be between 0 and 1")
160+
}
161+
162+
# Extract the summary of the felm object
163+
res <- summary(object)$cm
164+
colnames(res) <- c("estimate", "std.error", "statistic", "p.value")
165+
166+
# Calculate the critical value for the specified confidence conf_level
167+
alpha <- 1 - conf_level
168+
z <- qnorm(1 - alpha / 2)
169+
170+
# Compute the confidence intervals
171+
conf_data <- data.frame(
172+
term = rownames(res),
173+
estimate = res[, "estimate"],
174+
conf_low = res[, "estimate"] - z * res[, "std.error"],
175+
conf_high = res[, "estimate"] + z * res[, "std.error"]
176+
)
177+
178+
p <- ggplot(conf_data, aes(x = !!sym("term"), y = !!sym("estimate"))) +
179+
geom_errorbar(
180+
aes(
181+
ymin = !!sym("conf_low"),
182+
ymax = !!sym("conf_high")
183+
),
184+
width = 0.1,
185+
color = "#165976"
186+
) +
187+
geom_point() +
188+
labs(
189+
title = sprintf(
190+
"Coefficient Estimates with Confidence Intervals at %s%%",
191+
round(conf_level * 100, 0)
192+
),
193+
x = "Term",
194+
y = "Estimate"
195+
) +
196+
theme_minimal() +
197+
coord_flip()
198+
199+
p
141200
}

R/cpp11.R

+4
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,7 @@ feglm_fit_ <- function(beta_r, eta_r, y_r, x_r, wt_r, theta, family, control, k_
3131
feglm_offset_fit_ <- function(eta_r, y_r, offset_r, wt_r, family, control, k_list) {
3232
.Call(`_capybara_feglm_offset_fit_`, eta_r, y_r, offset_r, wt_r, family, control, k_list)
3333
}
34+
35+
felm_fit_ <- function(y_r, x_r, wt_r, control, k_list) {
36+
.Call(`_capybara_felm_fit_`, y_r, x_r, wt_r, control, k_list)
37+
}

R/feglm_helpers.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ get_index_list_ <- function(k_vars, data) {
511511
#' @description Computes the score matrix
512512
#' @param object Result list
513513
#' @noRd
514-
get_score_matrix_ <- function(object) {
514+
get_score_matrix_feglm_ <- function(object) {
515515
# Extract required quantities from result list
516516
control <- object[["control"]]
517517
data <- object[["data"]]

R/felm.R

+84-18
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#' srr_stats
2-
#' @srrstats {G1.0} Implements a wrapper around `feglm` for linear models with high-dimensional fixed effects.
2+
#' @srrstats {G1.0} Implements linear models with high-dimensional fixed effects.
33
#' @srrstats {G2.1a} Ensures the input `formula` is correctly specified and includes fixed effects.
44
#' @srrstats {G2.1b} Validates that the input `data` is non-empty and of class `data.frame`.
55
#' @srrstats {G2.3a} Uses structured checks for parameters like `weights` and starting values.
@@ -98,21 +98,87 @@ NULL
9898
#' summary(mod)
9999
#'
100100
#' @export
101-
felm <- function(formula = NULL, data = NULL, weights = NULL) {
102-
# Use 'feglm' to estimate the model
103-
# Using felm_fit_ directly leads to the incorrect yhat = Xb
104-
# we need iteratively reweighted least squares
105-
reslist <- feglm(
106-
formula = formula, data = data, weights = weights, family = gaussian()
107-
)
108-
109-
names(reslist)[which(names(reslist) == "eta")] <- "fitted.values"
110-
111-
reslist[["conv"]] <- NULL
112-
reslist[["iter"]] <- NULL
113-
reslist[["family"]] <- NULL
114-
reslist[["deviance"]] <- NULL
115-
116-
# Return result list
117-
structure(reslist, class = "felm")
101+
felm <- function(formula = NULL, data = NULL, weights = NULL, control = NULL) {
102+
# Check validity of formula ----
103+
check_formula_(formula)
104+
105+
# Check validity of data ----
106+
check_data_(data)
107+
108+
# Check validity of control + Extract control list ----
109+
control <- check_control_(control)
110+
111+
# Update formula and do further validity check ----
112+
formula <- update_formula_(formula)
113+
114+
# Generate model.frame
115+
lhs <- NA # just to avoid global variable warning
116+
nobs_na <- NA
117+
nobs_full <- NA
118+
model_frame_(data, formula, weights)
119+
120+
# Get names of the fixed effects variables and sort ----
121+
k_vars <- attr(terms(formula, rhs = 2L), "term.labels")
122+
123+
# Generate temporary variable ----
124+
tmp_var <- temp_var_(data)
125+
126+
# Transform fixed effects and clusters to factors ----
127+
data <- transform_fe_(data, formula, k_vars)
128+
129+
# Determine the number of dropped observations ----
130+
nt <- nrow(data)
131+
nobs <- nobs_(nobs_full, nobs_na, nt)
132+
133+
# Extract model response and regressor matrix ----
134+
nms_sp <- NA
135+
p <- NA
136+
model_response_(data, formula)
137+
138+
# Check for linear dependence ----
139+
# check_linear_dependence_(x, p)
140+
check_linear_dependence_(cbind(y,x), p + 1L)
141+
142+
# Extract weights if required ----
143+
if (is.null(weights)) {
144+
wt <- rep(1.0, nt)
145+
} else {
146+
wt <- data[[weights]]
147+
}
148+
149+
# Check validity of weights ----
150+
check_weights_(wt)
151+
152+
# Get names and number of levels in each fixed effects category ----
153+
nms_fe <- lapply(select(data, all_of(k_vars)), levels)
154+
lvls_k <- vapply(nms_fe, length, integer(1))
155+
156+
# Generate auxiliary list of indexes for different sub panels ----
157+
k_list <- get_index_list_(k_vars, data)
158+
159+
# Fit linear model ----
160+
if (is.integer(y)) {
161+
y <- as.numeric(y)
162+
}
163+
fit <- felm_fit_(y, x, wt, control, k_list)
164+
165+
y <- NULL
166+
x <- NULL
167+
168+
# Add names to beta, hessian, and mx (if provided) ----
169+
names(fit[["coefficients"]]) <- nms_sp
170+
if (control[["keep_mx"]]) {
171+
colnames(fit[["mx"]]) <- nms_sp
172+
}
173+
174+
# Add to fit list ----
175+
fit[["nobs"]] <- nobs
176+
fit[["lvls_k"]] <- lvls_k
177+
fit[["nms_fe"]] <- nms_fe
178+
fit[["formula"]] <- formula
179+
fit[["data"]] <- data
180+
fit[["control"]] <- control
181+
182+
# Return result list ----
183+
structure(fit, class = "felm")
118184
}

R/felm_helpers.R

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#' srr_stats
2+
#' @srrstats {G1.0} Provides modular helper functions for computations in linear models with fixed effects.
3+
#' @noRd
4+
NULL
5+
6+
#' @title Get score matrix
7+
#' @description Computes the score matrix
8+
#' @param object Result list
9+
#' @noRd
10+
get_score_matrix_felm_ <- function(object) {
11+
# Extract required quantities from result list
12+
control <- object[["control"]]
13+
data <- object[["data"]]
14+
w <- object[["weights"]]
15+
16+
# Update weights and dependent variable
17+
y <- data[[1L]]
18+
19+
# Center regressor matrix (if required)
20+
if (control[["keep_mx"]]) {
21+
mx <- object[["mx"]]
22+
} else {
23+
# Extract additional required quantities from result list
24+
formula <- object[["formula"]]
25+
k_vars <- names(object[["lvls_k"]])
26+
27+
# Generate auxiliary list of indexes to project out the fixed effects
28+
k_list <- get_index_list_(k_vars, data)
29+
30+
# Extract regressor matrix
31+
x <- model.matrix(formula, data, rhs = 1L)[, -1L, drop = FALSE]
32+
nms_sp <- attr(x, "dimnames")[[2L]]
33+
attr(x, "dimnames") <- NULL
34+
35+
# Center variables
36+
mx <- center_variables_r_(x, w, k_list, control[["center_tol"]], 10000L)
37+
colnames(mx) <- nms_sp
38+
}
39+
40+
# Return score matrix
41+
mx * (y * w)
42+
}

0 commit comments

Comments
 (0)