some speedups

pachadotdev · pachadotdev · commit 5738441e0f9f · 2025-05-16T19:38:47.000-04:00
diff --git a/R/feglm.R b/R/feglm.R
@@ -165,6 +165,8 @@ feglm <- function(
   lhs <- NA # just to avoid global variable warning
   nobs_na <- NA
   nobs_full <- NA
+  weights_vec <- NA
+  weights_col <- NA
   model_frame_(data, formula, weights)
 
   # Ensure that model response is in line with the chosen model ----
@@ -202,13 +204,13 @@ feglm <- function(
   # Extract weights if required ----
   if (is.null(weights)) {
     wt <- rep(1.0, nt)
-  } else if (exists("weights_vec")) {
+  } else if (!all(is.na(weights_vec))) {
     # Weights provided as vector
     wt <- weights_vec
     if (length(wt) != nrow(data)) {
       stop("Length of weights vector must equal number of observations.", call. = FALSE)
     }
-  } else if (exists("weights_col")) {
+  } else if (!all(is.na(weights_col))) {
     # Weights provided as formula - use the extracted column name
     wt <- data[[weights_col]]
   } else {
diff --git a/R/felm.R b/R/felm.R
@@ -112,6 +112,8 @@ felm <- function(formula = NULL, data = NULL, weights = NULL, control = NULL) {
   lhs <- NA # just to avoid global variable warning
   nobs_na <- NA
   nobs_full <- NA
+  weights_vec <- NA
+  weights_col <- NA
   model_frame_(data, formula, weights)
 
   # Get names of the fixed effects variables and sort ----
@@ -143,13 +145,13 @@ felm <- function(formula = NULL, data = NULL, weights = NULL, control = NULL) {
   # Extract weights if required ----
   if (is.null(weights)) {
     wt <- rep(1.0, nt)
-  } else if (exists("weights_vec")) {
+  } else if (!all(is.na(weights_vec))) {
     # Weights provided as vector
     wt <- weights_vec
     if (length(wt) != nrow(data)) {
       stop("Length of weights vector must equal number of observations.", call. = FALSE)
     }
-  } else if (exists("weights_col")) {
+  } else if (!all(is.na(weights_col))) {
     # Weights provided as formula - use the extracted column name
     wt <- data[[weights_col]]
   } else {
diff --git a/src/00_main.h b/src/00_main.h
@@ -1,11 +1,23 @@
+#pragma once
+
 #include <armadillo.hpp>
 #include <cpp11.hpp>
 #include <cpp11armadillo.hpp>
 #include <regex>
 #include <unordered_map>
 
-using namespace arma;
-using namespace cpp11;
+// using namespace arma;
+using arma::field;
+using arma::mat;
+using arma::uvec;
+using arma::uword;
+using arma::vec;
+
+// using namespace cpp11;
+using cpp11::doubles;
+using cpp11::doubles_matrix;
+using cpp11::integers;
+using cpp11::list;
 
 // used across the scripts
 
diff --git a/src/01_linear_algebra.cpp b/src/01_linear_algebra.cpp
@@ -6,10 +6,12 @@
 [[cpp11::register]] int check_linear_dependence_svd_(const doubles &y,
                                                      const doubles_matrix<> &x,
                                                      const int &p) {
-  mat Y = as_mat(y);
-  mat X = as_mat(x);
-  X = join_rows(Y, X); // paste y and x together
-  int r = rank(X);
+  const mat Y = as_mat(y);
+  const mat X = as_mat(x);
+  mat Z(Y.n_rows, 1 + X.n_cols);
+  Z.col(0) = Y;
+  Z.cols(1, Z.n_cols - 1) = X;
+  int r = rank(Z);
   if (r < p) {
     return 1;
   }
@@ -42,7 +44,6 @@ vec solve_beta_(mat MX, const mat &MNU, const vec &w) {
   const vec sqrt_w = sqrt(w);
 
   MX.each_col() %= sqrt_w;
-  mat WMNU = MNU.each_col() % sqrt_w;
 
   mat XtX = MX.t() * MX;
   vec XtY = MX.t() * (MNU.each_col() % sqrt_w);
diff --git a/src/02_center_variables.cpp b/src/02_center_variables.cpp
@@ -1,3 +1,4 @@
+// 02_center_variables.cpp (refactored using Armadillo types)
 #include "00_main.h"
 
 // Method of alternating projections (Halperin)
@@ -12,108 +13,86 @@ void center_variables_(mat &V, const vec &w, const list &klist,
   const double inv_sw = 1.0 / accu(w);
 
   // Auxiliary variables (storage)
-  size_t iter, j, k, l, m, p, L, J,
-      iter_check_interrupt = iter_check_interrupt0,
-      iter_check_ssr = iter_check_ssr0;
-  double xbar, ratio, ratio0, ssr, ssr0, vprod, ssq, coef;
-  vec x(N), x0(N);
+  size_t iter, j, k, p, J, iter_check_interrupt = iter_check_interrupt0,
+                           iter_check_ssr = iter_check_ssr0;
+  double coef, xbar, ratio, ssr, ssq, ratio0, ssr0;
+  vec x(N), x0(N), Gx(N), G2x(N), deltaG(N), delta2(N);
   field<field<uvec>> group_indices(K);
   field<vec> group_inverse_weights(K);
-
-  // Precompute group indices and weights
   for (k = 0; k < K; ++k) {
     const list &jlist = klist[k];
     J = jlist.size();
-
-    field<uvec> indices(J);
-    vec inverse_weights(J);
-
+    field<uvec> idxs(J);
+    vec invs(J);
     for (j = 0; j < J; ++j) {
-      indices(j) = as_uvec(as_cpp<integers>(jlist[j]));
-      inverse_weights(j) = 1.0 / accu(w.elem(indices(j)));
+      idxs(j) = as_uvec(as_cpp<integers>(jlist[j]));
+      ;
+      invs(j) = 1.0 / accu(w.elem(idxs(j)));
     }
-
-    group_indices(k) = indices;
-    group_inverse_weights(k) = inverse_weights;
+    group_indices(k) = idxs;
+    group_inverse_weights(k) = invs;
   }
 
-  // Pre-allocate vectors for acceleration (outside the loop to avoid
-  // reallocation)
-  vec G_x(N), G2_x(N), delta_G_x(N), delta2_x(N);
-
-  // Halperin projections parallelizing over columns
   for (p = 0; p < P; ++p) {
     x = V.col(p);
-    ratio0 = std::numeric_limits<double>::max();
-    ssr0 = std::numeric_limits<double>::max();
+    ratio0 = std::numeric_limits<double>::infinity();
+    ssr0 = std::numeric_limits<double>::infinity();
 
     for (iter = 0; iter < I; ++iter) {
-      // Check for user interrupt less frequently
       if (iter == iter_check_interrupt) {
         check_user_interrupt();
         iter_check_interrupt += iter_check_interrupt0;
       }
 
-      x0 = x; // Save current x
-
-      // Apply the Halperin projection
-      for (l = 0; l < K; ++l) {
-        L = group_indices(l).size();
-        if (L == 0)
-          continue;
+      x0 = x;
 
-        for (m = 0; m < L; ++m) {
-          const uvec &coords = group_indices(l)(m);
-          xbar =
-              dot(w.elem(coords), x.elem(coords)) * group_inverse_weights(l)(m);
+      // Halperin projection
+      for (k = 0; k < K; ++k) {
+        field<uvec> &idxs = group_indices(k);
+        J = idxs.n_elem;
+        vec &invs = group_inverse_weights(k);
+        for (j = 0; j < J; ++j) {
+          const uvec &coords = idxs(j);
+          xbar = dot(w.elem(coords), x.elem(coords)) * invs(j);
           x.elem(coords) -= xbar;
         }
       }
 
-      // First convergence check
+      // Convergence check
       ratio = dot(abs(x - x0) / (1.0 + abs(x0)), w) * inv_sw;
       if (ratio < tol)
         break;
 
-      // Apply acceleration less frequently - only every 5 iterations instead of
-      // 3 This reduces overhead while still getting acceleration benefits
-      if (iter > 5 && iter % 5 == 0) {
-        G_x = x; // G(x) - the result after one projection
-
-        // Apply another projection to get G(G(x))
-        for (l = 0; l < K; ++l) {
-          L = group_indices(l).size();
-          if (L == 0)
-            continue;
-
-          for (m = 0; m < L; ++m) {
-            const uvec &coords = group_indices(l)(m);
-            xbar = dot(w.elem(coords), G_x.elem(coords)) *
-                   group_inverse_weights(l)(m);
-            G_x.elem(coords) -= xbar;
+      // Acceleration step: runs on every 5th iteration once iter > 5
+      if (iter > 5 && (iter % 5) == 0) {
+        Gx = x;
+        // Second projection
+        for (size_t k = 0; k < K; ++k) {
+          field<uvec> &idxs = group_indices(k);
+          vec &invs = group_inverse_weights(k);
+          for (j = 0; j < idxs.n_elem; ++j) {
+            const uvec &coords = idxs(j);
+            xbar = dot(w.elem(coords), Gx.elem(coords)) * invs(j);
+            Gx.elem(coords) -= xbar;
           }
         }
-        G2_x = G_x; // G²(x)
-
-        // Irons & Tuck acceleration formula
-        delta_G_x = G2_x - x;
-        delta2_x = G2_x - 2 * x + x0;
-
-        ssq = dot(delta2_x, delta2_x);
-        if (ssq > 1e-10) { // Add numerical stability threshold
-          vprod = dot(delta_G_x, delta2_x);
-          coef = vprod / ssq;
-
-          // Limit coefficient to prevent excessive extrapolation
-          if (coef > 0 && coef < 2.0) {
-            x = G2_x - coef * delta_G_x;
+        G2x = Gx;
+
+        // Compute deltas
+        deltaG = G2x - x;
+        delta2 = G2x - 2.0 * x + x0;
+        ssq = dot(delta2, delta2);
+        if (ssq > 1e-10) {
+          coef = dot(deltaG, delta2) / ssq;
+          if (coef > 0.0 && coef < 2.0) {
+            x = G2x - coef * deltaG;
           } else {
-            x = G2_x; // Use G2_x if coefficient is out of bounds
+            x = G2x;
           }
         }
       }
 
-      // Check SSR improvement less frequently
+      // SSR check
       if (iter == iter_check_ssr && iter > 0) {
         check_user_interrupt();
         iter_check_ssr += iter_check_ssr0;
@@ -123,15 +102,13 @@ void center_variables_(mat &V, const vec &w, const list &klist,
         ssr0 = ssr;
       }
 
-      // Early stopping based on ratio improvement
-      if (iter > 3 && ratio0 / ratio < 1.1 && ratio < tol * 20) {
+      // Early exit
+      if (iter > 3 && (ratio0 / ratio) < 1.1 && ratio < tol * 20)
         break;
-      }
-
       ratio0 = ratio;
     }
 
-    V.col(p) = std::move(x);
+    V.col(p) = x;
   }
 }
 
@@ -140,7 +117,7 @@ center_variables_r_(const doubles_matrix<> &V_r, const doubles &w_r,
                     const list &klist, const double &tol, const int &max_iter,
                     const int &iter_interrupt, const int &iter_ssr) {
   mat V = as_mat(V_r);
-  vec w = as_col(w_r);
-  center_variables_(V, w, klist, tol, max_iter, iter_interrupt, iter_ssr);
+  center_variables_(V, as_col(w_r), klist, tol, max_iter, iter_interrupt,
+                    iter_ssr);
   return as_doubles_matrix(V);
 }
diff --git a/src/03_lm_fit.cpp b/src/03_lm_fit.cpp
@@ -6,10 +6,9 @@
                                    const list &k_list) {
   // Type conversion
 
-  vec y = as_Col(y_r);
   mat X = as_Mat(x_r);
-  vec MNU = vec(y.n_elem, fill::zeros);
-  vec w = as_Col(wt_r);
+  const vec y = as_Col(y_r);
+  const vec w = as_Col(wt_r);
 
   // Auxiliary variables (fixed)
 
@@ -20,23 +19,35 @@
 
   // Auxiliary variables (storage)
 
-  mat MX, H;
+  mat H(X.n_cols, X.n_cols);
+  vec MNU(y.n_elem);
 
   // Center variables
 
-  MNU += y;
-  center_variables_(MNU, w, k_list, center_tol, iter_center_max, iter_interrupt,
-                    iter_ssr);
-  center_variables_(X, w, k_list, center_tol, iter_center_max, iter_interrupt,
-                    iter_ssr);
+  if (k_list.size() > 0) {
+    // Initial response + centering for fixed effects
+    MNU = y;
+    center_variables_(MNU, w, k_list, center_tol, iter_center_max,
+                      iter_interrupt, iter_ssr);
+    center_variables_(X, w, k_list, center_tol, iter_center_max, iter_interrupt,
+                      iter_ssr);
+  } else {
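+    // No fixed effects. NOTE(review): MNU stays all-zero here, so solve_beta_ receives a zero response; confirm MNU = y was not intended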
+    MNU = vec(y.n_elem, fill::zeros);
+  }
 
   // Solve the normal equations
 
   vec beta = solve_beta_(X, MNU, w);
 
   // Fitted values
 
-  vec fitted = y - MNU + X * beta;
+  vec fitted;
+  if (k_list.size() > 0) {
+    fitted = y - MNU + X * beta;
+  } else {
+    fitted = X * beta;
+  }
 
   // Recompute Hessian
 
diff --git a/src/04_glm_fit.cpp b/src/04_glm_fit.cpp
@@ -248,12 +248,12 @@ vec variance_(const vec &mu, const double &theta,
                                     const std::string &family,
                                     const list &control, const list &k_list) {
   // Type conversion
+  mat MX = as_Mat(x_r);
   vec beta = as_Col(beta_r);
   vec eta = as_Col(eta_r);
-  vec y = as_Col(y_r);
-  mat MX = as_Mat(x_r);
+  const vec y = as_Col(y_r);
   vec MNU = vec(y.n_elem, fill::zeros);
-  vec wt = as_Col(wt_r);
+  const vec wt = as_Col(wt_r);
 
   // Auxiliary variables (fixed)
 
@@ -362,11 +362,6 @@ vec variance_(const vec &mu, const double &theta,
     stop("Algorithm did not converge.");
   }
 
-  // Update weights and dependent variable
-
-  mu_eta = mu_eta_(eta, family_type);
-  w = (wt % square(mu_eta)) / variance_(mu, theta, family_type);
-
   // Compute Hessian
 
   H = crossprod_(MX, w);
diff --git a/src/05_glm_offset_fit.cpp b/src/05_glm_offset_fit.cpp
@@ -9,9 +9,9 @@ feglm_offset_fit_(const doubles &eta_r, const doubles &y_r,
 
   vec eta = as_Col(eta_r);
   vec y = as_Col(y_r);
-  vec offset = as_Col(offset_r);
+  const vec offset = as_Col(offset_r);
   vec Myadj = vec(y.n_elem, fill::zeros);
-  vec wt = as_Col(wt_r);
+  const vec wt = as_Col(wt_r);
 
   // Auxiliary variables (fixed)
 
@@ -31,7 +31,7 @@ feglm_offset_fit_(const doubles &eta_r, const doubles &y_r,
   vec mu = link_inv_(eta, family_type);
   double dev = dev_resids_(y, mu, 0.0, wt, family_type);
 
-  const int n = y.n_elem;
+  const size_t n = y.n_elem;
   vec mu_eta(n), yadj(n), w(n);
 
   bool dev_crit, val_crit, imp_crit;
diff --git a/src/06_get_alpha.cpp b/src/06_get_alpha.cpp
diff --git a/src/07_group_sums.cpp b/src/07_group_sums.cpp