
Commit 9209ccb

ENH: sparse pca evaluation
1 parent: cd207f4

4 files changed (+29, -42 lines)

R/multiscaleSVDxpts.R

Lines changed: 6 additions & 19 deletions

@@ -11730,7 +11730,6 @@ digraph NSA_Flow_FA {
 #' of the original `sparse_pca_imp()` while improving efficiency by:
 #' - precomputing X'X, using it for gradient/energy,
 #' - in-place trial updates to reduce allocations,
-#' - lazy orthogonalization (every `orth_every` iterations),
 #' - retaining Armijo backtracking and adaptive LR scheduling.
 #'
 #' @param X numeric matrix (n x p), rows = observations, cols = variables
@@ -11747,7 +11746,6 @@ digraph NSA_Flow_FA {
 #' @param grad_tol numeric, gradient-norm tolerance for convergence
 #' @param nsa_flow_fn optional, nsa_flow function to use (default: nsa_flow)
 #' @param verbose logical, print iteration diagnostics
-#' @param orth_every integer >=1, perform orthogonalization every this many iterations (default 5)
 #'
 #' @return list with components:
 #' \item{Y}{best p x k loading matrix found}
@@ -11768,8 +11766,7 @@ nsa_flow_pca <- function(X, k,
                          w_pca = 1.0, nsa_w = 0.5,
                          apply_soft_thresh_in_nns = FALSE,
                          tol = 1e-6, retraction = def_ret,
-                         grad_tol = 1e-4, nsa_flow_fn = nsa_flow_autograd, verbose = FALSE,
-                         orth_every = 5) {
+                         grad_tol = 1e-4, nsa_flow_fn = nsa_flow_autograd, verbose = FALSE) {
   # --- argument checks ---
   if (!is.matrix(X) || any(!is.finite(X))) stop("X must be a finite numeric matrix")
   n <- nrow(X); p <- ncol(X)
@@ -11779,8 +11776,6 @@ nsa_flow_pca <- function(X, k,
   if (w_pca <= 0) stop("w_pca must be positive")
   if (nsa_w < 0 || nsa_w > 1) stop("nsa_w must be in [0,1]")
   proximal_type <- match.arg(proximal_type)
-  if (!is.integer(orth_every) && orth_every != as.integer(orth_every)) orth_every <- as.integer(orth_every)
-  if (orth_every < 1) orth_every <- 1

   # Preserve previous behavior: when using nsa_flow proximal with nonzero nsa_w,
   # disable the L1 lambda to avoid double regularization (as in your original impl).
@@ -11838,7 +11833,7 @@ nsa_flow_pca <- function(X, k,

   for (iter in seq_len(max_iter)) {
     t_start <- Sys.time()
-
+    if (proximal_type != "nsa_flow") Y <- qr.Q(qr(Y))
     # Euclidean gradient: - (XtX %*% Y) / n scaled by w_pca
     grad_p <- - (XtX %*% Y) / n # p x k
     eu_grad <- w_pca * grad_p
@@ -11882,9 +11877,9 @@
     } else if (proximal_type == "nsa_flow") {
       # call nsa_flow; we assume it takes arguments (Y0, X0=NULL, w=..., retraction=...)
       # use X0 = NULL to indicate proximal-only processing of Y_ret
-      prox_res <- nsa_flow_fn( Y_ret, nsa_w )
+      prox_res <- nsa_flow_fn( Y_ret, w=nsa_w )
       if (!is.list(prox_res) || is.null(prox_res$Y)) stop("nsa_flow returned unexpected result")
-      Y_new <- prox_res$Y
+      Y_new <- prox_res$Y %>% apply( 2, function(x) (x - min(x)) / (max(x) - min(x)))
     } else {
       stop("unknown proximal_type")
     }
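
The nsa_flow branch above now rescales each column of the proximal output to [0, 1] before the iterate is accepted. A minimal base-R sketch of that column-wise min-max rescaling on a toy matrix standing in for `prox_res$Y`:

```r
# Column-wise min-max rescaling, as in the updated nsa_flow branch above.
set.seed(1)
Y <- matrix(rnorm(20), nrow = 5, ncol = 4)   # toy p x k loadings
Y_new <- apply(Y, 2, function(x) (x - min(x)) / (max(x) - min(x)))
range(Y_new)   # every column now spans exactly [0, 1]
# Note: a constant column would make max(x) == min(x) and yield NaN;
# the committed one-liner does not guard against that edge case.
```
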
@@ -11897,16 +11892,8 @@
     if (k == 1) {
       Q <- Y_new / sqrt(sum(Y_new^2))
     } else {
-      if ((iter %% orth_every) == 0 || iter == max_iter) {
-        qr_decomp <- qr(Y_new)
-        Q <- qr.Q(qr_decomp)
-        # optionally replace Y_new with Q so iterate stays more orthonormal
-        Y_new <- Q
-      } else {
-        # use Q only for explained variance computation (do not change Y_new)
-        qr_decomp_tmp <- qr(Y_new)
-        Q <- qr.Q(qr_decomp_tmp)
-      }
+      qr_decomp <- qr(Y_new)
+      Q <- qr.Q(qr_decomp)
     }

     # explained variance ratio
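
With `orth_every` removed, the multi-component branch now runs a QR orthonormalization on every iteration instead of lazily. A small sketch of that step on a toy matrix; `Q` has orthonormal columns spanning the same subspace as `Y_new` and is what feeds the explained-variance ratio computed next in the loop:

```r
# Per-iteration orthonormalization via QR, as in the simplified branch above.
set.seed(2)
Y_new <- matrix(rnorm(30), nrow = 6, ncol = 3)   # toy p x k iterate
Q <- qr.Q(qr(Y_new))
round(crossprod(Q), 10)   # t(Q) %*% Q is the k x k identity, up to rounding
```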

R/nsa_flow_torch.R

Lines changed: 2 additions & 2 deletions

@@ -653,10 +653,10 @@ nsa_flow_autograd <- function(
   # Try to call python function and handle errors clearly
   res_py <- tryCatch(
     {
-      do.call(pynsa$nsa_flow_autograd, py_args)
+      do.call(pynsa$nsa_flow_orth, py_args)
     },
     error = function(e) {
-      stop("Error calling Python nsa_flow_autograd():\n", e$message)
+      stop("Error calling Python nsa_flow_orth():\n", e$message)
     }
   )

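The change above only renames the Python entry point; the surrounding `do.call`/`tryCatch` wrapper that converts a Python-side failure into an R error naming the callee is unchanged. A generic sketch of that wrapper pattern, with an ordinary R function standing in for the reticulate-imported `pynsa` module (the names here are placeholders, not the package's API):

```r
# Dispatch a list of arguments to a backend function and re-throw any error
# with a message that names the function that failed.
call_backend <- function(fn, args, fn_name = "nsa_flow_orth") {
  tryCatch(
    do.call(fn, args),
    error = function(e) {
      stop("Error calling Python ", fn_name, "():\n", e$message)
    }
  )
}

call_backend(function(x, y) x + y, list(x = 1, y = 2))   # returns 3
# call_backend(function(x) stop("boom"), list(x = 1))    # errors, names the callee
```
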
man/nsa_flow_pca.Rd

Lines changed: 1 addition & 5 deletions
Some generated files are not rendered by default.

vignettes/nsa_flow.Rmd

Lines changed: 20 additions & 16 deletions

@@ -330,7 +330,7 @@ Y0_toy <- matrix(runif(12, 0, 1), 4, 3)
 omega_default = 0.5
 # if ( ! exists("ini_default") )
 lropts=c('armijo', 'armijo_aggressive', 'exponential', 'linear', 'random', 'adaptive', 'momentum_boost', 'entropy', 'poly_decay', 'bayes')
-ini_default = 'bayes' #
+ini_default = 'armijo' #
 optype='asgd' # for torch backend
 def_ret = "soft_polar"
 nsa_default <- function(Y0, w = omega_default,
@@ -344,7 +344,7 @@ nsa_default <- function(Y0, w = omega_default,
     verbose = verbose,
     seed = 42,
     apply_nonneg = TRUE,
-    tol = 1e-6,
+    tol = 1e-8,
     window_size=10,
     fidelity_type = "scale_invariant", #"symmetric",
     orth_type = "scale_invariant",
@@ -573,7 +573,7 @@ X0 = generate_synth_data( p, k, corrval=0.35, noise=0.05, sparse_prob=0.0, inclu
 ###
 w_seq <- c( 0.005, 0.05, 0.1, 0.2, 0.5 )
 w_seq <- c( 0.001, 0.005, 0.01, 0.05, 0.25 )
-w_seq <- c( 0.001, 0.25, 0.5, 0.75, 0.9 )
+w_seq <- c( 0.1, 0.25, 0.5, 0.75, 0.9 )
 mytit = paste0("w = ", round(w_seq,3))
 mats <- list()
 convergeplots <- list()
@@ -603,10 +603,9 @@ for(i in seq_along(mats)) {
 }
 grid.arrange(grobs = lapply(swplots, function(x) x$gtable), ncol = 3)
 if ( length(convergeplots) >=4 ) {
-  grid.arrange(grobs=convergeplots[c(1,2,3,5)],
-    top='Convergence Plots for Different w Values', ncol=2 )
+  grid.arrange(grobs=convergeplots[c(1,2,3,5)], top='Convergence Plots for Different w Values', ncol=2 )
 }
-
+#########
 # darkk #
 ####################
 ```
@@ -1048,6 +1047,7 @@ golub_scaled <- scale(data.matrix(golub_df))

 ```{r golub_sparse_pca_analysis, echo=FALSE, fig.width=5, message=FALSE, warning=FALSE,cache=FALSE}

+
 set.seed(1)
 myk <- 3
 mxit <- 100
@@ -1057,32 +1057,35 @@ golub_scaled_ss <- golub_scaled[, ss]
 ## --- PCA Variants ------------------------------------------------------------
 pca_std <- prcomp(golub_scaled_ss, rank. = myk)
 proj_std <- pca_std$x
-
-res_basic <- nsa_flow_pca(golub_scaled_ss, myk,lambda = 0.1, alpha = 0.01,
+myalph=0.1
+res_basic <- nsa_flow_pca(golub_scaled_ss, myk, lambda = 0.1, alpha = myalph,
   max_iter = mxit, proximal_type = "basic", tol = 1e-5,
-  nsa_w = omega_default, verbose = F)
-res_nns <- nsa_flow_pca(golub_scaled_ss, myk, lambda = 0.1, alpha = 0.01,
+  nsa_w = omega_default, verbose = TRUE )
+res_nns <- nsa_flow_pca(golub_scaled_ss, myk, lambda = 0.1, alpha = myalph,
   max_iter = mxit, proximal_type = "nsa_flow", tol = 1e-5,
-  nsa_w = omega_default, nsa_flow_fn = nsa_default, verbose = FALSE)
+  nsa_w = omega_default, nsa_flow_fn = nsa_default, verbose = TRUE)
 ## --- Core Metrics ------------------------------------------------------------
 metrics_pca_g <- compute_core_metrics(pca_std$rotation, golub_scaled_ss)
 metrics_basic_g <- compute_core_metrics(res_basic$Y, golub_scaled_ss)
 metrics_nns_g <- compute_core_metrics(res_nns$Y, golub_scaled_ss)
+proj_basic <- golub_scaled_ss %*% res_basic$Y
+proj_nns <- golub_scaled_ss %*% res_nns$Y

 ## --- Classification Performance ---------------------------------------------
 cv_acc <- function(proj, labels) {
   colnames(proj) <- paste0("V", seq_len(ncol(proj)))
   ctrl <- trainControl(method = "repeatedcv", number = 5, repeats = 50)
-  model <- train(proj, labels, method = "knn",
-    trControl = ctrl, tuneGrid = data.frame(k = 3))
+  tune_grid <- expand.grid(mtry = seq(1, ncol(proj), length.out = 5)) # Example: tune mtry over 5 values
+  model <- train(proj, labels, method = "rf",
+    trControl = ctrl, tuneGrid = tune_grid)
   tibble(Accuracy = model$results$Accuracy, AccuracySD = model$results$AccuracySD)
 }
-
-proj_basic <- golub_scaled_ss %*% res_basic$Y
-proj_nns <- golub_scaled_ss %*% res_nns$Y
 acc_std <- cv_acc(proj_std, labels)
 acc_basic <- cv_acc(proj_basic, labels)
 acc_nns <- cv_acc(proj_nns, labels)
+# acc_std
+# acc_basic
+# acc_nns

 ## --- Integrated Results Table -----------------------------------------------
 golub_metrics <- tibble(
@@ -1093,6 +1096,7 @@ golub_metrics <- tibble(
   CV_Accuracy = c(acc_std$Accuracy, acc_basic$Accuracy, acc_nns$Accuracy),
   CV_Accuracy_SD = c(acc_std$AccuracySD, acc_basic$AccuracySD, acc_nns$AccuracySD)
 )
+
 #####
 ```
