Skip to content

Commit 357be7b

Browse files
committed
new configure to use MKL (Niagara)
1 parent 5738441 commit 357be7b

File tree

6 files changed

+79
-52
lines changed

6 files changed

+79
-52
lines changed

configure

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,22 @@ for flag in -fopenmp -qopenmp -openmp -xopenmp; do
4545
CXXFLAGS="$save_CXXFLAGS"
4646
done
4747

48+
# Link BLAS/LAPACK against MKL if MKLROOT is set
49+
R_BLAS_LIBS=$("${R_HOME}/bin/R" CMD config BLAS_LIBS)
50+
R_LAPACK_LIBS=$("${R_HOME}/bin/R" CMD config LAPACK_LIBS)
51+
R_FLIBS=$("${R_HOME}/bin/R" CMD config FLIBS)
52+
53+
: ${BLAS_LIBS:="$R_BLAS_LIBS"}
54+
: ${LAPACK_LIBS:="$R_LAPACK_LIBS"}
55+
: ${FLIBS:="$R_FLIBS"}
56+
57+
if [ -n "$MKLROOT" ]; then
58+
echo "detected MKLROOT=$MKLROOT → linking BLAS/LAPACK via MKL"
59+
BLAS_LIBS="-L${MKLROOT}/lib/intel64_lin -lmkl_rt"
60+
LAPACK_LIBS="$BLAS_LIBS"
61+
fi
62+
63+
# Check for OpenMP support
4864
if [ -n "$OPENMP_FLAG" ]; then
4965
echo "OpenMP is enabled: $OPENMP_FLAG"
5066
PKG_CXXFLAGS="$OPENMP_FLAG $PKG_CXXFLAGS"
@@ -100,6 +116,8 @@ fi
100116
echo "creating src/Makevars"
101117
sed -e "s|@ncores@|${num_cores}|g" \
102118
-e "s|@OPTIMIZATION_FLAGS@|${OPTIMIZATION_FLAGS}|g" \
119+
-e "s|@PKG_CXXFLAGS@|${PKG_CXXFLAGS}|g" \
120+
-e "s|@PKG_LIBS@|${PKG_LIBS}|g" \
103121
src/Makevars.in > src/Makevars
104122

105123
# Success

src/00_main.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
#pragma once
22

33
#include <armadillo.hpp>
4+
#include <cmath>
45
#include <cpp11.hpp>
56
#include <cpp11armadillo.hpp>
7+
#include <limits>
68
#include <regex>
79
#include <unordered_map>
810

src/02_center_variables.cpp

Lines changed: 46 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
// 02_center_variables.cpp (refactored using Armadillo types)
21
#include "00_main.h"
32

43
// Method of alternating projections (Halperin)
@@ -7,16 +6,16 @@ void center_variables_(mat &V, const vec &w, const list &klist,
76
const int &iter_interrupt, const int &iter_ssr) {
87
// Auxiliary variables (fixed)
98
const size_t I = static_cast<size_t>(max_iter), N = V.n_rows, P = V.n_cols,
10-
K = klist.size(),
11-
iter_check_interrupt0 = static_cast<size_t>(iter_interrupt),
12-
iter_check_ssr0 = static_cast<size_t>(iter_ssr);
9+
K = klist.size(), iint0 = static_cast<size_t>(iter_interrupt),
10+
isr0 = static_cast<size_t>(iter_ssr);
1311
const double inv_sw = 1.0 / accu(w);
1412

1513
// Auxiliary variables (storage)
16-
size_t iter, j, k, p, J, iter_check_interrupt = iter_check_interrupt0,
17-
iter_check_ssr = iter_check_ssr0;
18-
double coef, xbar, ratio, ssr, ssq, ratio0, ssr0;
14+
size_t iter, iint, isr, j, jj, k, n, p, J, JJ;
15+
double num, coef, xbar, ratio, ssr, ssq, ratio0, ssr0;
1916
vec x(N), x0(N), Gx(N), G2x(N), deltaG(N), delta2(N);
17+
18+
// Precompute groups into fields
2019
field<field<uvec>> group_indices(K);
2120
field<vec> group_inverse_weights(K);
2221
for (k = 0; k < K; ++k) {
@@ -26,59 +25,60 @@ void center_variables_(mat &V, const vec &w, const list &klist,
2625
vec invs(J);
2726
for (j = 0; j < J; ++j) {
2827
idxs(j) = as_uvec(as_cpp<integers>(jlist[j]));
29-
;
3028
invs(j) = 1.0 / accu(w.elem(idxs(j)));
3129
}
32-
group_indices(k) = idxs;
33-
group_inverse_weights(k) = invs;
30+
group_indices(k) = std::move(idxs);
31+
group_inverse_weights(k) = std::move(invs);
3432
}
3533

34+
// Single nested‐field projection helper
35+
auto project = [&](vec &v) {
36+
J = group_indices.n_elem;
37+
for (j = 0; j < J; ++j) {
38+
auto &idxs = group_indices(j);
39+
auto &invs = group_inverse_weights(j);
40+
JJ = idxs.n_elem;
41+
for (jj = 0; jj < JJ; ++jj) {
42+
const uvec &coords = idxs(jj);
43+
xbar = dot(w.elem(coords), v.elem(coords)) * invs(jj);
44+
v.elem(coords) -= xbar;
45+
}
46+
}
47+
};
48+
49+
// Column‐wise centering
3650
for (p = 0; p < P; ++p) {
3751
x = V.col(p);
3852
ratio0 = std::numeric_limits<double>::infinity();
3953
ssr0 = std::numeric_limits<double>::infinity();
4054

55+
// reset per‐column interrupt
56+
iint = iint0;
57+
isr = isr0;
58+
4159
for (iter = 0; iter < I; ++iter) {
42-
if (iter == iter_check_interrupt) {
60+
if (iter == iint) {
4361
check_user_interrupt();
44-
iter_check_interrupt += iter_check_interrupt0;
62+
iint += iint0;
4563
}
4664

4765
x0 = x;
4866

49-
// Halperin projection
50-
for (k = 0; k < K; ++k) {
51-
field<uvec> &idxs = group_indices(k);
52-
J = idxs.n_elem;
53-
vec &invs = group_inverse_weights(k);
54-
for (j = 0; j < J; ++j) {
55-
const uvec &coords = idxs(j);
56-
xbar = dot(w.elem(coords), x.elem(coords)) * invs(j);
57-
x.elem(coords) -= xbar;
58-
}
67+
// 1) main projection
68+
project(x);
69+
num = 0.0;
70+
for (n = 0; n < N; ++n) {
71+
num += std::abs(x[n] - x0[n]) / (1.0 + std::abs(x0[n])) * w[n];
5972
}
60-
61-
// Convergence check
62-
ratio = dot(abs(x - x0) / (1.0 + abs(x0)), w) * inv_sw;
73+
ratio = num * inv_sw;
6374
if (ratio < tol)
6475
break;
6576

66-
// Acceleration every 5 iters
67-
if (iter > 5 && (iter % 5) == 0) {
77+
// 2) acceleration every 5 iters
78+
if (iter >= 5 && (iter % 5) == 0) {
6879
Gx = x;
69-
// Second projection
70-
for (size_t k = 0; k < K; ++k) {
71-
field<uvec> &idxs = group_indices(k);
72-
vec &invs = group_inverse_weights(k);
73-
for (j = 0; j < idxs.n_elem; ++j) {
74-
const uvec &coords = idxs(j);
75-
xbar = dot(w.elem(coords), Gx.elem(coords)) * invs(j);
76-
Gx.elem(coords) -= xbar;
77-
}
78-
}
80+
project(Gx);
7981
G2x = Gx;
80-
81-
// Compute deltas
8282
deltaG = G2x - x;
8383
delta2 = G2x - 2.0 * x + x0;
8484
ssq = dot(delta2, delta2);
@@ -92,17 +92,17 @@ void center_variables_(mat &V, const vec &w, const list &klist,
9292
}
9393
}
9494

95-
// SSR check
96-
if (iter == iter_check_ssr && iter > 0) {
95+
// 3) SSR‐based early exit
96+
if (iter == isr && iter > 0) {
9797
check_user_interrupt();
98-
iter_check_ssr += iter_check_ssr0;
98+
isr += isr0;
9999
ssr = dot(x % x, w) * inv_sw;
100-
if (fabs(ssr - ssr0) / (1.0 + fabs(ssr0)) < tol)
100+
if (std::fabs(ssr - ssr0) / (1.0 + std::fabs(ssr0)) < tol)
101101
break;
102102
ssr0 = ssr;
103103
}
104104

105-
// Early exit
105+
// 4) early exit
106106
if (iter > 3 && (ratio0 / ratio) < 1.1 && ratio < tol * 20)
107107
break;
108108
ratio0 = ratio;
@@ -114,8 +114,8 @@ void center_variables_(mat &V, const vec &w, const list &klist,
114114

115115
[[cpp11::register]] doubles_matrix<>
116116
center_variables_r_(const doubles_matrix<> &V_r, const doubles &w_r,
117-
const list &klist, const double &tol, const int &max_iter,
118-
const int &iter_interrupt, const int &iter_ssr) {
117+
const list &klist, const double tol, const int max_iter,
118+
const int iter_interrupt, const int iter_ssr) {
119119
mat V = as_mat(V_r);
120120
center_variables_(V, as_col(w_r), klist, tol, max_iter, iter_interrupt,
121121
iter_ssr);

src/Makevars

Lines changed: 0 additions & 2 deletions
This file was deleted.

src/Makevars.in

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,11 @@
1-
PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_NO_DEBUG -DARMA_USE_BLAS -DARMA_USE_LAPACK -DARMA_OPENMP_THREADS=@ncores@ @OPTIMIZATION_FLAGS@
2-
PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
1+
PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) \
2+
-DARMA_NO_DEBUG \
3+
-DARMA_USE_BLAS \
4+
-DARMA_USE_LAPACK \
5+
-DARMA_OPENMP_THREADS=@ncores@ \
6+
@OPTIMIZATION_FLAGS@
7+
8+
PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) \
9+
$(LAPACK_LIBS) \
10+
$(BLAS_LIBS) \
11+
$(FLIBS)

src/cpp11.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ extern "C" SEXP _capybara_check_linear_dependence_svd_(SEXP y, SEXP x, SEXP p) {
1313
END_CPP11
1414
}
1515
// 02_center_variables.cpp
16-
doubles_matrix<> center_variables_r_(const doubles_matrix<> & V_r, const doubles & w_r, const list & klist, const double & tol, const int & max_iter, const int & iter_interrupt, const int & iter_ssr);
16+
doubles_matrix<> center_variables_r_(const doubles_matrix<> & V_r, const doubles & w_r, const list & klist, const double tol, const int max_iter, const int iter_interrupt, const int iter_ssr);
1717
extern "C" SEXP _capybara_center_variables_r_(SEXP V_r, SEXP w_r, SEXP klist, SEXP tol, SEXP max_iter, SEXP iter_interrupt, SEXP iter_ssr) {
1818
BEGIN_CPP11
19-
return cpp11::as_sexp(center_variables_r_(cpp11::as_cpp<cpp11::decay_t<const doubles_matrix<> &>>(V_r), cpp11::as_cpp<cpp11::decay_t<const doubles &>>(w_r), cpp11::as_cpp<cpp11::decay_t<const list &>>(klist), cpp11::as_cpp<cpp11::decay_t<const double &>>(tol), cpp11::as_cpp<cpp11::decay_t<const int &>>(max_iter), cpp11::as_cpp<cpp11::decay_t<const int &>>(iter_interrupt), cpp11::as_cpp<cpp11::decay_t<const int &>>(iter_ssr)));
19+
return cpp11::as_sexp(center_variables_r_(cpp11::as_cpp<cpp11::decay_t<const doubles_matrix<> &>>(V_r), cpp11::as_cpp<cpp11::decay_t<const doubles &>>(w_r), cpp11::as_cpp<cpp11::decay_t<const list &>>(klist), cpp11::as_cpp<cpp11::decay_t<const double>>(tol), cpp11::as_cpp<cpp11::decay_t<const int>>(max_iter), cpp11::as_cpp<cpp11::decay_t<const int>>(iter_interrupt), cpp11::as_cpp<cpp11::decay_t<const int>>(iter_ssr)));
2020
END_CPP11
2121
}
2222
// 03_lm_fit.cpp

0 commit comments

Comments
 (0)