Commit aaeb71e

start incorporate ALO

1 parent ddde041

7 files changed: +855 -159 lines changed

alo.ipynb: +166 lines (large diff not rendered by default)

classo/alo.py: +308 lines
@@ -0,0 +1,308 @@
"""Utility functions for implementing and testing out ALO for c-lasso."""

import functools
import multiprocessing
from typing import Tuple

import numpy as np
import scipy.linalg
import sklearn.linear_model
import tqdm

from classo import classo_problem
from classo.solve_R1 import pathlasso_R1, problem_R1


def generate_data(n, p, k, d, sigma=1, seed=None):
    """Generate a random c-lasso problem.

    Parameters
    ----------
    n : int
        Number of observations.
    p : int
        Number of parameters.
    k : int
        Number of ground-truth non-zero parameters.
    d : int
        Number of constraints.
    sigma : float
        Standard deviation of the additive noise.
    seed : int, optional
        Optional integer used to seed the random number generator
        for reproducibility.
    """
    rng = np.random.Generator(np.random.Philox(seed))

    X = rng.normal(scale=1 / np.sqrt(k), size=(n, p))
    C = rng.normal(size=(d, p))
    beta_nz = np.ones(k)
    C_k = C[:, :k]

    # Ensure that beta satisfies the constraint by projecting its non-zero
    # block onto the null space of C_k, so that C @ beta = 0.
    beta_nz = beta_nz - C_k.T @ scipy.linalg.lstsq(C_k.T, beta_nz)[0]
    beta_nz /= np.mean(beta_nz ** 2)
    beta = np.concatenate((beta_nz, np.zeros(p - k)))

    eps = rng.normal(scale=sigma, size=(n,))

    y = X @ beta + eps
    return (X, C, y), beta

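
def _demo_generate_data():
    # Usage sketch (illustrative only, not part of the original commit): draw
    # a small synthetic problem and check that the ground truth satisfies the
    # constraint C @ beta = 0. The problem sizes here are arbitrary.
    (X, C, y), beta = generate_data(n=100, p=20, k=5, d=2, sigma=0.5, seed=42)
    assert X.shape == (100, 20) and C.shape == (2, 20) and y.shape == (100,)
    assert np.allclose(C @ beta, 0, atol=1e-6)
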
def solve_cls(X, y, C):
    """Solve the constrained least-squares problem.

    This currently uses a very naive method based on explicit inversion.
    A better method would use a Cholesky decomposition or similar.

    Parameters
    ----------
    X : np.array
        Design matrix.
    y : np.array
        Observation vector.
    C : np.array
        Constraint matrix.
    """
    K = X.T @ X
    K_inv = np.linalg.inv(K)
    # Projected inverse: the solution of the equality-constrained normal
    # equations is P @ X.T @ y.
    P = K_inv - K_inv @ C.T @ np.linalg.inv(C @ K_inv @ C.T) @ C @ K_inv
    return P @ (X.T @ y)

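
def _demo_solve_cls():
    # Usage sketch (illustrative only): the CLS estimate returned by
    # `solve_cls` should itself satisfy the constraint up to round-off.
    (X, C, y), _ = generate_data(n=100, p=20, k=5, d=2, seed=0)
    beta_hat = solve_cls(X, y, C)
    assert np.allclose(C @ beta_hat, 0, atol=1e-8)
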
def alo_cls_h_naive(X: np.ndarray, C: np.ndarray) -> np.ndarray:
    """Computes the ALO leverages for the CLS (constrained least-squares).

    Note that just like for the OLS, the CLS is a linear smoother, and hence
    the ALO leverages are exact LOO leverages.

    The leverages are the diagonal of the smoothing matrix

        H = X @ P @ X.T,

    where P = K^-1 - K^-1 @ C.T @ (C @ K^-1 @ C.T)^-1 @ C @ K^-1 and
    K = X.T @ X.

    This is the reference implementation which uses "obvious" linear algebra.
    See `alo_cls_h` for a better implementation.

    Parameters
    ----------
    X : np.ndarray
        A numpy array of size [n, p] containing the design matrix.
    C : np.ndarray
        A numpy array of size [d, p] containing the constraints.

    Returns
    -------
    np.ndarray
        A 1-dimensional array of size n, representing the computed leverage
        of each observation.
    """
    K = X.T @ X
    K_inv = np.linalg.inv(K)
    P = K_inv - K_inv @ C.T @ np.linalg.inv(C @ K_inv @ C.T) @ C @ K_inv
    return np.diag(X @ P @ X.T)

def alo_cls_h(X: np.ndarray, C: np.ndarray) -> np.ndarray:
    """Computes the ALO leverages for the CLS.

    Note that just like for the OLS, the CLS is a linear smoother, and hence
    the ALO leverages are exact LOO leverages.

    See `alo_cls_h_naive` for the mathematically convenient expression. This
    function implements the same computation much more efficiently by relying
    on Cholesky decompositions instead of explicit inverses.
    """
    K = X.T @ X
    K_cho, _ = scipy.linalg.cho_factor(
        K, overwrite_a=True, lower=True, check_finite=False)
    # Half-solves L^-1 @ C.T and L^-1 @ X.T, where K = L @ L.T.
    K_inv_2_C = scipy.linalg.solve_triangular(
        K_cho, C.T, lower=True, check_finite=False)
    K_inv_2_Xt = scipy.linalg.solve_triangular(
        K_cho, X.T, lower=True, check_finite=False)

    C_Ki_C = K_inv_2_C.T @ K_inv_2_C

    CKC_cho, _ = scipy.linalg.cho_factor(
        C_Ki_C, overwrite_a=True, lower=True, check_finite=False)
    F = scipy.linalg.solve_triangular(
        CKC_cho, K_inv_2_C.T, lower=True, check_finite=False)
    return (K_inv_2_Xt ** 2).sum(axis=0) - ((F @ K_inv_2_Xt) ** 2).sum(axis=0)

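
def _demo_alo_cls_h():
    # Consistency sketch (illustrative only): the Cholesky-based leverages
    # should agree with the naive reference implementation.
    (X, C, _), _ = generate_data(n=100, p=20, k=5, d=2, seed=1)
    assert np.allclose(alo_cls_h_naive(X, C), alo_cls_h(X, C), atol=1e-8)
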
def alo_h(X: np.ndarray, beta: np.ndarray, y: np.ndarray, C: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Computes the ALO leverage and residual for the c-lasso.

    Due to its L1 structure, the ALO for the constrained lasso corresponds
    to the ALO of the CLS restricted to the equi-correlation set. This
    function extracts the equi-correlation set and delegates to `alo_cls_h`
    to compute the ALO leverage.

    Parameters
    ----------
    X : np.ndarray
        A numpy array of size [n, p] representing the design matrix.
    beta : np.ndarray
        A numpy array of size [p] representing the solution at which to
        compute the ALO risk.
    y : np.ndarray
        A numpy array of size [n] representing the observations.
    C : np.ndarray
        A numpy array of size [d, p] representing the constraint matrix.

    Returns
    -------
    alo_residual : np.ndarray
        A numpy array of size [n] representing the estimated ALO residuals.
    h : np.ndarray
        A numpy array of size [n] representing the ALO leverage at each
        observation.
    """
    E = np.flatnonzero(beta)

    if len(E) == 0:
        return (y - X @ beta), np.zeros(X.shape[0])

    X_E = X[:, E]
    C_E = C[:, E]

    h = alo_cls_h(X_E, C_E)
    return (y - X @ beta) / (1 - h), h


def alo_classo_risk(X: np.ndarray, C: np.ndarray, y: np.ndarray, betas: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Computes the ALO risk for the c-lasso at the given estimates.

    Parameters
    ----------
    X : np.ndarray
        A numpy array of size [n, p] representing the design matrix.
    C : np.ndarray
        A numpy array of size [d, p] representing the constraint matrix.
    y : np.ndarray
        A numpy array of size [n] representing the observations.
    betas : np.ndarray
        A numpy array of size [m, p], where ``m`` denotes the number of
        solutions in the path, representing the solution at each point
        in the path.

    Returns
    -------
    mse : np.ndarray
        A numpy array of size [m], representing the ALO estimate of the
        mean squared error at each solution along the path.
    df : np.ndarray
        A numpy array of size [m], representing the estimated normalized
        degrees of freedom at each solution along the path.
    """
    mse = np.empty(len(betas))
    df = np.empty(len(betas))

    for i, beta in enumerate(betas):
        res, h = alo_h(X, beta, y, C)
        df[i] = np.mean(h)
        mse[i] = np.mean(np.square(res))

    return mse, df


def solve_standard(X, C, y, lambdas=None):
    """Utility function to solve the standard c-lasso formulation."""
    problem = problem_R1((X, C, y), 'Path-Alg')
    problem.tol = 1e-6

    if lambdas is None:
        # Default grid: 80 log-spaced fractions of lambdamax, from 1 to 1e-3.
        lambdas = np.logspace(0, 1, num=80, base=1e-3)
    else:
        lambdas = lambdas / problem.lambdamax

    # Solve the path from the largest to the smallest penalty.
    if lambdas[0] < lambdas[-1]:
        lambdas = lambdas[::-1]

    beta = pathlasso_R1(problem, lambdas)
    return np.array(beta), lambdas * problem.lambdamax

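
def _demo_alo_classo_risk():
    # End-to-end sketch (illustrative only): solve the c-lasso path, compute
    # the ALO risk along it, and pick the penalty minimizing the estimated MSE.
    (X, C, y), _ = generate_data(n=200, p=50, k=10, d=3, seed=2)
    betas, lambdas = solve_standard(X, C, y)
    mse, df = alo_classo_risk(X, C, y, betas)
    return lambdas[np.argmin(mse)], mse, df
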
def solve_loo_i(X, C, y, i, lambdas):
    """Solve the c-lasso path with observation ``i`` left out."""
    X = np.concatenate((X[:i], X[i + 1:]))
    y = np.concatenate((y[:i], y[i + 1:]))
    return solve_standard(X, C, y, lambdas)


def _solve_loo_i_beta(i, X, C, y, lambdas):
    return solve_loo_i(X, C, y, i, lambdas)[0]


def _set_sequential_mkl():
    # Force single-threaded BLAS in worker processes so that the pool's
    # process-level parallelism does not oversubscribe the cores.
    import os
    try:
        import mkl
        mkl.set_num_threads(1)
    except ImportError:
        os.environ['MKL_NUM_THREADS'] = '1'
        os.environ['OMP_NUM_THREADS'] = '1'


def solve_loo(X, C, y, progress=False):
    """Solves the leave-one-out problem for each observation.

    This function uses Python multiprocessing to spread the n leave-one-out
    fits across all available cores.
    """
    _, lambdas = solve_standard(X, C, y)

    ctx = multiprocessing.get_context('spawn')

    with ctx.Pool(initializer=_set_sequential_mkl) as pool:
        result = pool.imap(
            functools.partial(_solve_loo_i_beta, X=X, C=C, y=y, lambdas=lambdas),
            range(X.shape[0]))
        if progress:
            result = tqdm.tqdm(result, total=X.shape[0])
        result = list(result)

    return np.stack(result, axis=0), lambdas

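
def _demo_solve_loo():
    # Validation sketch (illustrative only): the ALO risk should closely
    # track the exact leave-one-out risk from `solve_loo`. Note this runs
    # n separate path solves, so it can take a while.
    (X, C, y), _ = generate_data(n=100, p=20, k=5, d=2, seed=3)
    betas, lambdas = solve_standard(X, C, y)
    mse_alo, _ = alo_classo_risk(X, C, y, betas)

    betas_loo, _ = solve_loo(X, C, y)
    # betas_loo has shape [n, m, p]: one path per left-out observation.
    preds = np.einsum('ij,imj->im', X, betas_loo)
    mse_loo = np.mean((y[:, None] - preds) ** 2, axis=0)
    return mse_alo, mse_loo

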
# The functions below implement the same functionality for the plain LASSO
# (not the c-lasso). They are mostly intended for debugging and do not need
# to be integrated.

def solve_lasso(X, y, lambdas=None):
    # NOTE: scikit-learn's lasso_path takes its penalty grid through the
    # ``alphas`` keyword; it has no ``lambdas`` or ``intercept`` argument.
    lambdas, betas, _ = sklearn.linear_model.lasso_path(X, y, alphas=lambdas)
    return lambdas, betas.T


def alo_lasso_h(X, y, beta, tol=1e-4):
    # Active (equi-correlation) set at this solution.
    E = np.abs(beta) > tol
    if E.sum() == 0:
        return y - X @ beta, np.zeros(X.shape[0])

    X = X[:, E]

    K = X.T @ X
    H = X @ scipy.linalg.solve(K, X.T, assume_a='pos')

    h = np.diag(H)
    return (y - X @ beta[E]) / (1 - h), h


def alo_lasso_risk(X, y, betas):
    mse = np.empty(len(betas))
    df = np.empty(len(betas))

    for i, beta in enumerate(betas):
        res, h = alo_lasso_h(X, y, beta)
        df[i] = np.mean(h)
        mse[i] = np.mean(np.square(res))

    return mse, df


def _lasso_loo(i, X, y, lambdas):
    X_i = np.concatenate((X[:i], X[i + 1:]))
    y_i = np.concatenate((y[:i], y[i + 1:]))
    return solve_lasso(X_i, y_i, lambdas)[1]


def solve_lasso_loo(X, y, lambdas=None, progress=False):
    if lambdas is None:
        lambdas, _ = solve_lasso(X, y)

    with multiprocessing.Pool(initializer=_set_sequential_mkl) as pool:
        result = pool.imap(
            functools.partial(_lasso_loo, X=X, y=y, lambdas=lambdas),
            range(X.shape[0]))
        if progress:
            result = tqdm.tqdm(result, total=X.shape[0])
        result = list(result)

    return lambdas, np.stack(result, axis=0)
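
def _demo_lasso_alo():
    # Debugging sketch (illustrative only): the same ALO-versus-exact-LOO
    # comparison for the plain LASSO helpers above.
    rng = np.random.default_rng(4)
    X = rng.normal(size=(100, 20))
    y = X[:, :5].sum(axis=1) + rng.normal(size=100)
    lambdas, betas = solve_lasso(X, y)
    mse_alo, _ = alo_lasso_risk(X, y, betas)
    _, betas_loo = solve_lasso_loo(X, y, lambdas)
    preds = np.einsum('ij,imj->im', X, betas_loo)
    mse_loo = np.mean((y[:, None] - preds) ** 2, axis=0)
    return mse_alo, mse_loo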
