11"""Utility functions for implementing and testing out ALO for c-lasso.
22"""
33
4- import functools
5- from typing import Tuple
6-
7- import multiprocessing
84import numpy as np
95import scipy .linalg
10- import tqdm
11- import sklearn .linear_model
12-
13- from classo import classo_problem
14- from classo .solve_R1 import pathlasso_R1 , problem_R1
156
167
def generate_data(n, p, k, d, sigma=1, seed=None):
    """Generate a random c-lasso problem.

    Draws a Gaussian design matrix ``X`` and constraint matrix ``C``, builds
    a coefficient vector ``beta`` whose first ``k`` entries are non-zero and
    which satisfies ``C @ beta = 0`` up to floating-point error, and returns
    noisy observations ``y = X @ beta + eps``.

    Parameters
    ----------
    n : int
        Number of observations.
    p : int
        Number of parameters.
    k : int
        Number of ground truth non-zero parameters. Must satisfy
        ``1 <= k <= p``.
    d : int
        Number of constraints. Must be strictly smaller than ``k``.
    sigma : float
        Standard deviation of additive noise.
    seed : int, optional
        Optional integer used to seed the random number generator
        for reproducibility.

    Returns
    -------
    (X, C, y) : tuple of np.ndarray
        Design matrix of shape ``(n, p)``, constraint matrix of shape
        ``(d, p)`` and observation vector of shape ``(n,)``.
    beta : np.ndarray
        Ground truth coefficients of shape ``(p,)``; entries from index
        ``k`` onwards are exactly zero.

    Raises
    ------
    ValueError
        If ``k`` is not in ``1..p``, or if ``d >= k``.
    """
    if k <= 0 or k > p:
        raise ValueError(
            f"k must satisfy 1 <= k <= p, got k={k} and p={p}"
        )
    if d >= k:
        # A generic d x k constraint block with d >= k has a trivial null
        # space, so the projection below would leave (numerically) zero and
        # the normalisation would divide by ~0 and return garbage silently.
        raise ValueError(
            f"d must be smaller than k so that a non-zero beta can satisfy "
            f"the constraints, got d={d} and k={k}"
        )

    rng = np.random.Generator(np.random.Philox(seed))

    # NOTE: the order of the draws (X, C, then the noise) is kept fixed so
    # that a given seed always reproduces the same problem.
    X = rng.normal(scale=1 / np.sqrt(k), size=(n, p))
    C = rng.normal(size=(d, p))

    # Only the first k columns of C act on the non-zero part of beta.
    C_k = C[:, :k]
    beta_nz = np.ones(k)

    # Ensure that beta verifies the constraint by projecting: subtract the
    # least-squares fit of beta_nz on the rows of C_k, which leaves the
    # component orthogonal to those rows.
    beta_nz = beta_nz - C_k.T @ scipy.linalg.lstsq(C_k.T, beta_nz)[0]

    # Rescale by the mean square of the projected coefficients.
    # NOTE(review): this divides by the mean square, not its square root, so
    # the result does not have unit RMS -- confirm this is intended.
    beta_nz /= np.mean(beta_nz ** 2)
    beta = np.concatenate((beta_nz, np.zeros(p - k)))

    eps = rng.normal(scale=sigma, size=(n,))

    y = X @ beta + eps
    return (X, C, y), beta
52-
538
549def solve_cls (X , y , C ):
5510 """Solve the constrained least-squares problem.
@@ -134,7 +89,7 @@ def alo_cls_h(X: np.ndarray, C: np.ndarray) -> np.ndarray:
13489
13590def alo_h (
13691 X : np .ndarray , beta : np .ndarray , y : np .ndarray , C : np .ndarray
137- ) -> Tuple [ np . ndarray , np . ndarray ] :
92+ ):
13893 """Computes the ALO leverage and residual for the c-lasso.
13994
14095 Due to its L1 structure, the ALO for the constrained lasso corresponds
@@ -175,7 +130,7 @@ def alo_h(
175130
176131def alo_classo_risk (
177132 X : np .ndarray , C : np .ndarray , y : np .ndarray , betas : np .ndarray
178- ) -> Tuple [ np . ndarray , np . ndarray ] :
133+ ):
179134 """Computes the ALO risk for the c-lasso at the given estimates.
180135
181136 Parameters
@@ -210,8 +165,63 @@ def alo_classo_risk(
210165 return mse , df
211166
212167
168+
169+ """
170+ Not used for now.
171+ import functools
172+ from typing import Tuple
173+
174+ import multiprocessing
175+ import numpy as np
176+ import scipy.linalg
177+ import tqdm
178+ import sklearn.linear_model
179+
180+ from classo import classo_problem
181+ from classo.solve_R1 import pathlasso_R1, problem_R1
182+
183+
184+
185+
186+ def generate_data(n, p, k, d, sigma=1, seed=None):
187+ ""Generate random c-lasso problem.
188+
189+ Parameters
190+ ----------
191+ n : int
192+ Number of observations
193+ p : int
194+ Number of parameters
195+ k : int
196+ Number of ground truth non-zero parameters.
197+ d : int
198+ Number of constraints
199+ sigma : float
200+ Standard deviation of additive noise.
201+ seed : int, optional
202+ Optional integer used to seed the random number generator
203+ for reproducibility.
204+ ""
205+ rng = np.random.Generator(np.random.Philox(seed))
206+
207+ X = rng.normal(scale=1 / np.sqrt(k), size=(n, p))
208+ C = rng.normal(size=(d, p))
209+ beta_nz = np.ones(k)
210+ C_k = C[:, :k]
211+
212+ # ensure that beta verifies the constraint by projecting.
213+ beta_nz = beta_nz - C_k.T @ scipy.linalg.lstsq(C_k.T, beta_nz)[0]
214+ beta_nz /= np.mean(beta_nz ** 2)
215+ beta = np.concatenate((beta_nz, np.zeros(p - k)))
216+
217+ eps = rng.normal(scale=sigma, size=(n,))
218+
219+ y = X @ beta + eps
220+ return (X, C, y), beta
221+
222+
213223def solve_standard(X, C, y, lambdas=None):
214- """ Utility function to solve standard c-lasso formulation." ""
224+ ""Utility function to solve standard c-lasso formulation.""
215225 problem = problem_R1((X, C, y), "Path-Alg")
216226 problem.tol = 1e-6
217227
@@ -226,6 +236,26 @@ def solve_standard(X, C, y, lambdas=None):
226236 beta = pathlasso_R1(problem, lambdas)
227237 return np.array(beta), lambdas * problem.lambdamax
228238
239+ def solve_loo(X, C, y):
240+ ""Solves the leave-one-out problem for each observation.
241+
242+ This function makes use of python multi-processing in order
243+ to accelerate the computation across all the cores.
244+ ""
245+ _, lambdas = solve_standard(X, C, y)
246+
247+ ctx = multiprocessing.get_context("spawn")
248+
249+ with ctx.Pool(initializer=_set_sequential_mkl) as pool:
250+ result = pool.imap(
251+ functools.partial(_solve_loo_i_beta, X=X, C=C, y=y, lambdas=lambdas),
252+ range(X.shape[0]),
253+ )
254+
255+ result = list(result)
256+
257+ return np.stack(result, axis=0), lambdas
258+
229259
230260def solve_loo_i(X, C, y, i, lambdas):
231261 X = np.concatenate((X[:i], X[i + 1 :]))
@@ -249,28 +279,10 @@ def _set_sequential_mkl():
249279 os.environ["OMP_NUM_THREADS"] = "1"
250280
251281
def solve_loo(X, C, y, progress=False):
    """Solve the leave-one-out problem for every observation.

    The lambda grid is taken from a fit on the full data via
    ``solve_standard``; each observation index is then dispatched to a
    pool of worker processes so the individual problems run in parallel.

    Parameters
    ----------
    X, C, y
        Problem data, forwarded unchanged to ``solve_standard`` and to
        ``_solve_loo_i_beta``.
    progress : bool
        When true, wrap the result iterator in a ``tqdm`` progress bar.

    Returns
    -------
    np.ndarray
        The per-observation results stacked along a new first axis.
    lambdas
        The lambda values returned by ``solve_standard``.
    """
    # Only the lambda path of the full-data solution is needed here.
    lambdas = solve_standard(X, C, y)[1]
    n_obs = X.shape[0]

    solve_one = functools.partial(
        _solve_loo_i_beta, X=X, C=C, y=y, lambdas=lambdas
    )
    spawn_ctx = multiprocessing.get_context("spawn")

    with spawn_ctx.Pool(initializer=_set_sequential_mkl) as pool:
        fits = pool.imap(solve_one, range(n_obs))
        if progress:
            fits = tqdm.tqdm(fits, total=n_obs)
        # imap is lazy: consume it while the pool is still alive.
        fits = list(fits)

    return np.stack(fits, axis=0), lambdas
282+ """
272283
273284
285+ """
274286# The functions below are simply helper functions which implement the same functionality for the LASSO (not the C-LASSO)
275287# They are mostly intended for debugging and do not need to be integrated.
276288
@@ -327,3 +339,4 @@ def solve_lasso_loo(X, y, lambdas=None, progress=False):
327339 result = list(result)
328340
329341 return lambdas, np.stack(result, axis=0)
342+ """
0 commit comments