1
1
"""Utility functions for implementing and testing out ALO for c-lasso.
2
2
"""
3
3
4
- import functools
5
- from typing import Tuple
6
-
7
- import multiprocessing
8
4
import numpy as np
9
5
import scipy .linalg
10
- import tqdm
11
- import sklearn .linear_model
12
-
13
- from classo import classo_problem
14
- from classo .solve_R1 import pathlasso_R1 , problem_R1
15
6
16
7
17
def generate_data(n, p, k, d, sigma=1, seed=None):
    """Generate a random c-lasso problem.

    Parameters
    ----------
    n : int
        Number of observations.
    p : int
        Number of parameters.
    k : int
        Number of ground truth non-zero parameters. Must satisfy
        ``d < k <= p``.
    d : int
        Number of constraints.
    sigma : float
        Standard deviation of additive noise.
    seed : int, optional
        Optional integer used to seed the random number generator
        for reproducibility.

    Returns
    -------
    (X, C, y) : tuple of np.ndarray
        Design matrix of shape ``(n, p)``, constraint matrix of shape
        ``(d, p)`` and response vector of shape ``(n,)``.
    beta : np.ndarray
        Ground truth coefficients of shape ``(p,)``. Only the first ``k``
        entries are non-zero, and ``C @ beta`` is zero up to rounding.

    Raises
    ------
    ValueError
        If ``k > p``, or if ``k <= d`` (the projection onto the constraint
        set would then leave a zero vector that cannot be rescaled).
    """
    if k > p:
        raise ValueError(
            f"k (number of non-zero parameters) must be at most p, got k={k}, p={p}"
        )
    if k <= d:
        raise ValueError(
            f"k must be strictly greater than d (number of constraints), got k={k}, d={d}"
        )

    rng = np.random.Generator(np.random.Philox(seed))

    # The draw order (X, C, eps) is kept fixed so seeded outputs stay reproducible.
    X = rng.normal(scale=1 / np.sqrt(k), size=(n, p))
    C = rng.normal(size=(d, p))
    beta_nz = np.ones(k)
    C_k = C[:, :k]

    # Ensure that beta verifies the constraint by projecting: remove the
    # component of beta_nz lying in the column space of C_k.T.
    beta_nz = beta_nz - C_k.T @ scipy.linalg.lstsq(C_k.T, beta_nz)[0]
    # NOTE(review): this divides by the mean square, not its square root;
    # kept as-is so that existing seeded experiments are unaffected.
    beta_nz /= np.mean(beta_nz ** 2)
    beta = np.concatenate((beta_nz, np.zeros(p - k)))

    eps = rng.normal(scale=sigma, size=(n,))

    y = X @ beta + eps
    return (X, C, y), beta
52
-
53
8
54
9
def solve_cls (X , y , C ):
55
10
"""Solve the constrained least-squares problem.
@@ -134,7 +89,7 @@ def alo_cls_h(X: np.ndarray, C: np.ndarray) -> np.ndarray:
134
89
135
90
def alo_h (
136
91
X : np .ndarray , beta : np .ndarray , y : np .ndarray , C : np .ndarray
137
- ) -> Tuple [ np . ndarray , np . ndarray ] :
92
+ ):
138
93
"""Computes the ALO leverage and residual for the c-lasso.
139
94
140
95
Due to its L1 structure, the ALO for the constrained lasso corresponds
@@ -175,7 +130,7 @@ def alo_h(
175
130
176
131
def alo_classo_risk (
177
132
X : np .ndarray , C : np .ndarray , y : np .ndarray , betas : np .ndarray
178
- ) -> Tuple [ np . ndarray , np . ndarray ] :
133
+ ):
179
134
"""Computes the ALO risk for the c-lasso at the given estimates.
180
135
181
136
Parameters
@@ -210,8 +165,63 @@ def alo_classo_risk(
210
165
return mse , df
211
166
212
167
168
+
169
+ """
170
+ Not used for now.
171
+ import functools
172
+ from typing import Tuple
173
+
174
+ import multiprocessing
175
+ import numpy as np
176
+ import scipy.linalg
177
+ import tqdm
178
+ import sklearn.linear_model
179
+
180
+ from classo import classo_problem
181
+ from classo.solve_R1 import pathlasso_R1, problem_R1
182
+
183
+
184
+
185
+
186
+ def generate_data(n, p, k, d, sigma=1, seed=None):
187
+ ""Generate random c-lasso problem.
188
+
189
+ Parameters
190
+ ----------
191
+ n : int
192
+ Number of observations
193
+ p : int
194
+ Number of parameters
195
+ k : int
196
+ Number of ground truth non-zero parameters.
197
+ d : int
198
+ Number of constraints
199
+ sigma : float
200
+ Standard deviation of additive noise.
201
+ seed : int, optional
202
+ Optional integer used to seed the random number generator
203
+ for reproducibility.
204
+ ""
205
+ rng = np.random.Generator(np.random.Philox(seed))
206
+
207
+ X = rng.normal(scale=1 / np.sqrt(k), size=(n, p))
208
+ C = rng.normal(size=(d, p))
209
+ beta_nz = np.ones(k)
210
+ C_k = C[:, :k]
211
+
212
+ # ensure that beta verifies the constraint by projecting.
213
+ beta_nz = beta_nz - C_k.T @ scipy.linalg.lstsq(C_k.T, beta_nz)[0]
214
+ beta_nz /= np.mean(beta_nz ** 2)
215
+ beta = np.concatenate((beta_nz, np.zeros(p - k)))
216
+
217
+ eps = rng.normal(scale=sigma, size=(n,))
218
+
219
+ y = X @ beta + eps
220
+ return (X, C, y), beta
221
+
222
+
213
223
def solve_standard(X, C, y, lambdas=None):
214
- """ Utility function to solve standard c-lasso formulation." ""
224
+ ""Utility function to solve standard c-lasso formulation.""
215
225
problem = problem_R1((X, C, y), "Path-Alg")
216
226
problem.tol = 1e-6
217
227
@@ -226,6 +236,26 @@ def solve_standard(X, C, y, lambdas=None):
226
236
beta = pathlasso_R1(problem, lambdas)
227
237
return np.array(beta), lambdas * problem.lambdamax
228
238
239
+ def solve_loo(X, C, y):
240
+ ""Solves the leave-one-out problem for each observation.
241
+
242
+ This function makes use of python multi-processing in order
243
+ to accelerate the computation across all the cores.
244
+ ""
245
+ _, lambdas = solve_standard(X, C, y)
246
+
247
+ ctx = multiprocessing.get_context("spawn")
248
+
249
+ with ctx.Pool(initializer=_set_sequential_mkl) as pool:
250
+ result = pool.imap(
251
+ functools.partial(_solve_loo_i_beta, X=X, C=C, y=y, lambdas=lambdas),
252
+ range(X.shape[0]),
253
+ )
254
+
255
+ result = list(result)
256
+
257
+ return np.stack(result, axis=0), lambdas
258
+
229
259
230
260
def solve_loo_i(X, C, y, i, lambdas):
231
261
X = np.concatenate((X[:i], X[i + 1 :]))
@@ -249,28 +279,10 @@ def _set_sequential_mkl():
249
279
os.environ["OMP_NUM_THREADS"] = "1"
250
280
251
281
252
def solve_loo(X, C, y, progress=False):
    """Solve the leave-one-out problem for every observation.

    The lambdas are taken from a single call to ``solve_standard`` on the
    full data. One task per row of ``X`` is then dispatched to a
    ``multiprocessing`` pool created with the "spawn" start method, each
    worker being initialised with ``_set_sequential_mkl``.

    Parameters
    ----------
    X, C, y
        Problem data, forwarded unchanged to ``solve_standard`` and to
        ``_solve_loo_i_beta``.
    progress : bool
        When true, wrap the result iterator in a ``tqdm`` progress bar.

    Returns
    -------
    tuple
        The per-observation results stacked along axis 0, and the lambdas.
    """
    n_obs = X.shape[0]
    lambdas = solve_standard(X, C, y)[1]

    worker = functools.partial(_solve_loo_i_beta, X=X, C=C, y=y, lambdas=lambdas)
    spawn_ctx = multiprocessing.get_context("spawn")

    with spawn_ctx.Pool(initializer=_set_sequential_mkl) as pool:
        # imap yields results lazily and in input order; they must be
        # collected before the pool is torn down on leaving the block.
        fits = pool.imap(worker, range(n_obs))
        if progress:
            fits = tqdm.tqdm(fits, total=n_obs)
        fits = list(fits)

    return np.stack(fits, axis=0), lambdas
282
+ """
272
283
273
284
285
+ """
274
286
# The functions below are simply helper functions which implement the same functionality for the LASSO (not the C-LASSO)
275
287
# They are mostly intended for debugging and do not need to be integrated.
276
288
@@ -327,3 +339,4 @@ def solve_lasso_loo(X, y, lambdas=None, progress=False):
327
339
result = list(result)
328
340
329
341
return lambdas, np.stack(result, axis=0)
342
+ """
0 commit comments