1
1
# Authors: The scikit-learn developers
2
2
# SPDX-License-Identifier: BSD-3-Clause
3
3
4
- import sys
5
4
6
5
import numpy as np
7
6
import pytest
8
7
from numpy .testing import assert_array_equal
9
8
9
+ from sklearn .datasets import make_low_rank_matrix
10
10
from sklearn .decomposition import PCA , MiniBatchSparsePCA , SparsePCA
11
11
from sklearn .utils import check_random_state
12
12
from sklearn .utils ._testing import (
@@ -57,48 +57,58 @@ def test_correct_shapes():
57
57
assert U .shape == (12 , 13 )
58
58
59
59
60
def test_fit_transform(global_random_seed):
    """Check that the "lars" and "cd" solvers of SparsePCA find similar
    components on the same toy data."""
    alpha = 1
    rng = np.random.RandomState(global_random_seed)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = SparsePCA(
        n_components=3, method="lars", alpha=alpha, random_state=global_random_seed
    )
    spca_lars.fit(Y)

    # Test that CD gives similar results
    spca_lasso = SparsePCA(
        n_components=3, method="cd", random_state=global_random_seed, alpha=alpha
    )
    spca_lasso.fit(Y)
    assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)
71
75
72
76
73
77
@if_safe_multiprocessing_with_blas
def test_fit_transform_parallel(global_random_seed):
    """Check that fitting SparsePCA with n_jobs=2 gives the same transform
    as the sequential fit."""
    alpha = 1
    rng = np.random.RandomState(global_random_seed)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = SparsePCA(
        n_components=3, method="lars", alpha=alpha, random_state=global_random_seed
    )
    spca_lars.fit(Y)
    U1 = spca_lars.transform(Y)
    # Test multiple CPUs
    spca = SparsePCA(
        n_components=3,
        n_jobs=2,
        method="lars",
        alpha=alpha,
        random_state=global_random_seed,
    ).fit(Y)
    U2 = spca.transform(Y)
    # Sanity check: the fit must not have produced an all-zero decomposition.
    assert not np.all(spca_lars.components_ == 0)
    assert_array_almost_equal(U1, U2)
88
98
89
99
90
def test_transform_nan(global_random_seed):
    # Test that SparsePCA won't return NaN when there is 0 feature in all
    # samples.
    rng = np.random.RandomState(global_random_seed)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    Y[:, 0] = 0  # zero out one feature across all samples
    estimator = SparsePCA(n_components=8, random_state=global_random_seed)
    assert not np.any(np.isnan(estimator.fit_transform(Y)))
98
108
99
109
100
- def test_fit_transform_tall ():
101
- rng = np .random .RandomState (0 )
110
+ def test_fit_transform_tall (global_random_seed ):
111
+ rng = np .random .RandomState (global_random_seed )
102
112
Y , _ , _ = generate_toy_data (3 , 65 , (8 , 8 ), random_state = rng ) # tall array
103
113
spca_lars = SparsePCA (n_components = 3 , method = "lars" , random_state = rng )
104
114
U1 = spca_lars .fit_transform (Y )
@@ -107,8 +117,8 @@ def test_fit_transform_tall():
107
117
assert_array_almost_equal (U1 , U2 )
108
118
109
119
110
- def test_initialization ():
111
- rng = np .random .RandomState (0 )
120
+ def test_initialization (global_random_seed ):
121
+ rng = np .random .RandomState (global_random_seed )
112
122
U_init = rng .randn (5 , 3 )
113
123
V_init = rng .randn (3 , 4 )
114
124
model = SparsePCA (
@@ -135,65 +145,32 @@ def test_mini_batch_correct_shapes():
135
145
assert U .shape == (12 , 13 )
136
146
137
147
138
- # XXX: test always skipped
139
- @pytest .mark .skipif (True , reason = "skipping mini_batch_fit_transform." )
140
- def test_mini_batch_fit_transform ():
141
- alpha = 1
142
- rng = np .random .RandomState (0 )
143
- Y , _ , _ = generate_toy_data (3 , 10 , (8 , 8 ), random_state = rng ) # wide array
144
- spca_lars = MiniBatchSparsePCA (n_components = 3 , random_state = 0 , alpha = alpha ).fit (Y )
145
- U1 = spca_lars .transform (Y )
146
- # Test multiple CPUs
147
- if sys .platform == "win32" : # fake parallelism for win32
148
- import joblib
149
-
150
- _mp = joblib .parallel .multiprocessing
151
- joblib .parallel .multiprocessing = None
152
- try :
153
- spca = MiniBatchSparsePCA (
154
- n_components = 3 , n_jobs = 2 , alpha = alpha , random_state = 0
155
- )
156
- U2 = spca .fit (Y ).transform (Y )
157
- finally :
158
- joblib .parallel .multiprocessing = _mp
159
- else : # we can efficiently use parallelism
160
- spca = MiniBatchSparsePCA (n_components = 3 , n_jobs = 2 , alpha = alpha , random_state = 0 )
161
- U2 = spca .fit (Y ).transform (Y )
162
- assert not np .all (spca_lars .components_ == 0 )
163
- assert_array_almost_equal (U1 , U2 )
164
- # Test that CD gives similar results
165
- spca_lasso = MiniBatchSparsePCA (
166
- n_components = 3 , method = "cd" , alpha = alpha , random_state = 0
167
- ).fit (Y )
168
- assert_array_almost_equal (spca_lasso .components_ , spca_lars .components_ )
169
-
170
-
171
def test_scaling_fit_transform(global_random_seed):
    """Check that `transform` on a subset of the training data matches the
    corresponding rows of `fit_transform` (consistent internal scaling)."""
    alpha = 1
    rng = np.random.RandomState(global_random_seed)
    Y, _, _ = generate_toy_data(3, 1000, (8, 8), random_state=rng)
    spca_lars = SparsePCA(n_components=3, method="lars", alpha=alpha, random_state=rng)
    results_train = spca_lars.fit_transform(Y)
    results_test = spca_lars.transform(Y[:10])
    assert_allclose(results_train[0], results_test[0])
179
156
180
157
181
def test_pca_vs_spca(global_random_seed):
    """Check that SparsePCA without sparsity penalty (alpha=0, ridge_alpha=0)
    recovers the same subspace as plain PCA."""
    rng = np.random.RandomState(global_random_seed)
    Y, _, _ = generate_toy_data(3, 1000, (8, 8), random_state=rng)
    Z, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)
    spca = SparsePCA(alpha=0, ridge_alpha=0, n_components=2, random_state=rng)
    pca = PCA(n_components=2, random_state=rng)
    pca.fit(Y)
    spca.fit(Y)
    results_test_pca = pca.transform(Z)
    results_test_spca = spca.transform(Z)
    # Without penalty, the SPCA components should span the same subspace as
    # the PCA components, up to sign: their cross-Gram matrix is ~identity.
    assert_allclose(
        np.abs(spca.components_.dot(pca.components_.T)), np.eye(2), atol=1e-4
    )
    # Normalize the sign of each component before comparing the projections.
    results_test_pca *= np.sign(results_test_pca[0, :])
    results_test_spca *= np.sign(results_test_spca[0, :])
    assert_allclose(results_test_pca, results_test_spca, atol=1e-4)
197
174
198
175
199
176
@pytest .mark .parametrize ("SPCA" , [SparsePCA , MiniBatchSparsePCA ])
@@ -236,26 +213,31 @@ def test_sparse_pca_dtype_match(SPCA, method, data_type, expected_type):
236
213
237
214
@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA))
@pytest.mark.parametrize("method", ("lars", "cd"))
def test_sparse_pca_numerical_consistency(SPCA, method, global_random_seed):
    # Verify numerical consistency between np.float32 and np.float64
    n_samples, n_features, n_components = 20, 20, 5
    input_array = make_low_rank_matrix(
        n_samples=n_samples,
        n_features=n_features,
        effective_rank=n_components,
        random_state=global_random_seed,
    )

    model_32 = SPCA(
        n_components=n_components,
        method=method,
        random_state=global_random_seed,
    )
    transformed_32 = model_32.fit_transform(input_array.astype(np.float32))

    model_64 = SPCA(
        n_components=n_components,
        method=method,
        random_state=global_random_seed,
    )
    transformed_64 = model_64.fit_transform(input_array.astype(np.float64))
    assert_allclose(transformed_64, transformed_32)
    assert_allclose(model_64.components_, model_32.components_)
259
241
260
242
261
243
@pytest .mark .parametrize ("SPCA" , [SparsePCA , MiniBatchSparsePCA ])
@@ -324,17 +306,20 @@ def test_equivalence_components_pca_spca(global_random_seed):
324
306
assert_allclose (pca .components_ , spca .components_ )
325
307
326
308
327
- def test_sparse_pca_inverse_transform ():
309
+ def test_sparse_pca_inverse_transform (global_random_seed ):
328
310
"""Check that `inverse_transform` in `SparsePCA` and `PCA` are similar."""
329
- rng = np .random .RandomState (0 )
311
+ rng = np .random .RandomState (global_random_seed )
330
312
n_samples , n_features = 10 , 5
331
313
X = rng .randn (n_samples , n_features )
332
314
333
315
n_components = 2
334
316
spca = SparsePCA (
335
- n_components = n_components , alpha = 1e-12 , ridge_alpha = 1e-12 , random_state = 0
317
+ n_components = n_components ,
318
+ alpha = 1e-12 ,
319
+ ridge_alpha = 1e-12 ,
320
+ random_state = global_random_seed ,
336
321
)
337
- pca = PCA (n_components = n_components , random_state = 0 )
322
+ pca = PCA (n_components = n_components , random_state = global_random_seed )
338
323
X_trans_spca = spca .fit_transform (X )
339
324
X_trans_pca = pca .fit_transform (X )
340
325
assert_allclose (
@@ -343,17 +328,20 @@ def test_sparse_pca_inverse_transform():
343
328
344
329
345
330
@pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA])
def test_transform_inverse_transform_round_trip(SPCA, global_random_seed):
    """Check the `transform` and `inverse_transform` round trip with no loss of
    information.
    """
    rng = np.random.RandomState(global_random_seed)
    n_samples, n_features = 10, 5
    X = rng.randn(n_samples, n_features)

    # With n_components == n_features and (near-)zero regularization, the
    # transform is invertible, so the round trip should be lossless.
    n_components = n_features
    spca = SPCA(
        n_components=n_components,
        alpha=1e-12,
        ridge_alpha=1e-12,
        random_state=global_random_seed,
    )
    X_trans_spca = spca.fit_transform(X)
    assert_allclose(spca.inverse_transform(X_trans_spca), X)
0 commit comments