Commit ceac4a8
TST use global_random_seed in sklearn/decomposition/tests/test_sparse_pca.py (scikit-learn#31213)
Co-authored-by: Jérémie du Boisberranger <[email protected]>
1 parent 5fee5ad commit ceac4a8
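The commit replaces hard-coded `np.random.RandomState(0)` seeds in these tests with the `global_random_seed` pytest fixture, so the module can be exercised against many seeds instead of a single cherry-picked RNG stream. Below is a minimal sketch of how such a fixture can be wired up, assuming a simplified reading of the `SKLEARN_TESTS_GLOBAL_RANDOM_SEED` environment variable; the real fixture lives in `sklearn/conftest.py` and accepts more spellings (single seeds, ranges, "any", "all"), so treat this as an illustration rather than the actual implementation.

    # conftest.py -- minimal sketch in the spirit of scikit-learn's
    # `global_random_seed` fixture; parsing below is a simplified assumption.
    import os

    import numpy as np
    import pytest


    def _seeds_from_env():
        # The real fixture reads SKLEARN_TESTS_GLOBAL_RANDOM_SEED; here we only
        # handle "all" or a single integer, defaulting to 42.
        value = os.environ.get("SKLEARN_TESTS_GLOBAL_RANDOM_SEED", "42")
        if value == "all":
            return list(range(100))
        return [int(value)]


    @pytest.fixture(params=_seeds_from_env())
    def global_random_seed(request):
        """Yield one seed per parametrized run of the requesting test."""
        return request.param


    # test_example.py -- how the diffed tests consume the fixture: every source
    # of randomness is derived from the seed, never from RandomState(0).
    def test_mean_is_seed_independent(global_random_seed):
        rng = np.random.RandomState(global_random_seed)
        X = rng.randn(1000, 5)
        # The assertion must hold for any seed, so the tolerance is statistical
        # rather than tuned to one particular RNG stream.
        np.testing.assert_allclose(X.mean(), 0.0, atol=0.1)

Because each assertion now has to hold for every seed, the diff also switches `test_sparse_pca_numerical_consistency` to a well-conditioned `make_low_rank_matrix` input and loosens the tolerances in `test_pca_vs_spca` to `atol=1e-4`. Locally, something like `SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all" pytest sklearn/decomposition/tests/test_sparse_pca.py` should run the module across the full seed range, assuming the documented environment-variable interface.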

File tree: 1 file changed (+63 −75 lines)

sklearn/decomposition/tests/test_sparse_pca.py

@@ -1,12 +1,12 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-import sys
 
 import numpy as np
 import pytest
 from numpy.testing import assert_array_equal
 
+from sklearn.datasets import make_low_rank_matrix
 from sklearn.decomposition import PCA, MiniBatchSparsePCA, SparsePCA
 from sklearn.utils import check_random_state
 from sklearn.utils._testing import (
@@ -57,48 +57,58 @@ def test_correct_shapes():
     assert U.shape == (12, 13)
 
 
-def test_fit_transform():
+def test_fit_transform(global_random_seed):
     alpha = 1
-    rng = np.random.RandomState(0)
+    rng = np.random.RandomState(global_random_seed)
     Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
-    spca_lars = SparsePCA(n_components=3, method="lars", alpha=alpha, random_state=0)
+    spca_lars = SparsePCA(
+        n_components=3, method="lars", alpha=alpha, random_state=global_random_seed
+    )
     spca_lars.fit(Y)
 
     # Test that CD gives similar results
-    spca_lasso = SparsePCA(n_components=3, method="cd", random_state=0, alpha=alpha)
+    spca_lasso = SparsePCA(
+        n_components=3, method="cd", random_state=global_random_seed, alpha=alpha
+    )
     spca_lasso.fit(Y)
     assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)
 
 
 @if_safe_multiprocessing_with_blas
-def test_fit_transform_parallel():
+def test_fit_transform_parallel(global_random_seed):
     alpha = 1
-    rng = np.random.RandomState(0)
+    rng = np.random.RandomState(global_random_seed)
     Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
-    spca_lars = SparsePCA(n_components=3, method="lars", alpha=alpha, random_state=0)
+    spca_lars = SparsePCA(
+        n_components=3, method="lars", alpha=alpha, random_state=global_random_seed
+    )
     spca_lars.fit(Y)
     U1 = spca_lars.transform(Y)
     # Test multiple CPUs
     spca = SparsePCA(
-        n_components=3, n_jobs=2, method="lars", alpha=alpha, random_state=0
+        n_components=3,
+        n_jobs=2,
+        method="lars",
+        alpha=alpha,
+        random_state=global_random_seed,
     ).fit(Y)
     U2 = spca.transform(Y)
     assert not np.all(spca_lars.components_ == 0)
     assert_array_almost_equal(U1, U2)
 
 
-def test_transform_nan():
+def test_transform_nan(global_random_seed):
     # Test that SparsePCA won't return NaN when there is 0 feature in all
     # samples.
-    rng = np.random.RandomState(0)
+    rng = np.random.RandomState(global_random_seed)
     Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
     Y[:, 0] = 0
-    estimator = SparsePCA(n_components=8)
+    estimator = SparsePCA(n_components=8, random_state=global_random_seed)
     assert not np.any(np.isnan(estimator.fit_transform(Y)))
 
 
-def test_fit_transform_tall():
-    rng = np.random.RandomState(0)
+def test_fit_transform_tall(global_random_seed):
+    rng = np.random.RandomState(global_random_seed)
     Y, _, _ = generate_toy_data(3, 65, (8, 8), random_state=rng)  # tall array
     spca_lars = SparsePCA(n_components=3, method="lars", random_state=rng)
     U1 = spca_lars.fit_transform(Y)
@@ -107,8 +117,8 @@ def test_fit_transform_tall():
     assert_array_almost_equal(U1, U2)
 
 
-def test_initialization():
-    rng = np.random.RandomState(0)
+def test_initialization(global_random_seed):
+    rng = np.random.RandomState(global_random_seed)
     U_init = rng.randn(5, 3)
     V_init = rng.randn(3, 4)
     model = SparsePCA(
@@ -135,65 +145,32 @@ def test_mini_batch_correct_shapes():
     assert U.shape == (12, 13)
 
 
-# XXX: test always skipped
-@pytest.mark.skipif(True, reason="skipping mini_batch_fit_transform.")
-def test_mini_batch_fit_transform():
-    alpha = 1
-    rng = np.random.RandomState(0)
-    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
-    spca_lars = MiniBatchSparsePCA(n_components=3, random_state=0, alpha=alpha).fit(Y)
-    U1 = spca_lars.transform(Y)
-    # Test multiple CPUs
-    if sys.platform == "win32":  # fake parallelism for win32
-        import joblib
-
-        _mp = joblib.parallel.multiprocessing
-        joblib.parallel.multiprocessing = None
-        try:
-            spca = MiniBatchSparsePCA(
-                n_components=3, n_jobs=2, alpha=alpha, random_state=0
-            )
-            U2 = spca.fit(Y).transform(Y)
-        finally:
-            joblib.parallel.multiprocessing = _mp
-    else:  # we can efficiently use parallelism
-        spca = MiniBatchSparsePCA(n_components=3, n_jobs=2, alpha=alpha, random_state=0)
-        U2 = spca.fit(Y).transform(Y)
-    assert not np.all(spca_lars.components_ == 0)
-    assert_array_almost_equal(U1, U2)
-    # Test that CD gives similar results
-    spca_lasso = MiniBatchSparsePCA(
-        n_components=3, method="cd", alpha=alpha, random_state=0
-    ).fit(Y)
-    assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)
-
-
-def test_scaling_fit_transform():
+def test_scaling_fit_transform(global_random_seed):
     alpha = 1
-    rng = np.random.RandomState(0)
+    rng = np.random.RandomState(global_random_seed)
     Y, _, _ = generate_toy_data(3, 1000, (8, 8), random_state=rng)
     spca_lars = SparsePCA(n_components=3, method="lars", alpha=alpha, random_state=rng)
     results_train = spca_lars.fit_transform(Y)
     results_test = spca_lars.transform(Y[:10])
    assert_allclose(results_train[0], results_test[0])
 
 
-def test_pca_vs_spca():
-    rng = np.random.RandomState(0)
+def test_pca_vs_spca(global_random_seed):
+    rng = np.random.RandomState(global_random_seed)
     Y, _, _ = generate_toy_data(3, 1000, (8, 8), random_state=rng)
     Z, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)
-    spca = SparsePCA(alpha=0, ridge_alpha=0, n_components=2)
-    pca = PCA(n_components=2)
+    spca = SparsePCA(alpha=0, ridge_alpha=0, n_components=2, random_state=rng)
+    pca = PCA(n_components=2, random_state=rng)
     pca.fit(Y)
     spca.fit(Y)
     results_test_pca = pca.transform(Z)
     results_test_spca = spca.transform(Z)
     assert_allclose(
-        np.abs(spca.components_.dot(pca.components_.T)), np.eye(2), atol=1e-5
+        np.abs(spca.components_.dot(pca.components_.T)), np.eye(2), atol=1e-4
     )
     results_test_pca *= np.sign(results_test_pca[0, :])
     results_test_spca *= np.sign(results_test_spca[0, :])
-    assert_allclose(results_test_pca, results_test_spca)
+    assert_allclose(results_test_pca, results_test_spca, atol=1e-4)
 
 
 @pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA])
@@ -236,26 +213,31 @@ def test_sparse_pca_dtype_match(SPCA, method, data_type, expected_type):
 
 @pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA))
 @pytest.mark.parametrize("method", ("lars", "cd"))
-def test_sparse_pca_numerical_consistency(SPCA, method):
+def test_sparse_pca_numerical_consistency(SPCA, method, global_random_seed):
     # Verify numericall consistentency among np.float32 and np.float64
-    rtol = 1e-3
-    alpha = 2
-    n_samples, n_features, n_components = 12, 10, 3
-    rng = np.random.RandomState(0)
-    input_array = rng.randn(n_samples, n_features)
+    n_samples, n_features, n_components = 20, 20, 5
+    input_array = make_low_rank_matrix(
+        n_samples=n_samples,
+        n_features=n_features,
+        effective_rank=n_components,
+        random_state=global_random_seed,
+    )
 
     model_32 = SPCA(
-        n_components=n_components, alpha=alpha, method=method, random_state=0
+        n_components=n_components,
+        method=method,
+        random_state=global_random_seed,
     )
     transformed_32 = model_32.fit_transform(input_array.astype(np.float32))
 
     model_64 = SPCA(
-        n_components=n_components, alpha=alpha, method=method, random_state=0
+        n_components=n_components,
+        method=method,
+        random_state=global_random_seed,
     )
     transformed_64 = model_64.fit_transform(input_array.astype(np.float64))
-
-    assert_allclose(transformed_64, transformed_32, rtol=rtol)
-    assert_allclose(model_64.components_, model_32.components_, rtol=rtol)
+    assert_allclose(transformed_64, transformed_32)
+    assert_allclose(model_64.components_, model_32.components_)
 
 
 @pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA])
@@ -324,17 +306,20 @@ def test_equivalence_components_pca_spca(global_random_seed):
     assert_allclose(pca.components_, spca.components_)
 
 
-def test_sparse_pca_inverse_transform():
+def test_sparse_pca_inverse_transform(global_random_seed):
     """Check that `inverse_transform` in `SparsePCA` and `PCA` are similar."""
-    rng = np.random.RandomState(0)
+    rng = np.random.RandomState(global_random_seed)
     n_samples, n_features = 10, 5
     X = rng.randn(n_samples, n_features)
 
     n_components = 2
     spca = SparsePCA(
-        n_components=n_components, alpha=1e-12, ridge_alpha=1e-12, random_state=0
+        n_components=n_components,
+        alpha=1e-12,
+        ridge_alpha=1e-12,
+        random_state=global_random_seed,
     )
-    pca = PCA(n_components=n_components, random_state=0)
+    pca = PCA(n_components=n_components, random_state=global_random_seed)
     X_trans_spca = spca.fit_transform(X)
     X_trans_pca = pca.fit_transform(X)
     assert_allclose(
@@ -343,17 +328,20 @@ def test_sparse_pca_inverse_transform():
 
 
 @pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA])
-def test_transform_inverse_transform_round_trip(SPCA):
+def test_transform_inverse_transform_round_trip(SPCA, global_random_seed):
     """Check the `transform` and `inverse_transform` round trip with no loss of
     information.
     """
-    rng = np.random.RandomState(0)
+    rng = np.random.RandomState(global_random_seed)
     n_samples, n_features = 10, 5
     X = rng.randn(n_samples, n_features)
 
     n_components = n_features
     spca = SPCA(
-        n_components=n_components, alpha=1e-12, ridge_alpha=1e-12, random_state=0
+        n_components=n_components,
+        alpha=1e-12,
+        ridge_alpha=1e-12,
+        random_state=global_random_seed,
     )
     X_trans_spca = spca.fit_transform(X)
     assert_allclose(spca.inverse_transform(X_trans_spca), X)
