Skip to content

Commit 5c9b6a4

Browse files
authored
Merge pull request statsmodels#5198 from bashtage/fix-corr-nearest-sparse
BUG/TST/CLN: Fix corr nearest factor
2 parents f6ec19d + 15624de commit 5c9b6a4

File tree

2 files changed

+103
-121
lines changed

2 files changed

+103
-121
lines changed

statsmodels/stats/correlation_tools.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,7 @@ def corr_clipped(corr, threshold=1e-15):
154154

155155
def cov_nearest(cov, method='clipped', threshold=1e-15, n_fact=100,
156156
return_all=False):
157-
158-
'''
157+
"""
159158
Find the nearest covariance matrix that is postive (semi-) definite
160159
161160
This leaves the diagonal, i.e. the variance, unchanged
@@ -169,7 +168,7 @@ def cov_nearest(cov, method='clipped', threshold=1e-15, n_fact=100,
169168
used.if "nearest", then ``corr_nearest`` is used
170169
threshold : float
171170
clipping threshold for smallest eigen value, see Notes
172-
nfact : int or float
171+
n_fact : int or float
173172
factor to determine the maximum number of iterations in
174173
``corr_nearest``. See its doc string
175174
return_all : bool
@@ -205,14 +204,13 @@ def cov_nearest(cov, method='clipped', threshold=1e-15, n_fact=100,
205204
--------
206205
corr_nearest
207206
corr_clipped
208-
209-
'''
207+
"""
210208

211209
from statsmodels.stats.moment_helpers import cov2corr, corr2cov
212210
cov_, std_ = cov2corr(cov, return_std=True)
213211
if method == 'clipped':
214212
corr_ = corr_clipped(cov_, threshold=threshold)
215-
elif method == 'nearest':
213+
else: # method == 'nearest'
216214
corr_ = corr_nearest(cov_, threshold=threshold, n_fact=n_fact)
217215

218216
cov_ = corr2cov(corr_, std_)
@@ -604,10 +602,10 @@ def corr_nearest_factor(corr, rank, ctol=1e-6, lam_min=1e-30,
604602
605603
References
606604
----------
607-
R Borsdof, N Higham, M Raydan (2010). Computing a nearest
608-
correlation matrix with factor structure. SIAM J Matrix Anal
609-
Appl, 31:5, 2603-2622.
610-
http://eprints.ma.man.ac.uk/1523/01/covered/MIMS_ep2009_87.pdf
605+
.. [*] R Borsdof, N Higham, M Raydan (2010). Computing a nearest
606+
correlation matrix with factor structure. SIAM J Matrix Anal Appl,
607+
31:5, 2603-2622.
608+
http://eprints.ma.man.ac.uk/1523/01/covered/MIMS_ep2009_87.pdf
611609
612610
Examples
613611
--------
@@ -681,7 +679,8 @@ def func(X):
681679
ir += bs
682680
return fval
683681

684-
rslt = _spg_optim(func, grad, X, _project_correlation_factors)
682+
rslt = _spg_optim(func, grad, X, _project_correlation_factors, ctol=ctol,
683+
lam_min=lam_min, lam_max=lam_max, maxiter=maxiter)
685684
root = rslt.params
686685
diag = 1 - (root**2).sum(1)
687686
soln = FactoredPSDMatrix(diag, root)

statsmodels/stats/tests/test_corrpsd.py

Lines changed: 93 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ class Holder(object):
8787
0.7445177382760834, 0.7766852947049376, 0.8107916469252828, 1.01
8888
]).reshape(6,6, order='F')
8989

90+
9091
def test_corr_psd():
9192
# test positive definite matrix is unchanged
9293
x = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])
@@ -122,7 +123,6 @@ def test_nearest(self):
122123
#print evals[0] / 1e-7 - 1
123124
assert_allclose(evals[0], 1e-7, rtol=1e-6)
124125

125-
126126
def test_clipped(self):
127127
x = self.x
128128
res_r = self.res
@@ -190,37 +190,30 @@ def setup_class(cls):
190190
cls.x = x
191191
cls.res = cov1_r
192192

193-
def test_corrpsd_threshold():
193+
194+
@pytest.mark.parametrize('threshold', [0, 1e-15, 1e-10, 1e-6])
195+
def test_corrpsd_threshold(threshold):
194196
x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])
195197

196-
#print np.linalg.eigvalsh(x)
197-
for threshold in [0, 1e-15, 1e-10, 1e-6]:
198+
y = corr_nearest(x, n_fact=100, threshold=threshold)
199+
evals = np.linalg.eigvalsh(y)
200+
assert_allclose(evals[0], threshold, rtol=1e-6, atol=1e-15)
198201

199-
y = corr_nearest(x, n_fact=100, threshold=threshold)
200-
evals = np.linalg.eigvalsh(y)
201-
#print 'evals', evals, threshold
202-
assert_allclose(evals[0], threshold, rtol=1e-6, atol=1e-15)
202+
y = corr_clipped(x, threshold=threshold)
203+
evals = np.linalg.eigvalsh(y)
204+
assert_allclose(evals[0], threshold, rtol=0.25, atol=1e-15)
203205

204-
y = corr_clipped(x, threshold=threshold)
205-
evals = np.linalg.eigvalsh(y)
206-
#print 'evals', evals, threshold
207-
assert_allclose(evals[0], threshold, rtol=0.25, atol=1e-15)
206+
y = cov_nearest(x, method='nearest', n_fact=100, threshold=threshold)
207+
evals = np.linalg.eigvalsh(y)
208+
assert_allclose(evals[0], threshold, rtol=1e-6, atol=1e-15)
208209

209-
y = cov_nearest(x, method='nearest', n_fact=100, threshold=threshold)
210-
evals = np.linalg.eigvalsh(y)
211-
#print 'evals', evals, threshold
212-
#print evals[0] / threshold - 1
213-
assert_allclose(evals[0], threshold, rtol=1e-6, atol=1e-15)
210+
y = cov_nearest(x, n_fact=100, threshold=threshold)
211+
evals = np.linalg.eigvalsh(y)
212+
assert_allclose(evals[0], threshold, rtol=0.25, atol=1e-15)
214213

215-
y = cov_nearest(x, n_fact=100, threshold=threshold)
216-
evals = np.linalg.eigvalsh(y)
217-
#print 'evals', evals, threshold
218-
#print evals[0] / threshold - 1
219-
assert_allclose(evals[0], threshold, rtol=0.25, atol=1e-15)
220214

221215
class Test_Factor(object):
222216

223-
224217
def test_corr_nearest_factor_arrpack(self):
225218

226219
# regression results for svds call
@@ -283,74 +276,67 @@ def test_corr_nearest_factor_arrpack(self):
283276
assert_allclose(u, dsign * u2, rtol=1e-6, atol=1e-14)
284277
assert_allclose(s, s2, rtol=1e-6)
285278

286-
287-
def test_corr_nearest_factor(self):
279+
@pytest.mark.parametrize('dm', [1, 2])
280+
def test_corr_nearest_factor(self, dm):
288281

289282
objvals = [np.array([6241.8, 6241.8, 579.4, 264.6, 264.3]),
290283
np.array([2104.9, 2104.9, 710.5, 266.3, 286.1])]
291284

292285
d = 100
293286

294-
for dm in 1,2:
295-
296-
# Construct a test matrix with exact factor structure
297-
X = np.zeros((d,dm), dtype=np.float64)
298-
x = np.linspace(0, 2*np.pi, d)
299-
np.random.seed(10)
300-
for j in range(dm):
301-
X[:,j] = np.sin(x*(j+1)) + 1e-10 * np.random.randn(d)
302-
303-
_project_correlation_factors(X)
304-
assert np.isfinite(X).all()
305-
X *= 0.7
306-
mat = np.dot(X, X.T)
307-
np.fill_diagonal(mat, 1.)
308-
309-
# Try to recover the structure
310-
rslt = corr_nearest_factor(mat, dm)
311-
err_msg = 'rank=%d, niter=%d' % (dm, len(rslt.objective_values))
312-
assert_allclose(rslt.objective_values[:5], objvals[dm - 1],
313-
rtol=0.5, err_msg=err_msg)
314-
assert_equal(rslt.Converged, True, err_msg=err_msg)
315-
mat1 = rslt.corr.to_matrix()
316-
assert_allclose(mat, mat1, rtol=0.25, atol=1e-3, err_msg=err_msg)
317-
287+
# Construct a test matrix with exact factor structure
288+
X = np.zeros((d, dm), dtype=np.float64)
289+
x = np.linspace(0, 2 * np.pi, d)
290+
np.random.seed(10)
291+
for j in range(dm):
292+
X[:, j] = np.sin(x * (j + 1)) + 1e-10 * np.random.randn(d)
318293

319-
# Test that we get the same result if the input is dense or sparse
320-
def test_corr_nearest_factor_sparse(self):
294+
_project_correlation_factors(X)
295+
assert np.isfinite(X).all()
296+
X *= 0.7
297+
mat = np.dot(X, X.T)
298+
np.fill_diagonal(mat, 1.)
321299

300+
# Try to recover the structure
301+
rslt = corr_nearest_factor(mat, dm, maxiter=10000)
302+
err_msg = 'rank=%d, niter=%d' % (dm, len(rslt.objective_values))
303+
assert_allclose(rslt.objective_values[:5], objvals[dm - 1],
304+
rtol=0.5, err_msg=err_msg)
305+
assert_equal(rslt.Converged, True, err_msg=err_msg)
306+
mat1 = rslt.corr.to_matrix()
307+
assert_allclose(mat, mat1, rtol=0.25, atol=1e-3, err_msg=err_msg)
308+
309+
@pytest.mark.parametrize('dm', [1, 2])
310+
def test_corr_nearest_factor_sparse(self, dm):
311+
# Test that result is the same if the input is dense or sparse
322312
d = 100
323313

324-
for dm in 1,2:
325-
326-
# Generate a test matrix of factors
327-
X = np.zeros((d,dm), dtype=np.float64)
328-
x = np.linspace(0, 2*np.pi, d)
329-
np.random.seed(10)
330-
for j in range(dm):
331-
X[:,j] = np.sin(x*(j+1)) + 1e-10 * np.random.randn(d)
332-
333-
# Get the correlation matrix
334-
_project_correlation_factors(X)
335-
X *= 0.7
336-
mat = np.dot(X, X.T)
337-
np.fill_diagonal(mat, 1)
314+
# Generate a test matrix of factors
315+
X = np.zeros((d, dm), dtype=np.float64)
316+
x = np.linspace(0, 2 * np.pi, d)
317+
np.random.seed(10)
318+
for j in range(dm):
319+
X[:, j] = np.sin(x * (j + 1)) + 1e-10 * np.random.randn(d)
338320

339-
# Threshold it
340-
mat *= (np.abs(mat) >= 0.35)
341-
smat = sparse.csr_matrix(mat)
321+
# Get the correlation matrix
322+
_project_correlation_factors(X)
323+
X *= 0.7
324+
mat = np.dot(X, X.T)
325+
np.fill_diagonal(mat, 1)
342326

343-
rslt = corr_nearest_factor(smat, dm)
344-
assert_equal(rslt.Converged, True)
345-
mat_dense = rslt.corr.to_matrix()
327+
# Threshold it
328+
mat *= (np.abs(mat) >= 0.35)
329+
smat = sparse.csr_matrix(mat)
346330

347-
rslt = corr_nearest_factor(smat, dm)
348-
assert_equal(rslt.Converged, True)
349-
mat_sparse = rslt.corr.to_matrix()
331+
rslt = corr_nearest_factor(mat, dm, maxiter=10000)
332+
assert rslt.Converged is True
333+
mat_dense = rslt.corr.to_matrix()
350334

351-
assert_allclose(mat_dense, mat_sparse, rtol=0.25,
352-
atol=1e-3)
335+
rslt = corr_nearest_factor(smat, dm, maxiter=10000)
336+
assert rslt.Converged is True
337+
mat_sparse = rslt.corr.to_matrix()
353338

339+
assert_allclose(mat_dense, mat_sparse, rtol=.25, atol=1e-3)
354340

355341
# Test on a quadratic function.
356342
def test_spg_optim(self):
@@ -419,52 +405,49 @@ def test_solve(self):
419405
sr2 = np.linalg.solve(mat, rhs)
420406
assert_almost_equal(sr1, sr2)
421407

422-
def test_cov_nearest_factor_homog(self):
408+
@pytest.mark.parametrize('dm', [1, 2])
409+
def test_cov_nearest_factor_homog(self, dm):
423410

424411
d = 100
425412

426-
for dm in 1,2:
427-
428-
# Construct a test matrix with exact factor structure
429-
X = np.zeros((d,dm), dtype=np.float64)
430-
x = np.linspace(0, 2*np.pi, d)
431-
for j in range(dm):
432-
X[:,j] = np.sin(x*(j+1))
433-
mat = np.dot(X, X.T)
434-
np.fill_diagonal(mat, np.diag(mat) + 3.1)
435-
436-
# Try to recover the structure
437-
rslt = cov_nearest_factor_homog(mat, dm)
438-
mat1 = rslt.to_matrix()
413+
# Construct a test matrix with exact factor structure
414+
X = np.zeros((d,dm), dtype=np.float64)
415+
x = np.linspace(0, 2*np.pi, d)
416+
for j in range(dm):
417+
X[:,j] = np.sin(x*(j+1))
418+
mat = np.dot(X, X.T)
419+
np.fill_diagonal(mat, np.diag(mat) + 3.1)
439420

440-
assert_allclose(mat, mat1, rtol=0.25, atol=1e-3)
421+
# Try to recover the structure
422+
rslt = cov_nearest_factor_homog(mat, dm)
423+
mat1 = rslt.to_matrix()
441424

425+
assert_allclose(mat, mat1, rtol=0.25, atol=1e-3)
442426

443-
# Check that dense and sparse inputs give the same result
444-
def test_cov_nearest_factor_homog_sparse(self):
427+
@pytest.mark.parametrize('dm', [1, 2])
428+
def test_cov_nearest_factor_homog_sparse(self, dm):
429+
# Check that dense and sparse inputs give the same result
445430

446431
d = 100
447432

448-
for dm in 1,2:
449-
450-
# Construct a test matrix with exact factor structure
451-
X = np.zeros((d,dm), dtype=np.float64)
452-
x = np.linspace(0, 2*np.pi, d)
453-
for j in range(dm):
454-
X[:,j] = np.sin(x*(j+1))
455-
mat = np.dot(X, X.T)
456-
np.fill_diagonal(mat, np.diag(mat) + 3.1)
433+
# Construct a test matrix with exact factor structure
434+
X = np.zeros((d,dm), dtype=np.float64)
435+
x = np.linspace(0, 2*np.pi, d)
436+
for j in range(dm):
437+
X[:,j] = np.sin(x*(j+1))
438+
mat = np.dot(X, X.T)
439+
np.fill_diagonal(mat, np.diag(mat) + 3.1)
457440

458-
# Fit to dense
459-
rslt = cov_nearest_factor_homog(mat, dm)
460-
mat1 = rslt.to_matrix()
441+
# Fit to dense
442+
rslt = cov_nearest_factor_homog(mat, dm)
443+
mat1 = rslt.to_matrix()
461444

462-
# Fit to sparse
463-
smat = sparse.csr_matrix(mat)
464-
rslt = cov_nearest_factor_homog(smat, dm)
465-
mat2 = rslt.to_matrix()
445+
# Fit to sparse
446+
smat = sparse.csr_matrix(mat)
447+
rslt = cov_nearest_factor_homog(smat, dm)
448+
mat2 = rslt.to_matrix()
466449

467-
assert_allclose(mat1, mat2, rtol=0.25, atol=1e-3)
450+
assert_allclose(mat1, mat2, rtol=0.25, atol=1e-3)
468451

469452
def test_corr_thresholded(self):
470453

0 commit comments

Comments
 (0)