Skip to content

Commit b1de8b7

Browse files
authored
Merge pull request statsmodels#8782 from aglebov/plot_ccf
ENH/TST: ccf to optionally return confidence intervals
2 parents d99bce9 + 299bd60 commit b1de8b7

File tree

3 files changed

+91
-9
lines changed

3 files changed

+91
-9
lines changed

statsmodels/tsa/stattools.py

+38-9
Original file line numberDiff line numberDiff line change
@@ -1045,14 +1045,14 @@ def pacf(x, nlags=None, method="ywadjusted", alpha=None):
10451045
@deprecate_kwarg("unbiased", "adjusted")
10461046
def ccovf(x, y, adjusted=True, demean=True, fft=True):
10471047
"""
1048-
Calculate the crosscovariance between two series.
1048+
Calculate the cross-covariance between two series.
10491049
10501050
Parameters
10511051
----------
10521052
x, y : array_like
10531053
The time series data to use in the calculation.
10541054
adjusted : bool, optional
1055-
If True, then denominators for crosscovariance is n-k, otherwise n.
1055+
If True, then denominators for cross-covariance are n-k, otherwise n.
10561056
demean : bool, optional
10571057
Flag indicating whether to demean x and y.
10581058
fft : bool, default True
@@ -1062,7 +1062,9 @@ def ccovf(x, y, adjusted=True, demean=True, fft=True):
10621062
Returns
10631063
-------
10641064
ndarray
1065-
The estimated crosscovariance function.
1065+
The estimated cross-covariance function: the element at index k
1066+
is the covariance between {x[k], x[k+1], ..., x[n]} and {y[0], y[1], ..., y[m-k]},
1067+
where n and m are the lengths of x and y, respectively.
10661068
"""
10671069
x = array_like(x, "x")
10681070
y = array_like(y, "y")
@@ -1083,11 +1085,11 @@ def ccovf(x, y, adjusted=True, demean=True, fft=True):
10831085
d = n
10841086

10851087
method = "fft" if fft else "direct"
1086-
return correlate(xo, yo, "full", method=method)[n - 1 :] / d
1088+
return correlate(xo, yo, "full", method=method)[n - 1:] / d
10871089

10881090

10891091
@deprecate_kwarg("unbiased", "adjusted")
1090-
def ccf(x, y, adjusted=True, fft=True):
1092+
def ccf(x, y, adjusted=True, fft=True, *, nlags=None, alpha=None):
10911093
"""
10921094
The cross-correlation function.
10931095
@@ -1096,27 +1098,54 @@ def ccf(x, y, adjusted=True, fft=True):
10961098
x, y : array_like
10971099
The time series data to use in the calculation.
10981100
adjusted : bool
1099-
If True, then denominators for cross-correlation is n-k, otherwise n.
1101+
If True, then denominators for cross-correlation are n-k, otherwise n.
11001102
fft : bool, default True
11011103
If True, use FFT convolution. This method should be preferred
11021104
for long time series.
1105+
nlags : int, optional
1106+
Number of lags to return cross-correlations for. If not provided,
1107+
the number of lags equals len(x).
1108+
alpha : float, optional
1109+
If a number is given, the confidence intervals for the given level are
1110+
returned. For instance if alpha=.05, 95 % confidence intervals are
1111+
returned where the standard deviation is computed according to
1112+
1/sqrt(len(x)).
11031113
11041114
Returns
11051115
-------
11061116
ndarray
1107-
The cross-correlation function of x and y.
1117+
The cross-correlation function of x and y: the element at index k
1118+
is the correlation between {x[k], x[k+1], ..., x[n]} and {y[0], y[1], ..., y[m-k]},
1119+
where n and m are the lengths of x and y, respectively.
1120+
confint : ndarray, optional
1121+
Confidence intervals for the CCF at lags 0, 1, ..., nlags-1 using the level given by
1122+
alpha and the standard deviation calculated as 1/sqrt(len(x)) [1]. Shape (nlags, 2).
1123+
Returned if alpha is not None.
11081124
11091125
Notes
11101126
-----
1111-
If adjusted is true, the denominator for the autocovariance is adjusted.
1127+
If adjusted is True, the denominator for the cross-correlation is adjusted.
1128+
1129+
References
1130+
----------
1131+
.. [1] Brockwell and Davis, 2016. Introduction to Time Series and
1132+
Forecasting, 3rd edition, p. 242.
11121133
"""
11131134
x = array_like(x, "x")
11141135
y = array_like(y, "y")
11151136
adjusted = bool_like(adjusted, "adjusted")
11161137
fft = bool_like(fft, "fft", optional=False)
11171138

11181139
cvf = ccovf(x, y, adjusted=adjusted, demean=True, fft=fft)
1119-
return cvf / (np.std(x) * np.std(y))
1140+
ret = cvf / (np.std(x) * np.std(y))
1141+
ret = ret[:nlags]
1142+
1143+
if alpha is not None:
1144+
interval = stats.norm.ppf(1.0 - alpha / 2.0) / np.sqrt(len(x))
1145+
confint = ret.reshape(-1, 1) + interval * np.array([-1, 1])
1146+
return ret, confint
1147+
else:
1148+
return ret
11201149

11211150

11221151
# moved from sandbox.tsa.examples.try_ld_nitime, via nitime
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
ccf
2+
-0.143209851132143
3+
-0.137214062489963
4+
0.0112277082183379
5+
-0.112707803670746
6+
0.0257215540372298
7+
0.0887760977452586
8+
0.0599704040558258
9+
0.0125061572089355
10+
0.105920661816752
11+
0.0986774966768683
12+
-0.0255492787688061
13+
0.0565448657444209
14+
-0.00333420978987776
15+
0.0292053767750115
16+
0.0245959478684983
17+
0.0264416873143308
18+
0.0366034369179944
19+
-0.0118566255778657
20+
-0.0528090477545778
21+
-0.0238963002464966

statsmodels/tsa/tests/test_stattools.py

+32
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from pandas import DataFrame, Series, date_range
1919
import pytest
2020
from scipy.interpolate import interp1d
21+
from scipy import stats
2122

2223
from statsmodels.datasets import macrodata, modechoice, nile, randhie, sunspots
2324
from statsmodels.tools.sm_exceptions import (
@@ -38,6 +39,7 @@
3839
arma_order_select_ic,
3940
breakvar_heteroskedasticity_test,
4041
ccovf,
42+
ccf,
4143
coint,
4244
grangercausalitytests,
4345
innovations_algo,
@@ -365,6 +367,36 @@ def test_burg(self):
365367
pacfburg = pacf(self.x, nlags=40, method="burg")
366368
assert_almost_equal(pacfburg_, pacfburg, DECIMAL_8)
367369

370+
371+
class TestCCF:
372+
"""
373+
Test cross-correlation function
374+
"""
375+
376+
data = macrodata.load_pandas()
377+
x = data.data["unemp"].diff().dropna()
378+
y = data.data["infl"].diff().dropna()
379+
filename = os.path.join(CURR_DIR, "results", "results_ccf.csv")
380+
results = pd.read_csv(filename, delimiter=",")
381+
nlags = 20
382+
383+
@classmethod
384+
def setup_class(cls):
385+
cls.ccf = cls.results['ccf']
386+
cls.res1 = ccf(cls.x, cls.y, nlags=cls.nlags, adjusted=False, fft=False)
387+
388+
def test_ccf(self):
389+
assert_almost_equal(self.res1, self.ccf, DECIMAL_8)
390+
391+
def test_confint(self):
392+
alpha = 0.05
393+
res2, confint = ccf(self.x, self.y, nlags=self.nlags, adjusted=False, fft=False, alpha=alpha)
394+
assert_equal(res2, self.res1)
395+
assert_almost_equal(res2 - confint[:, 0], confint[:, 1] - res2, DECIMAL_8)
396+
alpha1 = stats.norm.cdf(confint[:, 1] - res2, scale=1.0 / np.sqrt(len(self.x)))
397+
assert_almost_equal(alpha1, np.repeat(1 - alpha / 2.0, self.nlags), DECIMAL_8)
398+
399+
368400
class TestBreakvarHeteroskedasticityTest:
369401
from scipy.stats import chi2, f
370402

0 commit comments

Comments
 (0)