Skip to content

Commit 803b841

Browse files
authored
Merge pull request statsmodels#8783 from aglebov/plot-acf-funcs
ENH: Plot cross-correlations and auto/cross-correlation matrix
2 parents b1de8b7 + 6e636a8 commit 803b841

File tree

2 files changed

+330
-4
lines changed

2 files changed

+330
-4
lines changed

statsmodels/graphics/tests/test_tsaplots.py

+51
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
from statsmodels.datasets import elnino, macrodata
1313
from statsmodels.graphics.tsaplots import (
1414
month_plot,
15+
plot_accf_grid,
1516
plot_acf,
17+
plot_ccf,
1618
plot_pacf,
1719
plot_predict,
1820
quarter_plot,
@@ -190,6 +192,55 @@ def test_plot_pacf_irregular(close_figures):
190192
plot_pacf(pacf, ax=ax, alpha=None, zero=False)
191193

192194

195+
@pytest.mark.matplotlib
def test_plot_ccf(close_figures):
    """Smoke test: ``plot_ccf`` runs without error for each option."""
    fig = plt.figure()
    ax = fig.add_subplot(111)

    process = tsp.ArmaProcess(np.r_[1.0, -0.9], np.r_[1.0, 0.9])
    rng = np.random.RandomState(1234)
    # Two samples drawn one after the other from the same seeded generator.
    x1, x2 = [
        process.generate_sample(100, distrvs=rng.standard_normal)
        for _ in range(2)
    ]

    # First call lets plot_ccf create its own axes.
    plot_ccf(x1, x2)

    # Remaining calls draw on the shared axes, one option at a time.
    option_sets = (
        {"lags": 10},
        {},
        {"alpha": None},
        {"negative_lags": True},
        {"adjusted": True},
        {"fft": True},
        {"title": 'CCF'},
        {"auto_ylims": True},
        {"use_vlines": False},
    )
    for options in option_sets:
        plot_ccf(x1, x2, ax=ax, **options)
217+
218+
219+
@pytest.mark.matplotlib
def test_plot_accf_grid(close_figures):
    """Smoke test: ``plot_accf_grid`` runs without error for each option."""
    fig = plt.figure()

    process = tsp.ArmaProcess(np.r_[1.0, -0.9], np.r_[1.0, 0.9])
    rng = np.random.RandomState(1234)
    # Two samples drawn in sequence, arranged as the columns of ``x``.
    samples = [
        process.generate_sample(100, distrvs=rng.standard_normal)
        for _ in range(2)
    ]
    x = np.vstack(samples).T

    # Plain array and DataFrame inputs, each on a newly created figure.
    plot_accf_grid(x)
    plot_accf_grid(pd.DataFrame({'x': x[:, 0], 'y': x[:, 1]}))

    # Remaining calls reuse the figure created above, one option at a time.
    option_sets = (
        {"lags": 10},
        {},
        {"negative_lags": False},
        {"alpha": None},
        {"adjusted": True},
        {"fft": True},
        {"auto_ylims": True},
        {"use_vlines": False},
    )
    for options in option_sets:
        plot_accf_grid(x, fig=fig, **options)
242+
243+
193244
@pytest.mark.matplotlib
194245
def test_plot_month(close_figures):
195246
dta = elnino.load_pandas().data

statsmodels/graphics/tsaplots.py

+279-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
import pandas as pd
88

99
from statsmodels.graphics import utils
10-
from statsmodels.tsa.stattools import acf, pacf
10+
from statsmodels.tools.validation import array_like
11+
from statsmodels.tsa.stattools import acf, pacf, ccf
1112

1213

1314
def _prepare_data_corr_plot(x, lags, zero):
@@ -38,6 +39,7 @@ def _plot_corr(
3839
use_vlines,
3940
vlines_kwargs,
4041
auto_ylims=False,
42+
skip_lag0_confint=True,
4143
**kwargs,
4244
):
4345
if irregular:
@@ -66,13 +68,13 @@ def _plot_corr(
6668
)
6769

6870
if confint is not None:
69-
if lags[0] == 0:
71+
if skip_lag0_confint and lags[0] == 0:
7072
lags = lags[1:]
7173
confint = confint[1:]
7274
acf_x = acf_x[1:]
7375
lags = lags.astype(float)
74-
lags[0] -= 0.5
75-
lags[-1] += 0.5
76+
lags[np.argmin(lags)] -= 0.5
77+
lags[np.argmax(lags)] += 0.5
7678
ax.fill_between(
7779
lags, confint[:, 0] - acf_x, confint[:, 1] - acf_x, alpha=0.25
7880
)
@@ -367,6 +369,279 @@ def plot_pacf(
367369
return fig
368370

369371

372+
def plot_ccf(
    x,
    y,
    *,
    ax=None,
    lags=None,
    negative_lags=False,
    alpha=0.05,
    use_vlines=True,
    adjusted=False,
    fft=False,
    title="Cross-correlation",
    auto_ylims=False,
    vlines_kwargs=None,
    **kwargs,
):
    """
    Plot the cross-correlation function of two series

    The cross-correlations of ``x`` with the lags of ``y`` are computed and
    drawn with the lags on the horizontal axis and the correlation values
    on the vertical axis.

    Parameters
    ----------
    x, y : array_like
        Arrays of time-series values.
    ax : AxesSubplot, optional
        Subplot to draw into. When omitted, a new figure with a single
        subplot is created.
    lags : {int, array_like}, optional
        An int or array of lag values, used on the horizontal axis. Uses
        ``np.arange(lags)`` when lags is an int. If not provided,
        ``lags=np.arange(len(corr))`` is used.
    negative_lags : bool, optional
        If True, the lag values are negated before plotting, so negative
        lags are shown on the horizontal axis.
    alpha : scalar, optional
        Significance level of the confidence intervals that are drawn,
        e.g. ``alpha=.05`` shows 95 % confidence intervals. If None,
        no confidence intervals are computed or shown.
    use_vlines : bool, optional
        If True, shows vertical lines and markers for the correlation
        values. If False, only shows markers. The default marker is 'o';
        it can be overridden with a ``marker`` kwarg.
    adjusted : bool
        If True, then denominators for cross-correlations are n-k,
        otherwise n.
    fft : bool, optional
        If True, computes the CCF via FFT.
    title : str, optional
        Title to place on plot. Default is 'Cross-correlation'.
    auto_ylims : bool, optional
        If True, adjusts automatically the vertical axis limits to CCF
        values.
    vlines_kwargs : dict, optional
        Keyword arguments passed to vlines. An empty dict is used when
        omitted.
    **kwargs : kwargs, optional
        Optional keyword arguments that are directly passed on to the
        Matplotlib ``plot`` and ``axhline`` functions.

    Returns
    -------
    Figure
        The figure where the plot is drawn: the figure owning ``ax`` when
        ``ax`` is provided, otherwise the newly created figure.

    See Also
    --------
    statsmodels.graphics.tsaplots.plot_acf
        Its notes and references also apply here.

    Notes
    -----
    The confidence band is drawn for every lag, including lag 0
    (``skip_lag0_confint=False`` is passed to the shared plotting helper).

    Examples
    --------
    >>> import pandas as pd
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.api as sm

    >>> dta = sm.datasets.macrodata.load_pandas().data
    >>> diffed = dta.diff().dropna()
    >>> sm.graphics.tsa.plot_ccf(diffed["unemp"], diffed["infl"])
    >>> plt.show()
    """
    fig, ax = utils.create_mpl_ax(ax)

    # zero=True: the lag-0 cross-correlation is always part of the plot.
    lags, nlags, irregular = _prepare_data_corr_plot(x, lags, True)
    if vlines_kwargs is None:
        vlines_kwargs = {}

    if negative_lags:
        lags = -lags

    estimate = ccf(
        x, y, adjusted=adjusted, fft=fft, alpha=alpha, nlags=nlags + 1
    )
    # ``ccf`` returns a (values, confint) pair only when alpha is given.
    ccf_xy, confint = (estimate, None) if alpha is None else estimate

    _plot_corr(
        ax,
        title,
        ccf_xy,
        confint,
        lags,
        irregular,
        use_vlines,
        vlines_kwargs,
        auto_ylims=auto_ylims,
        skip_lag0_confint=False,
        **kwargs,
    )

    return fig
485+
486+
487+
def plot_accf_grid(
    x,
    *,
    varnames=None,
    fig=None,
    lags=None,
    negative_lags=True,
    alpha=0.05,
    use_vlines=True,
    adjusted=False,
    fft=False,
    missing="none",
    zero=True,
    auto_ylims=False,
    bartlett_confint=False,
    vlines_kwargs=None,
    **kwargs,
):
    """
    Plot auto/cross-correlation grid

    Draws an ``m`` by ``m`` grid of plots, where ``m`` is the number of
    columns of ``x``. Plots on the main diagonal show the autocorrelation
    of each variable; the plot in row ``i`` and column ``j`` shows the
    cross-correlation of variable ``i`` with the lags of variable ``j``.
    Each plot has lags on the horizontal axis and correlations on the
    vertical axis.

    Parameters
    ----------
    x : array_like
        2D array of time-series values: rows are observations,
        columns are variables. Pandas input is used as is; any other
        input (e.g. a nested list) is converted to a 2D ndarray first.
    varnames : sequence of str, optional
        Variable names to use in plot titles. If ``x`` is a pandas dataframe
        and ``varnames`` is provided, it overrides the column names
        of the dataframe. If ``varnames`` is not provided (None or empty)
        and ``x`` is not a dataframe, variable names ``x[0]``, ``x[1]``,
        etc. are generated.
    fig : Matplotlib figure instance, optional
        If given, this figure is used to plot in, otherwise a new figure
        is created.
    lags : {int, array_like}, optional
        An int or array of lag values, used on horizontal axes. Uses
        ``np.arange(lags)`` when lags is an int. If not provided,
        ``lags=np.arange(len(corr))`` is used.
    negative_lags : bool, optional
        If True, negative lags are shown on the horizontal axes of plots
        below the main diagonal.
    alpha : scalar, optional
        If a number is given, the confidence intervals for the given level are
        plotted, e.g. if alpha=.05, 95 % confidence intervals are shown.
        If None, confidence intervals are not shown on the plot.
    use_vlines : bool, optional
        If True, shows vertical lines and markers for the correlation values.
        If False, only shows markers. The default marker is 'o'; it can
        be overridden with a ``marker`` kwarg.
    adjusted : bool
        If True, then denominators for correlations are n-k, otherwise n.
    fft : bool, optional
        If True, computes the correlations via FFT.
    missing : str, optional
        A string in ['none', 'raise', 'conservative', 'drop'] specifying how
        NaNs are to be treated. Only passed to the auto-correlation plots.
    zero : bool, optional
        Flag indicating whether to include the 0-lag autocorrelations
        (which are always equal to 1). Default is True. Only passed to
        the auto-correlation plots.
    auto_ylims : bool, optional
        If True, adjusts automatically the vertical axis limits
        to correlation values.
    bartlett_confint : bool, default False
        If True, use Bartlett's formula to calculate confidence intervals
        in auto-correlation plots. See the description of ``plot_acf`` for
        details. This argument does not affect cross-correlation plots.
    vlines_kwargs : dict, optional
        Optional dictionary of keyword arguments that are passed to vlines.
    **kwargs : kwargs, optional
        Optional keyword arguments that are directly passed on to the
        Matplotlib ``plot`` and ``axhline`` functions.

    Returns
    -------
    Figure
        If `fig` is None, the created figure. Otherwise, `fig` is returned.
        Plots on the grid show the cross-correlation of the row variable
        with the lags of the column variable.

    See Also
    --------
    statsmodels.graphics.tsaplots.plot_acf
        Used for the plots on the main diagonal.
    statsmodels.graphics.tsaplots.plot_ccf
        Used for the off-diagonal plots.

    Examples
    --------
    >>> import pandas as pd
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.api as sm

    >>> dta = sm.datasets.macrodata.load_pandas().data
    >>> diffed = dta.diff().dropna()
    >>> sm.graphics.tsa.plot_accf_grid(diffed[["unemp", "infl"]])
    >>> plt.show()
    """
    from statsmodels.tools.data import _is_using_pandas

    is_pandas = _is_using_pandas(x, None)
    if is_pandas:
        # Validate only: the pandas object itself is kept so that column
        # labels and ``iloc`` remain available below.
        array_like(x, "x", ndim=2)
    else:
        # Keep the validated ndarray; the raw input may be a nested list,
        # which has no ``shape`` attribute.
        x = array_like(x, "x", ndim=2)
    m = x.shape[1]

    fig = utils.create_mpl_fig(fig)
    gs = fig.add_gridspec(m, m)

    # Explicit None/empty check: truth-testing an ndarray or pandas Index
    # of names raises ValueError.
    if varnames is None or len(varnames) == 0:
        if is_pandas:
            varnames = list(x.columns)
        else:
            varnames = [f'x[{i}]' for i in range(m)]

    if is_pandas:

        def get_var(i):
            return x.iloc[:, i]
    else:

        def get_var(i):
            return x[:, i]

    for i in range(m):
        for j in range(m):
            ax = fig.add_subplot(gs[i, j])
            if i == j:
                # Main diagonal: autocorrelation of variable i.
                plot_acf(
                    get_var(i),
                    ax=ax,
                    title=f'ACF({varnames[i]})',
                    lags=lags,
                    alpha=alpha,
                    use_vlines=use_vlines,
                    adjusted=adjusted,
                    fft=fft,
                    missing=missing,
                    zero=zero,
                    auto_ylims=auto_ylims,
                    bartlett_confint=bartlett_confint,
                    vlines_kwargs=vlines_kwargs,
                    **kwargs,
                )
            else:
                # Off-diagonal: row variable against lags of the column
                # variable; lags are negated only below the diagonal.
                plot_ccf(
                    get_var(i),
                    get_var(j),
                    ax=ax,
                    title=f'CCF({varnames[i]}, {varnames[j]})',
                    lags=lags,
                    negative_lags=negative_lags and i > j,
                    alpha=alpha,
                    use_vlines=use_vlines,
                    adjusted=adjusted,
                    fft=fft,
                    auto_ylims=auto_ylims,
                    vlines_kwargs=vlines_kwargs,
                    **kwargs,
                )

    return fig
643+
644+
370645
def seasonal_plot(grouped_x, xticklabels, ylabel=None, ax=None):
371646
"""
372647
Consider using one of month_plot or quarter_plot unless you need

0 commit comments

Comments
 (0)