|
7 | 7 | import pandas as pd
|
8 | 8 |
|
9 | 9 | from statsmodels.graphics import utils
|
10 |
| -from statsmodels.tsa.stattools import acf, pacf |
| 10 | +from statsmodels.tools.validation import array_like |
| 11 | +from statsmodels.tsa.stattools import acf, pacf, ccf |
11 | 12 |
|
12 | 13 |
|
13 | 14 | def _prepare_data_corr_plot(x, lags, zero):
|
@@ -38,6 +39,7 @@ def _plot_corr(
|
38 | 39 | use_vlines,
|
39 | 40 | vlines_kwargs,
|
40 | 41 | auto_ylims=False,
|
| 42 | + skip_lag0_confint=True, |
41 | 43 | **kwargs,
|
42 | 44 | ):
|
43 | 45 | if irregular:
|
@@ -66,13 +68,13 @@ def _plot_corr(
|
66 | 68 | )
|
67 | 69 |
|
68 | 70 | if confint is not None:
|
69 |
| - if lags[0] == 0: |
| 71 | + if skip_lag0_confint and lags[0] == 0: |
70 | 72 | lags = lags[1:]
|
71 | 73 | confint = confint[1:]
|
72 | 74 | acf_x = acf_x[1:]
|
73 | 75 | lags = lags.astype(float)
|
74 |
| - lags[0] -= 0.5 |
75 |
| - lags[-1] += 0.5 |
| 76 | + lags[np.argmin(lags)] -= 0.5 |
| 77 | + lags[np.argmax(lags)] += 0.5 |
76 | 78 | ax.fill_between(
|
77 | 79 | lags, confint[:, 0] - acf_x, confint[:, 1] - acf_x, alpha=0.25
|
78 | 80 | )
|
@@ -367,6 +369,279 @@ def plot_pacf(
|
367 | 369 | return fig
|
368 | 370 |
|
369 | 371 |
|
def plot_ccf(
    x,
    y,
    *,
    ax=None,
    lags=None,
    negative_lags=False,
    alpha=0.05,
    use_vlines=True,
    adjusted=False,
    fft=False,
    title="Cross-correlation",
    auto_ylims=False,
    vlines_kwargs=None,
    **kwargs,
):
    """
    Plot the cross-correlation function

    Correlations between ``x`` and the lags of ``y`` are calculated.

    The lags are shown on the horizontal axis and the correlations
    on the vertical axis.

    Parameters
    ----------
    x, y : array_like
        Arrays of time-series values.
    ax : AxesSubplot, optional
        If given, this subplot is used to plot in, otherwise a new figure with
        one subplot is created.
    lags : {int, array_like}, optional
        An int or array of lag values, used on the horizontal axis. Uses
        ``np.arange(lags)`` when lags is an int. If not provided,
        ``lags=np.arange(len(corr))`` is used.
    negative_lags : bool, optional
        If True, negative lags are shown on the horizontal axis. Only the
        lag coordinates are negated; the plotted correlation values are the
        ones computed for the corresponding non-negated lags.
    alpha : scalar, optional
        If a number is given, the confidence intervals for the given level are
        plotted, e.g. if alpha=.05, 95 % confidence intervals are shown.
        If None, confidence intervals are not shown on the plot.
    use_vlines : bool, optional
        If True, shows vertical lines and markers for the correlation values.
        If False, only shows markers.  The default marker is 'o'; it can
        be overridden with a ``marker`` kwarg.
    adjusted : bool
        If True, then denominators for cross-correlations are n-k, otherwise n.
    fft : bool, optional
        If True, computes the CCF via FFT.
    title : str, optional
        Title to place on plot. Default is 'Cross-correlation'.
    auto_ylims : bool, optional
        If True, adjusts automatically the vertical axis limits to CCF values.
    vlines_kwargs : dict, optional
        Optional dictionary of keyword arguments that are passed to vlines.
    **kwargs : kwargs, optional
        Optional keyword arguments that are directly passed on to the
        Matplotlib ``plot`` and ``axhline`` functions.

    Returns
    -------
    Figure
        The figure where the plot is drawn. This is either an existing figure
        if the `ax` argument is provided, or a newly created figure
        if `ax` is None.

    See Also
    --------
    See notes and references for statsmodels.graphics.tsaplots.plot_acf

    Examples
    --------
    >>> import pandas as pd
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.api as sm

    >>> dta = sm.datasets.macrodata.load_pandas().data
    >>> diffed = dta.diff().dropna()
    >>> sm.graphics.tsa.plot_ccf(diffed["unemp"], diffed["infl"])
    >>> plt.show()
    """
    fig, ax = utils.create_mpl_ax(ax)

    lags, nlags, irregular = _prepare_data_corr_plot(x, lags, True)
    vlines_kwargs = {} if vlines_kwargs is None else vlines_kwargs

    # Request nlags + 1 values so that the largest requested lag is included
    # alongside lag 0.
    ccf_res = ccf(
        x, y, adjusted=adjusted, fft=fft, alpha=alpha, nlags=nlags + 1
    )
    if alpha is not None:
        ccf_xy, confint = ccf_res
    else:
        ccf_xy = ccf_res
        confint = None

    if irregular:
        # Explicit lag values were supplied: pick the matching correlations
        # (and intervals) here, while ``lags`` still holds the non-negated
        # positions into the ccf output.
        # NOTE(review): _plot_corr appears to perform this selection itself
        # by indexing with ``lags`` when ``irregular`` is true -- confirm.
        # With negated lags that indexing would count from the end of the
        # array and plot the wrong values, hence the selection is done here
        # and ``irregular`` is switched off for the plotting helper.
        ccf_xy = ccf_xy[lags]
        if confint is not None:
            confint = confint[lags]
        irregular = False

    if negative_lags:
        # Only the horizontal coordinates change sign.
        lags = -lags

    _plot_corr(
        ax,
        title,
        ccf_xy,
        confint,
        lags,
        irregular,
        use_vlines,
        vlines_kwargs,
        auto_ylims=auto_ylims,
        # Lag 0 of a cross-correlation is informative (unlike the ACF, where
        # it is always 1), so its confidence interval is kept.
        skip_lag0_confint=False,
        **kwargs,
    )

    return fig
| 485 | + |
| 486 | + |
def plot_accf_grid(
    x,
    *,
    varnames=None,
    fig=None,
    lags=None,
    negative_lags=True,
    alpha=0.05,
    use_vlines=True,
    adjusted=False,
    fft=False,
    missing="none",
    zero=True,
    auto_ylims=False,
    bartlett_confint=False,
    vlines_kwargs=None,
    **kwargs,
):
    """
    Plot auto/cross-correlation grid

    Plots lags on the horizontal axis and the correlations
    on the vertical axis of each graph.

    Parameters
    ----------
    x : array_like
        2D array of time-series values: rows are observations,
        columns are variables.
    varnames : sequence of str, optional
        Variable names to use in plot titles. If ``x`` is a pandas dataframe
        and ``varnames`` is provided, it overrides the column names
        of the dataframe. If ``varnames`` is not provided and ``x`` is not
        a dataframe, variable names ``x[0]``, ``x[1]``, etc. are generated.
        An empty sequence is treated as not provided.
    fig : Matplotlib figure instance, optional
        If given, this figure is used to plot in, otherwise a new figure
        is created.
    lags : {int, array_like}, optional
        An int or array of lag values, used on horizontal axes. Uses
        ``np.arange(lags)`` when lags is an int. If not provided,
        ``lags=np.arange(len(corr))`` is used.
    negative_lags : bool, optional
        If True, negative lags are shown on the horizontal axes of plots
        below the main diagonal.
    alpha : scalar, optional
        If a number is given, the confidence intervals for the given level are
        plotted, e.g. if alpha=.05, 95 % confidence intervals are shown.
        If None, confidence intervals are not shown on the plot.
    use_vlines : bool, optional
        If True, shows vertical lines and markers for the correlation values.
        If False, only shows markers.  The default marker is 'o'; it can
        be overridden with a ``marker`` kwarg.
    adjusted : bool
        If True, then denominators for correlations are n-k, otherwise n.
    fft : bool, optional
        If True, computes the correlations via FFT.
    missing : str, optional
        A string in ['none', 'raise', 'conservative', 'drop'] specifying how
        NaNs are to be treated. Only forwarded to the auto-correlation
        plots on the diagonal.
    zero : bool, optional
        Flag indicating whether to include the 0-lag autocorrelations
        (which are always equal to 1). Default is True. Only forwarded to
        the auto-correlation plots on the diagonal.
    auto_ylims : bool, optional
        If True, adjusts automatically the vertical axis limits
        to correlation values.
    bartlett_confint : bool, default False
        If True, use Bartlett's formula to calculate confidence intervals
        in auto-correlation plots. See the description of ``plot_acf`` for
        details. This argument does not affect cross-correlation plots.
    vlines_kwargs : dict, optional
        Optional dictionary of keyword arguments that are passed to vlines.
    **kwargs : kwargs, optional
        Optional keyword arguments that are directly passed on to the
        Matplotlib ``plot`` and ``axhline`` functions.

    Returns
    -------
    Figure
        If `fig` is None, the created figure. Otherwise, `fig` is returned.
        Plots on the grid show the cross-correlation of the row variable
        with the lags of the column variable.

    See Also
    --------
    See notes and references for statsmodels.graphics.tsaplots

    Examples
    --------
    >>> import pandas as pd
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.api as sm

    >>> dta = sm.datasets.macrodata.load_pandas().data
    >>> diffed = dta.diff().dropna()
    >>> sm.graphics.tsa.plot_accf_grid(diffed[["unemp", "infl"]])
    >>> plt.show()
    """
    from statsmodels.tools.data import _is_using_pandas

    # Keep the validated 2D array: the raw ``x`` may be a nested list or
    # another array_like without a ``shape`` attribute.
    x_values = array_like(x, "x", ndim=2)
    m = x_values.shape[1]

    fig = utils.create_mpl_fig(fig)
    gs = fig.add_gridspec(m, m)

    use_pandas = _is_using_pandas(x, None)

    # ``varnames or default`` would raise for NumPy arrays / pandas Indexes
    # (ambiguous truth value), so test for "not provided" explicitly.
    if varnames is None or len(varnames) == 0:
        if use_pandas:
            varnames = list(x.columns)
        else:
            varnames = [f'x[{i}]' for i in range(m)]

    if use_pandas:
        # Hand pandas columns through unchanged so the downstream plotting
        # functions receive the original Series.
        def get_var(i):
            return x.iloc[:, i]
    else:

        def get_var(i):
            return x_values[:, i]

    for i in range(m):
        for j in range(m):
            ax = fig.add_subplot(gs[i, j])
            if i == j:
                # Diagonal: auto-correlation of variable i.
                plot_acf(
                    get_var(i),
                    ax=ax,
                    title=f'ACF({varnames[i]})',
                    lags=lags,
                    alpha=alpha,
                    use_vlines=use_vlines,
                    adjusted=adjusted,
                    fft=fft,
                    missing=missing,
                    zero=zero,
                    auto_ylims=auto_ylims,
                    bartlett_confint=bartlett_confint,
                    vlines_kwargs=vlines_kwargs,
                    **kwargs,
                )
            else:
                # Off-diagonal: cross-correlation of row variable i with the
                # lags of column variable j; negative lag axes are only used
                # below the main diagonal.
                plot_ccf(
                    get_var(i),
                    get_var(j),
                    ax=ax,
                    title=f'CCF({varnames[i]}, {varnames[j]})',
                    lags=lags,
                    negative_lags=negative_lags and i > j,
                    alpha=alpha,
                    use_vlines=use_vlines,
                    adjusted=adjusted,
                    fft=fft,
                    auto_ylims=auto_ylims,
                    vlines_kwargs=vlines_kwargs,
                    **kwargs,
                )

    return fig
| 643 | + |
| 644 | + |
370 | 645 | def seasonal_plot(grouped_x, xticklabels, ylabel=None, ax=None):
|
371 | 646 | """
|
372 | 647 | Consider using one of month_plot or quarter_plot unless you need
|
|
0 commit comments