|
1 | 1 | """
|
2 | 2 | Statistical tools for time series analysis
|
3 | 3 | """
|
| 4 | +from statsmodels.compat.python import (iteritems, range, lrange, string_types, |
| 5 | + lzip, zip, long) |
| 6 | +from statsmodels.compat.scipy import _next_regular |
| 7 | + |
4 | 8 | import numpy as np
|
| 9 | +import pandas as pd |
5 | 10 | from numpy.linalg import LinAlgError
|
6 | 11 | from scipy import stats
|
7 | 12 |
|
8 |
| -from statsmodels.compat.python import (iteritems, range, lrange, string_types, |
9 |
| - lzip, zip, long) |
10 |
| -from statsmodels.compat.scipy import _next_regular |
11 | 13 | from statsmodels.regression.linear_model import OLS, yule_walker
|
12 | 14 | from statsmodels.tools.sm_exceptions import (InterpolationWarning,
|
13 | 15 | MissingDataError,
|
|
20 | 22 |
|
21 | 23 | __all__ = ['acovf', 'acf', 'pacf', 'pacf_yw', 'pacf_ols', 'ccovf', 'ccf',
|
22 | 24 | 'periodogram', 'q_stat', 'coint', 'arma_order_select_ic',
|
23 |
| - 'adfuller', 'kpss', 'bds'] |
| 25 | + 'adfuller', 'kpss', 'bds', 'pacf_burg', 'innovations_algo', |
| 26 | + 'innovations_filter', 'levinson_durbin_pacf', 'levinson_durbin'] |
24 | 27 |
|
25 | 28 | SQRTEPS = np.sqrt(np.finfo(np.double).eps)
|
26 | 29 |
|
@@ -950,6 +953,152 @@ def levinson_durbin_pacf(pacf, nlags=None):
|
950 | 953 | return arcoefs, acf
|
951 | 954 |
|
952 | 955 |
|
def innovations_algo(acov, nobs=None, rtol=None):
    """
    Innovations algorithm to convert autocovariances to MA parameters

    Parameters
    ----------
    acov : array-like
        Array containing autocovariances including lag 0
    nobs : int, optional
        Number of periods to run the algorithm.  If not provided, nobs is
        equal to the length of acov
    rtol : float, optional
        Tolerance used to check for convergence. Default value is 0.0.
        The check starts at iteration 10 and the algorithm stops if
        sigma2[i - 10] - sigma2[i] < rtol * sigma2[0].  When the
        stopping condition is met, the remaining values in theta and sigma2
        are forward filled using the value of the final iteration.

    Returns
    -------
    theta : ndarray
        Innovation coefficients of MA representation. Array is (nobs, q) where
        q is the largest index of a non-zero autocovariance. theta
        corresponds to the first q columns of the coefficient matrix in the
        common description of the innovation algorithm.
    sigma2 : ndarray
        The prediction error variance (nobs,).

    Raises
    ------
    ValueError
        If acov is not 1-d after squeezing, if rtol is neither None nor a
        float, or if nobs is provided and is not a positive integer.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.macrodata.load_pandas()
    >>> rgdpg = data.data['realgdp'].pct_change().dropna()
    >>> acov = sm.tsa.acovf(rgdpg)
    >>> nobs = rgdpg.shape[0]
    >>> theta, sigma2 = innovations_algo(acov[:4], nobs=nobs)

    See also
    --------
    innovations_filter

    References
    ----------
    Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series and
    forecasting. Springer.
    """
    acov = np.squeeze(np.asarray(acov))
    if acov.ndim != 1:
        raise ValueError('acov must be 1-d or squeezable to 1-d.')
    rtol = 0.0 if rtol is None else rtol
    if not isinstance(rtol, float):
        raise ValueError('rtol must be a non-negative float or None.')
    # Validate nobs only when the caller supplied it; comparing against None
    # would otherwise reject the documented default.
    if nobs is not None and (nobs != int(nobs) or nobs < 1):
        raise ValueError('nobs must be a positive integer')
    n = acov.shape[0] if nobs is None else int(nobs)
    # Largest lag with a non-zero autocovariance; coefficients beyond this
    # lag are never written, so those columns are not stored.
    max_lag = int(np.max(np.argwhere(acov != 0)))

    # One extra row/element is allocated and trimmed before returning.
    v = np.zeros(n + 1)
    v[0] = acov[0]
    # Retain only the relevant columns of theta
    theta = np.zeros((n + 1, max_lag + 1))
    for i in range(1, n):
        # Only the most recent max_lag innovations enter the recursion
        start = max(i - max_lag, 0)
        for k in range(start, i):
            sub = 0
            for j in range(start, k):
                sub += theta[k, k - j] * theta[i, i - j] * v[j]
            theta[i, i - k] = 1. / v[k] * (acov[i - k] - sub)
        v[i] = acov[0]
        for j in range(start, i):
            v[i] -= theta[i, i - j] ** 2 * v[j]
        # Break if v has converged
        if i >= 10:
            if v[i - 10] - v[i] < v[0] * rtol:
                # Forward fill all remaining values
                v[i + 1:] = v[i]
                theta[i + 1:] = theta[i]
                break

    # Drop the padding row and the unused lag-0 column
    theta = theta[:-1, 1:]
    v = v[:-1]
    return theta, v
| 1037 | + |
| 1038 | + |
def innovations_filter(endog, theta):
    """
    Filter observations using the innovations algorithm

    Parameters
    ----------
    endog : array-like
        The time series to filter (nobs,). Should be demeaned if not mean 0.
    theta : array-like
        Innovation coefficients of MA representation. Array must be (nobs, q)
        where q is the order of the MA.

    Returns
    -------
    resid : ndarray or Series
        Array of filtered innovations.  If endog is a pandas Series or
        DataFrame, a Series carrying a copy of the index of endog is
        returned.

    Raises
    ------
    ValueError
        If endog is not 1-d after squeezing, if the number of rows of theta
        differs from the number of observations, or if endog is a pandas
        object whose index length differs from the number of observations.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.macrodata.load_pandas()
    >>> rgdpg = data.data['realgdp'].pct_change().dropna()
    >>> acov = sm.tsa.acovf(rgdpg)
    >>> nobs = rgdpg.shape[0]
    >>> theta, sigma2 = innovations_algo(acov[:4], nobs=nobs)
    >>> resid = innovations_filter(rgdpg, theta)

    See also
    --------
    innovations_algo

    References
    ----------
    Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series and
    forecasting. Springer.
    """
    orig_endog = endog
    endog = np.squeeze(np.asarray(endog))
    if endog.ndim != 1:
        raise ValueError('endog must be 1-d or squeezable to 1-d.')
    nobs = endog.shape[0]
    # Accept any array-like (lists, DataFrames) for theta
    theta = np.asarray(theta)
    n_theta, k = theta.shape
    if nobs != n_theta:
        raise ValueError('theta must be (nobs, q) where q is the moder order')
    is_pandas = isinstance(orig_endog, (pd.DataFrame, pd.Series))
    if is_pandas:
        if len(orig_endog.index) != nobs:
            msg = 'If endog is a Series or DataFrame, the index must ' \
                  'correspond to the number of time series observations.'
            raise ValueError(msg)
    u = np.empty(nobs)
    # No past innovations exist at i = 0, so the prediction is zero
    u[0] = endog[0]
    for i in range(1, nobs):
        # hat is the one-step prediction built from the most recent
        # innovations, newest first
        if i < k:
            hat = (theta[i, :i] * u[:i][::-1]).sum()
        else:
            hat = (theta[i] * u[i - k:i][::-1]).sum()
        # The innovation is the observation minus its one-step prediction
        u[i] = endog[i] - hat
    if is_pandas:
        u = pd.Series(u, index=orig_endog.index.copy())
    return u
| 1100 | + |
| 1101 | + |
953 | 1102 | def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
|
954 | 1103 | """four tests for granger non causality of 2 timeseries
|
955 | 1104 |
|
|
0 commit comments