Skip to content

Commit a08d4f6

Browse files
committed
ENH: Add innovations algorithm
Add innovations algorithm to convert acov to MA Add filter to filter innovations usign output of ia
1 parent 0c54c49 commit a08d4f6

File tree

2 files changed

+231
-8
lines changed

2 files changed

+231
-8
lines changed

statsmodels/tsa/stattools.py

Lines changed: 153 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
"""
22
Statistical tools for time series analysis
33
"""
4+
from statsmodels.compat.python import (iteritems, range, lrange, string_types,
5+
lzip, zip, long)
6+
from statsmodels.compat.scipy import _next_regular
7+
48
import numpy as np
9+
import pandas as pd
510
from numpy.linalg import LinAlgError
611
from scipy import stats
712

8-
from statsmodels.compat.python import (iteritems, range, lrange, string_types,
9-
lzip, zip, long)
10-
from statsmodels.compat.scipy import _next_regular
1113
from statsmodels.regression.linear_model import OLS, yule_walker
1214
from statsmodels.tools.sm_exceptions import (InterpolationWarning,
1315
MissingDataError,
@@ -20,7 +22,8 @@
2022

2123
__all__ = ['acovf', 'acf', 'pacf', 'pacf_yw', 'pacf_ols', 'ccovf', 'ccf',
2224
'periodogram', 'q_stat', 'coint', 'arma_order_select_ic',
23-
'adfuller', 'kpss', 'bds']
25+
'adfuller', 'kpss', 'bds', 'pacf_burg', 'innovations_algo',
26+
'innovations_filter', 'levinson_durbin_pacf', 'levinson_durbin']
2427

2528
SQRTEPS = np.sqrt(np.finfo(np.double).eps)
2629

@@ -950,6 +953,152 @@ def levinson_durbin_pacf(pacf, nlags=None):
950953
return arcoefs, acf
951954

952955

956+
def innovations_algo(acov, nobs=None, rtol=None):
957+
"""
958+
Innovations algorithm to convert autocovariances to MA parameters
959+
960+
Parameters
961+
----------
962+
acov : array-like
963+
Array containing autocovariances including lag 0
964+
nobs : int, optional
965+
Number of periods to run the algorithm. If not provided, nobs is
966+
equal to the length of acovf
967+
rtol : float, optional
968+
Tolerance used to check for convergence. Default value is 0 which will
969+
never prematurely end the algorithm. Checks after 10 iterations and
970+
stops if sigma2[i] - sigma2[i - 10] < rtol * sigma2[0]. When the
971+
stopping condition is met, the remaining values in theta and sigma2
972+
are forward filled using the value of the final iteration.
973+
974+
Returns
975+
-------
976+
theta : ndarray
977+
Innovation coefficients of MA representation. Array is (nobs, q) where
978+
q is the largest index of a non-zero autocovariance. theta
979+
corresponds to the first q columns of the coefficient matrix in the
980+
common description of the innovation algorithm.
981+
sigma2 : ndarray
982+
The prediction error variance (nobs,).
983+
984+
Examples
985+
--------
986+
>>> import statsmodels.api as sm
987+
>>> data = sm.datasets.macrodata.load_pandas()
988+
>>> rgdpg = data.data['realgdp'].pct_change().dropna()
989+
>>> acov = sm.tsa.acovf(rgdpg)
990+
>>> nobs = activity.shape[0]
991+
>>> theta, sigma2 = innovations_algo(acov[:4], nobs=nobs)
992+
993+
See also
994+
--------
995+
innovations_filter
996+
997+
References
998+
----------
999+
Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series and
1000+
forecasting. Springer.
1001+
"""
1002+
acov = np.squeeze(np.asarray(acov))
1003+
if acov.ndim != 1:
1004+
raise ValueError('acov must be 1-d or squeezable to 1-d.')
1005+
rtol = 0.0 if rtol is None else rtol
1006+
if not isinstance(rtol, float):
1007+
raise ValueError('rtol must be a non-negative float or None.')
1008+
n = acov.shape[0] if nobs is None else int(nobs)
1009+
if n != nobs or nobs < 1:
1010+
raise ValueError('nobs must be a positive integer')
1011+
max_lag = int(np.max(np.argwhere(acov != 0)))
1012+
1013+
v = np.zeros(n + 1)
1014+
v[0] = acov[0]
1015+
# Retain only the relevant columns of theta
1016+
theta = np.zeros((n + 1, max_lag + 1))
1017+
for i in range(1, n):
1018+
for k in range(max(i - max_lag, 0), i):
1019+
sub = 0
1020+
for j in range(max(i - max_lag, 0), k):
1021+
sub += theta[k, k - j] * theta[i, i - j] * v[j]
1022+
theta[i, i - k] = 1. / v[k] * (acov[i - k] - sub)
1023+
v[i] = acov[0]
1024+
for j in range(max(i - max_lag, 0), i):
1025+
v[i] -= theta[i, i - j] ** 2 * v[j]
1026+
# Break if v has converged
1027+
if i >= 10:
1028+
if v[i - 10] - v[i] < v[0] * rtol:
1029+
# Forward fill all remaining values
1030+
v[i + 1:] = v[i]
1031+
theta[i + 1:] = theta[i]
1032+
break
1033+
1034+
theta = theta[:-1, 1:]
1035+
v = v[:-1]
1036+
return theta, v
1037+
1038+
1039+
def innovations_filter(endog, theta):
1040+
"""
1041+
Filter observations using the innovations algorithm
1042+
1043+
Parameters
1044+
----------
1045+
endog : array-like
1046+
The time series to filter (nobs,). Should be demeaned if not mean 0.
1047+
theta : ndarray
1048+
Innovation coefficients of MA representation. Array must be (nobs, q)
1049+
where q order of the MA.
1050+
1051+
Returns
1052+
-------
1053+
resid : ndarray
1054+
Array of filtered innovations
1055+
1056+
Examples
1057+
--------
1058+
>>> import statsmodels.api as sm
1059+
>>> data = sm.datasets.macrodata.load_pandas()
1060+
>>> rgdpg = data.data['realgdp'].pct_change().dropna()
1061+
>>> acov = sm.tsa.acovf(rgdpg)
1062+
>>> nobs = activity.shape[0]
1063+
>>> theta, sigma2 = innovations_algo(acov[:4], nobs=nobs)
1064+
>>> resid = innovations_filter(rgdpg, theta)
1065+
1066+
See also
1067+
--------
1068+
innovations_algo
1069+
1070+
References
1071+
----------
1072+
Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series and
1073+
forecasting. Springer.
1074+
"""
1075+
orig_endog = endog
1076+
endog = np.squeeze(np.asarray(endog))
1077+
if endog.ndim != 1:
1078+
raise ValueError('endog must be 1-d or squeezable to 1-d.')
1079+
nobs = endog.shape[0]
1080+
n_theta, k = theta.shape
1081+
if nobs != n_theta:
1082+
raise ValueError('theta must be (nobs, q) where q is the moder order')
1083+
is_pandas = isinstance(orig_endog, (pd.DataFrame, pd.Series))
1084+
if is_pandas:
1085+
if len(orig_endog.index) != nobs:
1086+
msg = 'If endog is a Series or DataFrame, the index must ' \
1087+
'correspond to the number of time series observations.'
1088+
raise ValueError(msg)
1089+
u = np.empty(nobs)
1090+
u[0] = endog[0]
1091+
for i in range(1, nobs):
1092+
if i < k:
1093+
hat = (theta[i, :i] * u[:i][::-1]).sum()
1094+
else:
1095+
hat = (theta[i] * u[i - k:i][::-1]).sum()
1096+
u[i] = endog[i] + hat
1097+
if is_pandas:
1098+
u = pd.Series(u, index=orig_endog.index.copy())
1099+
return u
1100+
1101+
9531102
def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
9541103
"""four tests for granger non causality of 2 timeseries
9551104

statsmodels/tsa/tests/test_stattools.py

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from statsmodels.compat.python import lrange, PY3
1+
from statsmodels.compat.python import lrange
22

33
import os
44
import warnings
@@ -17,9 +17,8 @@
1717
pacf, grangercausalitytests,
1818
coint, acovf, kpss,
1919
arma_order_select_ic, levinson_durbin,
20-
levinson_durbin_pacf,
21-
pacf_burg)
22-
20+
levinson_durbin_pacf, pacf_burg,
21+
innovations_algo, innovations_filter)
2322

2423
DECIMAL_8 = 8
2524
DECIMAL_6 = 6
@@ -713,3 +712,78 @@ def test_pacf_burg_error():
713712
pacf_burg(np.empty((20,2)), 10)
714713
with pytest.raises(ValueError):
715714
pacf_burg(np.empty(100), 101)
715+
716+
717+
def test_innovations_algo_brockwell_davis():
718+
ma = -0.9
719+
acovf = np.array([1 + ma ** 2, ma])
720+
theta, sigma2 = innovations_algo(acovf, nobs=4)
721+
exp_theta = np.array([[0], [-.4972], [-.6606], [-.7404]])
722+
assert_allclose(theta, exp_theta, rtol=1e-4)
723+
assert_allclose(sigma2, [1.81, 1.3625, 1.2155, 1.1436], rtol=1e-4)
724+
725+
theta, sigma2 = innovations_algo(acovf, nobs=500)
726+
assert_allclose(theta[-1, 0], ma)
727+
assert_allclose(sigma2[-1], 1.0)
728+
729+
730+
def test_innovations_algo_rtol():
731+
ma = np.array([-0.9, 0.5])
732+
acovf = np.array([1 + (ma ** 2).sum(), ma[0] + ma[1] * ma[0], ma[1]])
733+
theta, sigma2 = innovations_algo(acovf, nobs=500)
734+
theta_2, sigma2_2 = innovations_algo(acovf, nobs=500, rtol=1e-8)
735+
assert_allclose(theta, theta_2)
736+
assert_allclose(sigma2, sigma2_2)
737+
738+
739+
def test_innovations_errors():
740+
ma = -0.9
741+
acovf = np.array([1 + ma ** 2, ma])
742+
with pytest.raises(ValueError):
743+
innovations_algo(acovf, nobs=2.2)
744+
with pytest.raises(ValueError):
745+
innovations_algo(acovf, nobs=-1)
746+
with pytest.raises(ValueError):
747+
innovations_algo(np.empty((2, 2)))
748+
with pytest.raises(ValueError):
749+
innovations_algo(acovf, rtol='none')
750+
751+
752+
def test_innovations_filter_brockwell_davis():
753+
ma = -0.9
754+
acovf = np.array([1 + ma ** 2, ma])
755+
theta, sigma2 = innovations_algo(acovf, nobs=4)
756+
e = np.random.randn(5)
757+
endog = e[1:] + ma * e[:-1]
758+
resid = innovations_filter(endog, theta)
759+
expected = [endog[0]]
760+
for i in range(1, 4):
761+
expected.append(endog[i] + theta[i, 0] * expected[-1])
762+
print(expected)
763+
expected = np.array(expected)
764+
assert_allclose(resid, expected)
765+
766+
767+
def test_innovations_filter_pandas():
768+
ma = np.array([-0.9, 0.5])
769+
acovf = np.array([1 + (ma ** 2).sum(), ma[0] + ma[1] * ma[0], ma[1]])
770+
theta, sigma2 = innovations_algo(acovf, nobs=10)
771+
endog = np.random.randn(10)
772+
endog_pd = pd.Series(endog,
773+
index=pd.date_range('2000-01-01', periods=10))
774+
resid = innovations_filter(endog, theta)
775+
resid_pd = innovations_filter(endog_pd, theta)
776+
assert_allclose(resid, resid_pd.values)
777+
pd.testing.assert_index_equal(endog_pd.index, resid_pd.index)
778+
779+
780+
def test_innovations_filter_errors():
781+
ma = -0.9
782+
acovf = np.array([1 + ma ** 2, ma])
783+
theta, sigma2 = innovations_algo(acovf, nobs=4)
784+
with pytest.raises(ValueError):
785+
innovations_filter(np.empty((2, 2)), theta)
786+
with pytest.raises(ValueError):
787+
innovations_filter(np.empty(4), theta[:-1])
788+
with pytest.raises(ValueError):
789+
innovations_filter(pd.DataFrame(np.empty((1, 4))), theta)

0 commit comments

Comments
 (0)