Skip to content

Commit 8766afb

Browse files
authored
Merge pull request statsmodels#5042 from bashtage/innovations-algo
ENH: Add innovations algorithm
2 parents a146631 + 75f857d commit 8766afb

File tree

7 files changed

+244
-20
lines changed

7 files changed

+244
-20
lines changed

statsmodels/base/tests/test_generic_methods.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
1111
Author: Josef Perktold
1212
"""
13+
from statsmodels.compat.pandas import assert_series_equal, assert_index_equal
1314
from statsmodels.compat.python import range
1415

1516
import numpy as np
@@ -660,12 +661,6 @@ def test_predict_missing(self):
660661
ex.iloc[0, 1] = np.nan
661662
predicted1 = self.res.predict(ex)
662663
predicted2 = self.res.predict(ex[1:])
663-
from pandas.util.testing import assert_series_equal
664-
try:
665-
from pandas.util.testing import assert_index_equal
666-
except ImportError:
667-
# for old pandas
668-
from numpy.testing import assert_array_equal as assert_index_equal
669664

670665
assert_index_equal(predicted1.index, ex.index)
671666
assert_series_equal(predicted1[1:], predicted2)

statsmodels/base/tests/test_predict.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
"""
33
Tests for Results.predict
44
"""
5+
from statsmodels.compat.pandas import testing as pdt
56

67
import numpy as np
78
import pandas as pd
89

910
from numpy.testing import assert_allclose, assert_equal
10-
import pandas.util.testing as pdt
1111

1212
from statsmodels.regression.linear_model import OLS
1313
from statsmodels.genmod.generalized_linear_model import GLM

statsmodels/compat/pandas.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,12 @@ def sort_values(df, *args, **kwargs):
2929

3030
data_klasses = (pandas.Series, pandas.DataFrame, pandas.Panel,
3131
pandas.WidePanel)
32+
33+
try:
34+
import pandas.testing as testing
35+
except ImportError:
36+
import pandas.util.testing as testing
37+
38+
assert_frame_equal = testing.assert_frame_equal
39+
assert_index_equal = testing.assert_index_equal
40+
assert_series_equal = testing.assert_series_equal

statsmodels/stats/tests/test_influence.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,12 @@
44
55
Author: Josef Perktold
66
"""
7+
from statsmodels.compat.pandas import testing as pdt
78

89
import os.path
910
import numpy as np
1011
from numpy.testing import assert_allclose
1112
import pandas as pd
12-
try:
13-
import pandas.testing as pdt
14-
except ImportError:
15-
import pandas.util.testing as pdt
1613

1714
import pytest
1815

statsmodels/tools/testing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
"""assert functions from numpy and pandas testing
22
33
"""
4+
from statsmodels.compat.pandas import testing as pdt
45

56
import re
67

78
import numpy.testing as npt
89
import pandas
9-
import pandas.util.testing as pdt
1010

1111
# for pandas version check
1212
def strip_rc(version):

statsmodels/tsa/stattools.py

Lines changed: 153 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
"""
22
Statistical tools for time series analysis
33
"""
4+
from statsmodels.compat.python import (iteritems, range, lrange, string_types,
5+
lzip, zip, long)
6+
from statsmodels.compat.scipy import _next_regular
7+
48
import numpy as np
9+
import pandas as pd
510
from numpy.linalg import LinAlgError
611
from scipy import stats
712

8-
from statsmodels.compat.python import (iteritems, range, lrange, string_types,
9-
lzip, zip, long)
10-
from statsmodels.compat.scipy import _next_regular
1113
from statsmodels.regression.linear_model import OLS, yule_walker
1214
from statsmodels.tools.sm_exceptions import (InterpolationWarning,
1315
MissingDataError,
@@ -20,7 +22,8 @@
2022

2123
__all__ = ['acovf', 'acf', 'pacf', 'pacf_yw', 'pacf_ols', 'ccovf', 'ccf',
2224
'periodogram', 'q_stat', 'coint', 'arma_order_select_ic',
23-
'adfuller', 'kpss', 'bds']
25+
'adfuller', 'kpss', 'bds', 'pacf_burg', 'innovations_algo',
26+
'innovations_filter', 'levinson_durbin_pacf', 'levinson_durbin']
2427

2528
SQRTEPS = np.sqrt(np.finfo(np.double).eps)
2629

@@ -950,6 +953,152 @@ def levinson_durbin_pacf(pacf, nlags=None):
950953
return arcoefs, acf
951954

952955

956+
def innovations_algo(acov, nobs=None, rtol=None):
957+
"""
958+
Innovations algorithm to convert autocovariances to MA parameters
959+
960+
Parameters
961+
----------
962+
acov : array-like
963+
Array containing autocovariances including lag 0
964+
nobs : int, optional
965+
Number of periods to run the algorithm. If not provided, nobs is
966+
equal to the length of acovf
967+
rtol : float, optional
968+
Tolerance used to check for convergence. Default value is 0 which will
969+
never prematurely end the algorithm. Checks after 10 iterations and
970+
stops if sigma2[i] - sigma2[i - 10] < rtol * sigma2[0]. When the
971+
stopping condition is met, the remaining values in theta and sigma2
972+
are forward filled using the value of the final iteration.
973+
974+
Returns
975+
-------
976+
theta : ndarray
977+
Innovation coefficients of MA representation. Array is (nobs, q) where
978+
q is the largest index of a non-zero autocovariance. theta
979+
corresponds to the first q columns of the coefficient matrix in the
980+
common description of the innovation algorithm.
981+
sigma2 : ndarray
982+
The prediction error variance (nobs,).
983+
984+
Examples
985+
--------
986+
>>> import statsmodels.api as sm
987+
>>> data = sm.datasets.macrodata.load_pandas()
988+
>>> rgdpg = data.data['realgdp'].pct_change().dropna()
989+
>>> acov = sm.tsa.acovf(rgdpg)
990+
>>> nobs = activity.shape[0]
991+
>>> theta, sigma2 = innovations_algo(acov[:4], nobs=nobs)
992+
993+
See also
994+
--------
995+
innovations_filter
996+
997+
References
998+
----------
999+
Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series and
1000+
forecasting. Springer.
1001+
"""
1002+
acov = np.squeeze(np.asarray(acov))
1003+
if acov.ndim != 1:
1004+
raise ValueError('acov must be 1-d or squeezable to 1-d.')
1005+
rtol = 0.0 if rtol is None else rtol
1006+
if not isinstance(rtol, float):
1007+
raise ValueError('rtol must be a non-negative float or None.')
1008+
n = acov.shape[0] if nobs is None else int(nobs)
1009+
if n != nobs or nobs < 1:
1010+
raise ValueError('nobs must be a positive integer')
1011+
max_lag = int(np.max(np.argwhere(acov != 0)))
1012+
1013+
v = np.zeros(n + 1)
1014+
v[0] = acov[0]
1015+
# Retain only the relevant columns of theta
1016+
theta = np.zeros((n + 1, max_lag + 1))
1017+
for i in range(1, n):
1018+
for k in range(max(i - max_lag, 0), i):
1019+
sub = 0
1020+
for j in range(max(i - max_lag, 0), k):
1021+
sub += theta[k, k - j] * theta[i, i - j] * v[j]
1022+
theta[i, i - k] = 1. / v[k] * (acov[i - k] - sub)
1023+
v[i] = acov[0]
1024+
for j in range(max(i - max_lag, 0), i):
1025+
v[i] -= theta[i, i - j] ** 2 * v[j]
1026+
# Break if v has converged
1027+
if i >= 10:
1028+
if v[i - 10] - v[i] < v[0] * rtol:
1029+
# Forward fill all remaining values
1030+
v[i + 1:] = v[i]
1031+
theta[i + 1:] = theta[i]
1032+
break
1033+
1034+
theta = theta[:-1, 1:]
1035+
v = v[:-1]
1036+
return theta, v
1037+
1038+
1039+
def innovations_filter(endog, theta):
1040+
"""
1041+
Filter observations using the innovations algorithm
1042+
1043+
Parameters
1044+
----------
1045+
endog : array-like
1046+
The time series to filter (nobs,). Should be demeaned if not mean 0.
1047+
theta : ndarray
1048+
Innovation coefficients of MA representation. Array must be (nobs, q)
1049+
where q order of the MA.
1050+
1051+
Returns
1052+
-------
1053+
resid : ndarray
1054+
Array of filtered innovations
1055+
1056+
Examples
1057+
--------
1058+
>>> import statsmodels.api as sm
1059+
>>> data = sm.datasets.macrodata.load_pandas()
1060+
>>> rgdpg = data.data['realgdp'].pct_change().dropna()
1061+
>>> acov = sm.tsa.acovf(rgdpg)
1062+
>>> nobs = activity.shape[0]
1063+
>>> theta, sigma2 = innovations_algo(acov[:4], nobs=nobs)
1064+
>>> resid = innovations_filter(rgdpg, theta)
1065+
1066+
See also
1067+
--------
1068+
innovations_algo
1069+
1070+
References
1071+
----------
1072+
Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series and
1073+
forecasting. Springer.
1074+
"""
1075+
orig_endog = endog
1076+
endog = np.squeeze(np.asarray(endog))
1077+
if endog.ndim != 1:
1078+
raise ValueError('endog must be 1-d or squeezable to 1-d.')
1079+
nobs = endog.shape[0]
1080+
n_theta, k = theta.shape
1081+
if nobs != n_theta:
1082+
raise ValueError('theta must be (nobs, q) where q is the moder order')
1083+
is_pandas = isinstance(orig_endog, (pd.DataFrame, pd.Series))
1084+
if is_pandas:
1085+
if len(orig_endog.index) != nobs:
1086+
msg = 'If endog is a Series or DataFrame, the index must ' \
1087+
'correspond to the number of time series observations.'
1088+
raise ValueError(msg)
1089+
u = np.empty(nobs)
1090+
u[0] = endog[0]
1091+
for i in range(1, nobs):
1092+
if i < k:
1093+
hat = (theta[i, :i] * u[:i][::-1]).sum()
1094+
else:
1095+
hat = (theta[i] * u[i - k:i][::-1]).sum()
1096+
u[i] = endog[i] + hat
1097+
if is_pandas:
1098+
u = pd.Series(u, index=orig_endog.index.copy())
1099+
return u
1100+
1101+
9531102
def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
9541103
"""four tests for granger non causality of 2 timeseries
9551104

statsmodels/tsa/tests/test_stattools.py

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from statsmodels.compat.python import lrange, PY3
1+
from statsmodels.compat.pandas import assert_index_equal
2+
from statsmodels.compat.python import lrange
23

34
import os
45
import warnings
@@ -17,9 +18,8 @@
1718
pacf, grangercausalitytests,
1819
coint, acovf, kpss,
1920
arma_order_select_ic, levinson_durbin,
20-
levinson_durbin_pacf,
21-
pacf_burg)
22-
21+
levinson_durbin_pacf, pacf_burg,
22+
innovations_algo, innovations_filter)
2323

2424
DECIMAL_8 = 8
2525
DECIMAL_6 = 6
@@ -713,3 +713,77 @@ def test_pacf_burg_error():
713713
pacf_burg(np.empty((20,2)), 10)
714714
with pytest.raises(ValueError):
715715
pacf_burg(np.empty(100), 101)
716+
717+
718+
def test_innovations_algo_brockwell_davis():
719+
ma = -0.9
720+
acovf = np.array([1 + ma ** 2, ma])
721+
theta, sigma2 = innovations_algo(acovf, nobs=4)
722+
exp_theta = np.array([[0], [-.4972], [-.6606], [-.7404]])
723+
assert_allclose(theta, exp_theta, rtol=1e-4)
724+
assert_allclose(sigma2, [1.81, 1.3625, 1.2155, 1.1436], rtol=1e-4)
725+
726+
theta, sigma2 = innovations_algo(acovf, nobs=500)
727+
assert_allclose(theta[-1, 0], ma)
728+
assert_allclose(sigma2[-1], 1.0)
729+
730+
731+
def test_innovations_algo_rtol():
732+
ma = np.array([-0.9, 0.5])
733+
acovf = np.array([1 + (ma ** 2).sum(), ma[0] + ma[1] * ma[0], ma[1]])
734+
theta, sigma2 = innovations_algo(acovf, nobs=500)
735+
theta_2, sigma2_2 = innovations_algo(acovf, nobs=500, rtol=1e-8)
736+
assert_allclose(theta, theta_2)
737+
assert_allclose(sigma2, sigma2_2)
738+
739+
740+
def test_innovations_errors():
741+
ma = -0.9
742+
acovf = np.array([1 + ma ** 2, ma])
743+
with pytest.raises(ValueError):
744+
innovations_algo(acovf, nobs=2.2)
745+
with pytest.raises(ValueError):
746+
innovations_algo(acovf, nobs=-1)
747+
with pytest.raises(ValueError):
748+
innovations_algo(np.empty((2, 2)))
749+
with pytest.raises(ValueError):
750+
innovations_algo(acovf, rtol='none')
751+
752+
753+
def test_innovations_filter_brockwell_davis():
754+
ma = -0.9
755+
acovf = np.array([1 + ma ** 2, ma])
756+
theta, _ = innovations_algo(acovf, nobs=4)
757+
e = np.random.randn(5)
758+
endog = e[1:] + ma * e[:-1]
759+
resid = innovations_filter(endog, theta)
760+
expected = [endog[0]]
761+
for i in range(1, 4):
762+
expected.append(endog[i] + theta[i, 0] * expected[-1])
763+
expected = np.array(expected)
764+
assert_allclose(resid, expected)
765+
766+
767+
def test_innovations_filter_pandas():
768+
ma = np.array([-0.9, 0.5])
769+
acovf = np.array([1 + (ma ** 2).sum(), ma[0] + ma[1] * ma[0], ma[1]])
770+
theta, _ = innovations_algo(acovf, nobs=10)
771+
endog = np.random.randn(10)
772+
endog_pd = pd.Series(endog,
773+
index=pd.date_range('2000-01-01', periods=10))
774+
resid = innovations_filter(endog, theta)
775+
resid_pd = innovations_filter(endog_pd, theta)
776+
assert_allclose(resid, resid_pd.values)
777+
assert_index_equal(endog_pd.index, resid_pd.index)
778+
779+
780+
def test_innovations_filter_errors():
781+
ma = -0.9
782+
acovf = np.array([1 + ma ** 2, ma])
783+
theta, _ = innovations_algo(acovf, nobs=4)
784+
with pytest.raises(ValueError):
785+
innovations_filter(np.empty((2, 2)), theta)
786+
with pytest.raises(ValueError):
787+
innovations_filter(np.empty(4), theta[:-1])
788+
with pytest.raises(ValueError):
789+
innovations_filter(pd.DataFrame(np.empty((1, 4))), theta)

0 commit comments

Comments
 (0)