Merge pull request statsmodels#5042 from bashtage/innovations-algo

josef-pkt · web-flow · commit 8766afbcef07 · 2018-09-16T08:17:24.000-04:00
ENH: Add innovations algorithm
diff --git a/statsmodels/base/tests/test_generic_methods.py b/statsmodels/base/tests/test_generic_methods.py
@@ -10,6 +10,7 @@
 
 Author: Josef Perktold
 """
+from statsmodels.compat.pandas import assert_series_equal, assert_index_equal
 from statsmodels.compat.python import range
 
 import numpy as np
@@ -660,12 +661,6 @@ def test_predict_missing(self):
         ex.iloc[0, 1] = np.nan
         predicted1 = self.res.predict(ex)
         predicted2 = self.res.predict(ex[1:])
-        from pandas.util.testing import assert_series_equal
-        try:
-            from pandas.util.testing import assert_index_equal
-        except ImportError:
-            # for old pandas
-            from numpy.testing import assert_array_equal as assert_index_equal
 
         assert_index_equal(predicted1.index, ex.index)
         assert_series_equal(predicted1[1:], predicted2)
diff --git a/statsmodels/base/tests/test_predict.py b/statsmodels/base/tests/test_predict.py
@@ -2,12 +2,12 @@
 """
 Tests for Results.predict
 """
+from statsmodels.compat.pandas import testing as pdt
 
 import numpy as np
 import pandas as pd
 
 from numpy.testing import assert_allclose, assert_equal
-import pandas.util.testing as pdt
 
 from statsmodels.regression.linear_model import OLS
 from statsmodels.genmod.generalized_linear_model import GLM
diff --git a/statsmodels/compat/pandas.py b/statsmodels/compat/pandas.py
@@ -29,3 +29,12 @@ def sort_values(df, *args, **kwargs):
 
     data_klasses = (pandas.Series, pandas.DataFrame, pandas.Panel,
                     pandas.WidePanel)
+
+try:
+    import pandas.testing as testing
+except ImportError:
+    import pandas.util.testing as testing
+
+assert_frame_equal = testing.assert_frame_equal
+assert_index_equal = testing.assert_index_equal
+assert_series_equal = testing.assert_series_equal
diff --git a/statsmodels/stats/tests/test_influence.py b/statsmodels/stats/tests/test_influence.py
@@ -4,15 +4,12 @@
 
 Author: Josef Perktold
 """
+from statsmodels.compat.pandas import testing as pdt
 
 import os.path
 import numpy as np
 from numpy.testing import assert_allclose
 import pandas as pd
-try:
-    import pandas.testing as pdt
-except ImportError:
-    import pandas.util.testing as pdt
 
 import pytest
 
diff --git a/statsmodels/tools/testing.py b/statsmodels/tools/testing.py
@@ -1,12 +1,12 @@
 """assert functions from numpy and pandas testing
 
 """
+from statsmodels.compat.pandas import testing as pdt
 
 import re
 
 import numpy.testing as npt
 import pandas
-import pandas.util.testing as pdt
 
 # for pandas version check
 def strip_rc(version):
diff --git a/statsmodels/tsa/stattools.py b/statsmodels/tsa/stattools.py
@@ -1,13 +1,15 @@
 """
 Statistical tools for time series analysis
 """
+from statsmodels.compat.python import (iteritems, range, lrange, string_types,
+                                       lzip, zip, long)
+from statsmodels.compat.scipy import _next_regular
+
 import numpy as np
+import pandas as pd
 from numpy.linalg import LinAlgError
 from scipy import stats
 
-from statsmodels.compat.python import (iteritems, range, lrange, string_types,
-                                       lzip, zip, long)
-from statsmodels.compat.scipy import _next_regular
 from statsmodels.regression.linear_model import OLS, yule_walker
 from statsmodels.tools.sm_exceptions import (InterpolationWarning,
                                              MissingDataError,
@@ -20,7 +22,8 @@
 
 __all__ = ['acovf', 'acf', 'pacf', 'pacf_yw', 'pacf_ols', 'ccovf', 'ccf',
            'periodogram', 'q_stat', 'coint', 'arma_order_select_ic',
-           'adfuller', 'kpss', 'bds']
+           'adfuller', 'kpss', 'bds', 'pacf_burg', 'innovations_algo',
+           'innovations_filter', 'levinson_durbin_pacf', 'levinson_durbin']
 
 SQRTEPS = np.sqrt(np.finfo(np.double).eps)
 
@@ -950,6 +953,152 @@ def levinson_durbin_pacf(pacf, nlags=None):
     return arcoefs, acf
 
 
+def innovations_algo(acov, nobs=None, rtol=None):
+    """
+    Innovations algorithm to convert autocovariances to MA parameters
+
+    Parameters
+    ----------
+    acov : array-like
+        Array containing autocovariances including lag 0
+    nobs : int, optional
+        Number of periods to run the algorithm.  If not provided, nobs is
+        equal to the length of acovf
+    rtol : float, optional
+        Tolerance used to check for convergence. Default value is 0 which will
+        never prematurely end the algorithm. Checks after 10 iterations and
+        stops if sigma2[i] - sigma2[i - 10] < rtol * sigma2[0]. When the
+        stopping condition is met, the remaining values in theta and sigma2
+        are forward filled using the value of the final iteration.
+
+    Returns
+    -------
+    theta : ndarray
+        Innovation coefficients of MA representation. Array is (nobs, q) where
+        q is the largest index of a non-zero autocovariance. theta
+        corresponds to the first q columns of the coefficient matrix in the
+        common description of the innovation algorithm.
+    sigma2 : ndarray
+        The prediction error variance (nobs,).
+
+    Examples
+    --------
+    >>> import statsmodels.api as sm
+    >>> data = sm.datasets.macrodata.load_pandas()
+    >>> rgdpg = data.data['realgdp'].pct_change().dropna()
+    >>> acov = sm.tsa.acovf(rgdpg)
+    >>> nobs = activity.shape[0]
+    >>> theta, sigma2  = innovations_algo(acov[:4], nobs=nobs)
+
+    See also
+    --------
+    innovations_filter
+
+    References
+    ----------
+    Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series and
+        forecasting. Springer.
+    """
+    acov = np.squeeze(np.asarray(acov))
+    if acov.ndim != 1:
+        raise ValueError('acov must be 1-d or squeezable to 1-d.')
+    rtol = 0.0 if rtol is None else rtol
+    if not isinstance(rtol, float):
+        raise ValueError('rtol must be a non-negative float or None.')
+    n = acov.shape[0] if nobs is None else int(nobs)
+    if n != nobs or nobs < 1:
+        raise ValueError('nobs must be a positive integer')
+    max_lag = int(np.max(np.argwhere(acov != 0)))
+
+    v = np.zeros(n + 1)
+    v[0] = acov[0]
+    # Retain only the relevant columns of theta
+    theta = np.zeros((n + 1, max_lag + 1))
+    for i in range(1, n):
+        for k in range(max(i - max_lag, 0), i):
+            sub = 0
+            for j in range(max(i - max_lag, 0), k):
+                sub += theta[k, k - j] * theta[i, i - j] * v[j]
+            theta[i, i - k] = 1. / v[k] * (acov[i - k] - sub)
+            v[i] = acov[0]
+        for j in range(max(i - max_lag, 0), i):
+            v[i] -= theta[i, i - j] ** 2 * v[j]
+        # Break if v has converged
+        if i >= 10:
+            if v[i - 10] - v[i] < v[0] * rtol:
+                # Forward fill all remaining values
+                v[i + 1:] = v[i]
+                theta[i + 1:] = theta[i]
+                break
+
+    theta = theta[:-1, 1:]
+    v = v[:-1]
+    return theta, v
+
+
+def innovations_filter(endog, theta):
+    """
+    Filter observations using the innovations algorithm
+
+    Parameters
+    ----------
+    endog : array-like
+        The time series to filter (nobs,). Should be demeaned if not mean 0.
+    theta : ndarray
+        Innovation coefficients of MA representation. Array must be (nobs, q)
+        where q order of the MA.
+
+    Returns
+    -------
+    resid : ndarray
+        Array of filtered innovations
+
+    Examples
+    --------
+    >>> import statsmodels.api as sm
+    >>> data = sm.datasets.macrodata.load_pandas()
+    >>> rgdpg = data.data['realgdp'].pct_change().dropna()
+    >>> acov = sm.tsa.acovf(rgdpg)
+    >>> nobs = activity.shape[0]
+    >>> theta, sigma2  = innovations_algo(acov[:4], nobs=nobs)
+    >>> resid = innovations_filter(rgdpg, theta)
+
+    See also
+    --------
+    innovations_algo
+
+    References
+    ----------
+    Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series and
+        forecasting. Springer.
+    """
+    orig_endog = endog
+    endog = np.squeeze(np.asarray(endog))
+    if endog.ndim != 1:
+        raise ValueError('endog must be 1-d or squeezable to 1-d.')
+    nobs = endog.shape[0]
+    n_theta, k = theta.shape
+    if nobs != n_theta:
+        raise ValueError('theta must be (nobs, q) where q is the moder order')
+    is_pandas = isinstance(orig_endog, (pd.DataFrame, pd.Series))
+    if is_pandas:
+        if len(orig_endog.index) != nobs:
+            msg = 'If endog is a Series or DataFrame, the index must ' \
+                  'correspond to the number of time series observations.'
+            raise ValueError(msg)
+    u = np.empty(nobs)
+    u[0] = endog[0]
+    for i in range(1, nobs):
+        if i < k:
+            hat = (theta[i, :i] * u[:i][::-1]).sum()
+        else:
+            hat = (theta[i] * u[i - k:i][::-1]).sum()
+        u[i] = endog[i] + hat
+    if is_pandas:
+        u = pd.Series(u, index=orig_endog.index.copy())
+    return u
+
+
 def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
     """four tests for granger non causality of 2 timeseries
 
diff --git a/statsmodels/tsa/tests/test_stattools.py b/statsmodels/tsa/tests/test_stattools.py
@@ -1,4 +1,5 @@
-from statsmodels.compat.python import lrange, PY3
+from statsmodels.compat.pandas import assert_index_equal
+from statsmodels.compat.python import lrange
 
 import os
 import warnings
@@ -17,9 +18,8 @@
                                        pacf, grangercausalitytests,
                                        coint, acovf, kpss,
                                        arma_order_select_ic, levinson_durbin,
-                                       levinson_durbin_pacf,
-                                       pacf_burg)
-
+                                       levinson_durbin_pacf, pacf_burg,
+                                       innovations_algo, innovations_filter)
 
 DECIMAL_8 = 8
 DECIMAL_6 = 6
@@ -713,3 +713,77 @@ def test_pacf_burg_error():
         pacf_burg(np.empty((20,2)), 10)
     with pytest.raises(ValueError):
         pacf_burg(np.empty(100), 101)
+
+
+def test_innovations_algo_brockwell_davis():
+    ma = -0.9
+    acovf = np.array([1 + ma ** 2, ma])
+    theta, sigma2 = innovations_algo(acovf, nobs=4)
+    exp_theta = np.array([[0], [-.4972], [-.6606], [-.7404]])
+    assert_allclose(theta, exp_theta, rtol=1e-4)
+    assert_allclose(sigma2, [1.81, 1.3625, 1.2155, 1.1436], rtol=1e-4)
+
+    theta, sigma2 = innovations_algo(acovf, nobs=500)
+    assert_allclose(theta[-1, 0], ma)
+    assert_allclose(sigma2[-1], 1.0)
+
+
+def test_innovations_algo_rtol():
+    ma = np.array([-0.9, 0.5])
+    acovf = np.array([1 + (ma ** 2).sum(), ma[0] + ma[1] * ma[0], ma[1]])
+    theta, sigma2 = innovations_algo(acovf, nobs=500)
+    theta_2, sigma2_2 = innovations_algo(acovf, nobs=500, rtol=1e-8)
+    assert_allclose(theta, theta_2)
+    assert_allclose(sigma2, sigma2_2)
+
+
+def test_innovations_errors():
+    ma = -0.9
+    acovf = np.array([1 + ma ** 2, ma])
+    with pytest.raises(ValueError):
+        innovations_algo(acovf, nobs=2.2)
+    with pytest.raises(ValueError):
+        innovations_algo(acovf, nobs=-1)
+    with pytest.raises(ValueError):
+        innovations_algo(np.empty((2, 2)))
+    with pytest.raises(ValueError):
+        innovations_algo(acovf, rtol='none')
+
+
+def test_innovations_filter_brockwell_davis():
+    ma = -0.9
+    acovf = np.array([1 + ma ** 2, ma])
+    theta, _ = innovations_algo(acovf, nobs=4)
+    e = np.random.randn(5)
+    endog = e[1:] + ma * e[:-1]
+    resid = innovations_filter(endog, theta)
+    expected = [endog[0]]
+    for i in range(1, 4):
+        expected.append(endog[i] + theta[i, 0] * expected[-1])
+    expected = np.array(expected)
+    assert_allclose(resid, expected)
+
+
+def test_innovations_filter_pandas():
+    ma = np.array([-0.9, 0.5])
+    acovf = np.array([1 + (ma ** 2).sum(), ma[0] + ma[1] * ma[0], ma[1]])
+    theta, _ = innovations_algo(acovf, nobs=10)
+    endog = np.random.randn(10)
+    endog_pd = pd.Series(endog,
+                         index=pd.date_range('2000-01-01', periods=10))
+    resid = innovations_filter(endog, theta)
+    resid_pd = innovations_filter(endog_pd, theta)
+    assert_allclose(resid, resid_pd.values)
+    assert_index_equal(endog_pd.index, resid_pd.index)
+
+
+def test_innovations_filter_errors():
+    ma = -0.9
+    acovf = np.array([1 + ma ** 2, ma])
+    theta, _ = innovations_algo(acovf, nobs=4)
+    with pytest.raises(ValueError):
+        innovations_filter(np.empty((2, 2)), theta)
+    with pytest.raises(ValueError):
+        innovations_filter(np.empty(4), theta[:-1])
+    with pytest.raises(ValueError):
+        innovations_filter(pd.DataFrame(np.empty((1, 4))), theta)