BUG: Fix MNLogit cov_params when using pandas

bashtage · bashtage · commit f3fba98d3bb1 · 2019-07-18T16:35:12.000+01:00
Fix cov_params and conf_int when using pandas. Closes statsmodels#814.
diff --git a/statsmodels/base/data.py b/statsmodels/base/data.py
@@ -362,7 +362,9 @@ def cov_names(self):
         If not set, returns param_names
         """
         # for handling names of covariance names in multidimensional models
-        return self._cov_names or self.param_names
+        if self._cov_names is not None:
+            return self._cov_names
+        return self.param_names
 
     @cov_names.setter
     def cov_names(self, value):
@@ -444,6 +446,8 @@ def wrap_output(self, obj, how='columns', names=None):
             return self.attach_generic_columns_2d(obj, names)
         elif how == 'ynames':
             return self.attach_ynames(obj)
+        elif how == 'multivariate_confint':
+            return self.attach_mv_confint(obj)
         else:
             return obj
 
@@ -465,6 +469,9 @@ def attach_rows(self, result):
     def attach_dates(self, result):
         return result
 
+    def attach_mv_confint(self, result):
+        return result
+
     def attach_generic_columns(self, result, *args, **kwargs):
         return result
 
@@ -581,6 +588,11 @@ def attach_dates(self, result):
             return DataFrame(result, index=self.predict_dates,
                              columns=self.ynames)
 
+    def attach_mv_confint(self, result):
+        return DataFrame(result.reshape((-1, 2)),
+                         index=self.cov_names,
+                         columns=['lower', 'upper'])
+
     def attach_ynames(self, result):
         squeezed = result.squeeze()
         # May be zero-dim, for example in the case of forecast one step in tsa
diff --git a/statsmodels/discrete/discrete_model.py b/statsmodels/discrete/discrete_model.py
@@ -16,13 +16,13 @@
 W. Greene. `Econometric Analysis`. Prentice Hall, 5th. edition. 2003.
 """
 __all__ = ["Poisson", "Logit", "Probit", "MNLogit", "NegativeBinomial",
-           "GeneralizedPoisson", "NegativeBinomialP"]
+           "GeneralizedPoisson", "NegativeBinomialP", "CountModel"]
 
 from statsmodels.compat.python import range
 from scipy.special import loggamma
 
 import numpy as np
-from pandas import get_dummies
+from pandas import get_dummies, MultiIndex
 
 from scipy.special import gammaln, digamma, polygamma
 from scipy import stats, special
@@ -2163,7 +2163,20 @@ class MNLogit(MultinomialModel):
     Notes
     -----
     See developer notes for further information on `MNLogit` internals.
-    """ % {'extra_params' : base._missing_param_doc}
+    """ % {'extra_params': base._missing_param_doc}
+
+    def __init__(self, endog, exog, **kwargs):
+        super(MNLogit, self).__init__(endog, exog, **kwargs)
+
+        # Override cov_names since multivariate model
+        yname = self.endog_names
+        ynames = self._ynames_map
+        ynames = MultinomialResults._maybe_convert_ynames_int(ynames)
+        # use range below to ensure sortedness
+        ynames = [ynames[key] for key in range(int(self.J))]
+        idx = MultiIndex.from_product((ynames[1:], self.data.xnames),
+                                      names=(yname, None))
+        self.data.cov_names = idx
 
     def pdf(self, eXB):
         """
@@ -4051,7 +4064,8 @@ def __init__(self, model, mlefit):
         self.J = model.J
         self.K = model.K
 
-    def _maybe_convert_ynames_int(self, ynames):
+    @staticmethod
+    def _maybe_convert_ynames_int(ynames):
         # see if they're integers
         issue_warning = False
         msg = ('endog contains values are that not int-like. Uses string '
@@ -4214,75 +4228,108 @@ def __init__(self, model, mlefit):
 
 class OrderedResultsWrapper(lm.RegressionResultsWrapper):
     pass
+
+
 wrap.populate_wrapper(OrderedResultsWrapper, OrderedResults)
 
+
 class CountResultsWrapper(lm.RegressionResultsWrapper):
     pass
+
+
 wrap.populate_wrapper(CountResultsWrapper, CountResults)
 
+
 class NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper):
     pass
+
+
 wrap.populate_wrapper(NegativeBinomialResultsWrapper,
                       NegativeBinomialResults)
 
+
 class GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper):
     pass
+
+
 wrap.populate_wrapper(GeneralizedPoissonResultsWrapper,
                       GeneralizedPoissonResults)
 
+
 class PoissonResultsWrapper(lm.RegressionResultsWrapper):
     pass
-    #_methods = {
-    #        "predict_prob" : "rows",
-    #        }
-    #_wrap_methods = lm.wrap.union_dicts(
-    #                            lm.RegressionResultsWrapper._wrap_methods,
-    #                            _methods)
+
+
 wrap.populate_wrapper(PoissonResultsWrapper, PoissonResults)
 
+
 class L1CountResultsWrapper(lm.RegressionResultsWrapper):
     pass
 
+
 class L1PoissonResultsWrapper(lm.RegressionResultsWrapper):
     pass
-    #_methods = {
+    # _methods = {
     #        "predict_prob" : "rows",
     #        }
-    #_wrap_methods = lm.wrap.union_dicts(
+    # _wrap_methods = lm.wrap.union_dicts(
     #                            lm.RegressionResultsWrapper._wrap_methods,
     #                            _methods)
+
+
 wrap.populate_wrapper(L1PoissonResultsWrapper, L1PoissonResults)
 
+
 class L1NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper):
     pass
+
+
 wrap.populate_wrapper(L1NegativeBinomialResultsWrapper,
                       L1NegativeBinomialResults)
 
+
 class L1GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper):
     pass
+
+
 wrap.populate_wrapper(L1GeneralizedPoissonResultsWrapper,
                       L1GeneralizedPoissonResults)
 
+
 class BinaryResultsWrapper(lm.RegressionResultsWrapper):
-    _attrs = {"resid_dev" : "rows",
-              "resid_generalized" : "rows",
-              "resid_pearson" : "rows",
-              "resid_response" : "rows"
+    _attrs = {"resid_dev": "rows",
+              "resid_generalized": "rows",
+              "resid_pearson": "rows",
+              "resid_response": "rows"
               }
     _wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs,
                                    _attrs)
+
+
 wrap.populate_wrapper(BinaryResultsWrapper, BinaryResults)
 
+
 class L1BinaryResultsWrapper(lm.RegressionResultsWrapper):
     pass
+
+
 wrap.populate_wrapper(L1BinaryResultsWrapper, L1BinaryResults)
 
+
 class MultinomialResultsWrapper(lm.RegressionResultsWrapper):
-    _attrs = {"resid_misclassified" : "rows"}
+    _attrs = {"resid_misclassified": "rows"}
     _wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs,
-            _attrs)
+                                   _attrs)
+    _methods = {'conf_int': 'multivariate_confint'}
+    _wrap_methods = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_methods,
+                                     _methods)
+
+
 wrap.populate_wrapper(MultinomialResultsWrapper, MultinomialResults)
 
+
 class L1MultinomialResultsWrapper(lm.RegressionResultsWrapper):
     pass
+
+
 wrap.populate_wrapper(L1MultinomialResultsWrapper, L1MultinomialResults)
diff --git a/statsmodels/discrete/tests/test_discrete.py b/statsmodels/discrete/tests/test_discrete.py
@@ -14,16 +14,19 @@
 import warnings
 
 import numpy as np
-import pandas as pd
 from numpy.testing import (assert_, assert_raises, assert_almost_equal,
                            assert_equal, assert_array_equal, assert_allclose,
                            assert_array_less)
+import pandas as pd
+from pandas.testing import assert_index_equal
 import pytest
+from scipy import stats
 
 from statsmodels.discrete.discrete_model import (Logit, Probit, MNLogit,
-                                                Poisson, NegativeBinomial,
-                                                CountModel, GeneralizedPoisson,
-                                                NegativeBinomialP)
+                                                 Poisson, NegativeBinomial,
+                                                 CountModel,
+                                                 GeneralizedPoisson,
+                                                 NegativeBinomialP)
 from statsmodels.discrete.discrete_margins import _iscount, _isdummy
 import statsmodels.api as sm
 import statsmodels.formula.api as smf
@@ -2357,8 +2360,22 @@ def test_unchanging_degrees_of_freedom():
 
 def test_mnlogit_float_name():
     df = pd.DataFrame({"A": [0., 1.1, 0, 0, 1.1], "B": [0, 1, 0, 1, 1]})
-    result = smf.mnlogit(formula="A ~ B", data=df).fit()
     with pytest.warns(SpecificationWarning,
                       match='endog contains values are that not int-like'):
-        summ = result.summary().as_text()
+        result = smf.mnlogit(formula="A ~ B", data=df).fit()
+    summ = result.summary().as_text()
     assert 'A=1.1' in summ
+
+
+def test_cov_confint_pandas():
+    data = sm.datasets.anes96.load(as_pandas=True)
+    exog = sm.add_constant(data.exog, prepend=False)
+    res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0)
+    cov = res1.cov_params()
+    ci = res1.conf_int()
+    se = np.sqrt(np.diag(cov))
+    se2 = (ci.iloc[:, 1] - ci.iloc[:, 0]) / (2 * stats.norm.ppf(0.975))
+    assert_allclose(se, se2)
+    assert_index_equal(ci.index, cov.index)
+    assert_index_equal(cov.index, cov.columns)
+    assert isinstance(ci.index, pd.MultiIndex)
diff --git a/statsmodels/tsa/vector_ar/tests/test_var.py b/statsmodels/tsa/vector_ar/tests/test_var.py
@@ -10,6 +10,8 @@
 import sys
 
 import numpy as np
+import pandas as pd
+from pandas.testing import assert_index_equal
 import pytest
 
 
@@ -802,3 +804,15 @@ def test_exog(self):
 def test_deprecated_attributes_varresults(bivariate_var_result, attr):
     with pytest.warns(FutureWarning):
         getattr(bivariate_var_result, attr)
+
+
+def test_var_cov_params(bivariate_var_data):
+    df = pd.DataFrame(bivariate_var_data, columns=['x', 'y'])
+    mod = VAR(df)
+    res = mod.fit(2)
+    cov = res.cov_params()
+    assert isinstance(cov, pd.DataFrame)
+    exog_names = ('const', 'L1.x', 'L1.y', 'L2.x', 'L2.y')
+    index = pd.MultiIndex.from_product((exog_names, ('x', 'y')))
+    assert_index_equal(cov.index, cov.columns)
+    assert_index_equal(cov.index, index)
diff --git a/statsmodels/tsa/vector_ar/var_model.py b/statsmodels/tsa/vector_ar/var_model.py
@@ -13,6 +13,7 @@
 from collections import defaultdict
 
 import numpy as np
+import pandas as pd
 import scipy.linalg
 import scipy.stats as stats
 
@@ -640,9 +641,8 @@ def fit(self, maxlags=None, method='ols', ic=None, trend='c',
             self.data.xnames = (self.data.xnames[:k_trend] +
                                 x_names_to_add +
                                 self.data.xnames[k_trend:])
-        self.data.cov_names = ['.'.join((str(yn), str(xn)))
-                               for xn in self.data.xnames
-                               for yn in self.data.ynames]
+        self.data.cov_names = pd.MultiIndex.from_product((self.data.xnames,
+                                                          self.data.ynames))
         return self._estimate_var(lags, trend=trend)
 
     def _estimate_var(self, lags, offset=0, trend='c'):
@@ -2137,9 +2137,10 @@ class VARResultsWrapper(wrap.ResultsWrapper):
               'stderr': 'columns_eq'}
     _wrap_attrs = wrap.union_dicts(TimeSeriesResultsWrapper._wrap_attrs,
                                    _attrs)
-    _methods = {}
+    _methods = {'conf_int': 'multivariate_confint'}
     _wrap_methods = wrap.union_dicts(TimeSeriesResultsWrapper._wrap_methods,
                                      _methods)
+
 wrap.populate_wrapper(VARResultsWrapper, VARResults)  # noqa:E305