Skip to content

Commit f3fba98

Browse files
committed
BUG: Fix MNLogit cov_params when using pandas
Fix cov_params and conf_int when using pandas. Closes statsmodels#814.
1 parent 5134c97 commit f3fba98

File tree

5 files changed

+120
-29
lines changed

5 files changed

+120
-29
lines changed

statsmodels/base/data.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,9 @@ def cov_names(self):
362362
If not set, returns param_names
363363
"""
364364
# for handling names of covariance names in multidimensional models
365-
return self._cov_names or self.param_names
365+
if self._cov_names is not None:
366+
return self._cov_names
367+
return self.param_names
366368

367369
@cov_names.setter
368370
def cov_names(self, value):
@@ -444,6 +446,8 @@ def wrap_output(self, obj, how='columns', names=None):
444446
return self.attach_generic_columns_2d(obj, names)
445447
elif how == 'ynames':
446448
return self.attach_ynames(obj)
449+
elif how == 'multivariate_confint':
450+
return self.attach_mv_confint(obj)
447451
else:
448452
return obj
449453

@@ -465,6 +469,9 @@ def attach_rows(self, result):
465469
def attach_dates(self, result):
466470
return result
467471

472+
def attach_mv_confint(self, result):
473+
return result
474+
468475
def attach_generic_columns(self, result, *args, **kwargs):
469476
return result
470477

@@ -581,6 +588,11 @@ def attach_dates(self, result):
581588
return DataFrame(result, index=self.predict_dates,
582589
columns=self.ynames)
583590

591+
def attach_mv_confint(self, result):
592+
return DataFrame(result.reshape((-1, 2)),
593+
index=self.cov_names,
594+
columns=['lower', 'upper'])
595+
584596
def attach_ynames(self, result):
585597
squeezed = result.squeeze()
586598
# May be zero-dim, for example in the case of forecast one step in tsa

statsmodels/discrete/discrete_model.py

Lines changed: 65 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
W. Greene. `Econometric Analysis`. Prentice Hall, 5th. edition. 2003.
1717
"""
1818
__all__ = ["Poisson", "Logit", "Probit", "MNLogit", "NegativeBinomial",
19-
"GeneralizedPoisson", "NegativeBinomialP"]
19+
"GeneralizedPoisson", "NegativeBinomialP", "CountModel"]
2020

2121
from statsmodels.compat.python import range
2222
from scipy.special import loggamma
2323

2424
import numpy as np
25-
from pandas import get_dummies
25+
from pandas import get_dummies, MultiIndex
2626

2727
from scipy.special import gammaln, digamma, polygamma
2828
from scipy import stats, special
@@ -2163,7 +2163,20 @@ class MNLogit(MultinomialModel):
21632163
Notes
21642164
-----
21652165
See developer notes for further information on `MNLogit` internals.
2166-
""" % {'extra_params' : base._missing_param_doc}
2166+
""" % {'extra_params': base._missing_param_doc}
2167+
2168+
def __init__(self, endog, exog, **kwargs):
2169+
super(MNLogit, self).__init__(endog, exog, **kwargs)
2170+
2171+
# Override cov_names since multivariate model
2172+
yname = self.endog_names
2173+
ynames = self._ynames_map
2174+
ynames = MultinomialResults._maybe_convert_ynames_int(ynames)
2175+
# use range below to ensure sortedness
2176+
ynames = [ynames[key] for key in range(int(self.J))]
2177+
idx = MultiIndex.from_product((ynames[1:], self.data.xnames),
2178+
names=(yname, None))
2179+
self.data.cov_names = idx
21672180

21682181
def pdf(self, eXB):
21692182
"""
@@ -4051,7 +4064,8 @@ def __init__(self, model, mlefit):
40514064
self.J = model.J
40524065
self.K = model.K
40534066

4054-
def _maybe_convert_ynames_int(self, ynames):
4067+
@staticmethod
4068+
def _maybe_convert_ynames_int(ynames):
40554069
# see if they're integers
40564070
issue_warning = False
40574071
msg = ('endog contains values are that not int-like. Uses string '
@@ -4214,75 +4228,108 @@ def __init__(self, model, mlefit):
42144228

42154229
class OrderedResultsWrapper(lm.RegressionResultsWrapper):
42164230
pass
4231+
4232+
42174233
wrap.populate_wrapper(OrderedResultsWrapper, OrderedResults)
42184234

4235+
42194236
class CountResultsWrapper(lm.RegressionResultsWrapper):
42204237
pass
4238+
4239+
42214240
wrap.populate_wrapper(CountResultsWrapper, CountResults)
42224241

4242+
42234243
class NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper):
42244244
pass
4245+
4246+
42254247
wrap.populate_wrapper(NegativeBinomialResultsWrapper,
42264248
NegativeBinomialResults)
42274249

4250+
42284251
class GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper):
42294252
pass
4253+
4254+
42304255
wrap.populate_wrapper(GeneralizedPoissonResultsWrapper,
42314256
GeneralizedPoissonResults)
42324257

4258+
42334259
class PoissonResultsWrapper(lm.RegressionResultsWrapper):
42344260
pass
4235-
#_methods = {
4236-
# "predict_prob" : "rows",
4237-
# }
4238-
#_wrap_methods = lm.wrap.union_dicts(
4239-
# lm.RegressionResultsWrapper._wrap_methods,
4240-
# _methods)
4261+
4262+
42414263
wrap.populate_wrapper(PoissonResultsWrapper, PoissonResults)
42424264

4265+
42434266
class L1CountResultsWrapper(lm.RegressionResultsWrapper):
42444267
pass
42454268

4269+
42464270
class L1PoissonResultsWrapper(lm.RegressionResultsWrapper):
42474271
pass
4248-
#_methods = {
4272+
# _methods = {
42494273
# "predict_prob" : "rows",
42504274
# }
4251-
#_wrap_methods = lm.wrap.union_dicts(
4275+
# _wrap_methods = lm.wrap.union_dicts(
42524276
# lm.RegressionResultsWrapper._wrap_methods,
42534277
# _methods)
4278+
4279+
42544280
wrap.populate_wrapper(L1PoissonResultsWrapper, L1PoissonResults)
42554281

4282+
42564283
class L1NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper):
42574284
pass
4285+
4286+
42584287
wrap.populate_wrapper(L1NegativeBinomialResultsWrapper,
42594288
L1NegativeBinomialResults)
42604289

4290+
42614291
class L1GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper):
42624292
pass
4293+
4294+
42634295
wrap.populate_wrapper(L1GeneralizedPoissonResultsWrapper,
42644296
L1GeneralizedPoissonResults)
42654297

4298+
42664299
class BinaryResultsWrapper(lm.RegressionResultsWrapper):
4267-
_attrs = {"resid_dev" : "rows",
4268-
"resid_generalized" : "rows",
4269-
"resid_pearson" : "rows",
4270-
"resid_response" : "rows"
4300+
_attrs = {"resid_dev": "rows",
4301+
"resid_generalized": "rows",
4302+
"resid_pearson": "rows",
4303+
"resid_response": "rows"
42714304
}
42724305
_wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs,
42734306
_attrs)
4307+
4308+
42744309
wrap.populate_wrapper(BinaryResultsWrapper, BinaryResults)
42754310

4311+
42764312
class L1BinaryResultsWrapper(lm.RegressionResultsWrapper):
42774313
pass
4314+
4315+
42784316
wrap.populate_wrapper(L1BinaryResultsWrapper, L1BinaryResults)
42794317

4318+
42804319
class MultinomialResultsWrapper(lm.RegressionResultsWrapper):
4281-
_attrs = {"resid_misclassified" : "rows"}
4320+
_attrs = {"resid_misclassified": "rows"}
42824321
_wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs,
4283-
_attrs)
4322+
_attrs)
4323+
_methods = {'conf_int': 'multivariate_confint'}
4324+
_wrap_methods = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_methods,
4325+
_methods)
4326+
4327+
42844328
wrap.populate_wrapper(MultinomialResultsWrapper, MultinomialResults)
42854329

4330+
42864331
class L1MultinomialResultsWrapper(lm.RegressionResultsWrapper):
42874332
pass
4333+
4334+
42884335
wrap.populate_wrapper(L1MultinomialResultsWrapper, L1MultinomialResults)

statsmodels/discrete/tests/test_discrete.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,19 @@
1414
import warnings
1515

1616
import numpy as np
17-
import pandas as pd
1817
from numpy.testing import (assert_, assert_raises, assert_almost_equal,
1918
assert_equal, assert_array_equal, assert_allclose,
2019
assert_array_less)
20+
import pandas as pd
21+
from pandas.testing import assert_index_equal
2122
import pytest
23+
from scipy import stats
2224

2325
from statsmodels.discrete.discrete_model import (Logit, Probit, MNLogit,
24-
Poisson, NegativeBinomial,
25-
CountModel, GeneralizedPoisson,
26-
NegativeBinomialP)
26+
Poisson, NegativeBinomial,
27+
CountModel,
28+
GeneralizedPoisson,
29+
NegativeBinomialP)
2730
from statsmodels.discrete.discrete_margins import _iscount, _isdummy
2831
import statsmodels.api as sm
2932
import statsmodels.formula.api as smf
@@ -2357,8 +2360,22 @@ def test_unchanging_degrees_of_freedom():
23572360

23582361
def test_mnlogit_float_name():
23592362
df = pd.DataFrame({"A": [0., 1.1, 0, 0, 1.1], "B": [0, 1, 0, 1, 1]})
2360-
result = smf.mnlogit(formula="A ~ B", data=df).fit()
23612363
with pytest.warns(SpecificationWarning,
23622364
match='endog contains values are that not int-like'):
2363-
summ = result.summary().as_text()
2365+
result = smf.mnlogit(formula="A ~ B", data=df).fit()
2366+
summ = result.summary().as_text()
23642367
assert 'A=1.1' in summ
2368+
2369+
2370+
def test_cov_confint_pandas():
2371+
data = sm.datasets.anes96.load(as_pandas=True)
2372+
exog = sm.add_constant(data.exog, prepend=False)
2373+
res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0)
2374+
cov = res1.cov_params()
2375+
ci = res1.conf_int()
2376+
se = np.sqrt(np.diag(cov))
2377+
se2 = (ci.iloc[:, 1] - ci.iloc[:, 0]) / (2 * stats.norm.ppf(0.975))
2378+
assert_allclose(se, se2)
2379+
assert_index_equal(ci.index, cov.index)
2380+
assert_index_equal(cov.index, cov.columns)
2381+
assert isinstance(ci.index, pd.MultiIndex)

statsmodels/tsa/vector_ar/tests/test_var.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import sys
1111

1212
import numpy as np
13+
import pandas as pd
14+
from pandas.testing import assert_index_equal
1315
import pytest
1416

1517

@@ -802,3 +804,15 @@ def test_exog(self):
802804
def test_deprecated_attributes_varresults(bivariate_var_result, attr):
803805
with pytest.warns(FutureWarning):
804806
getattr(bivariate_var_result, attr)
807+
808+
809+
def test_var_cov_params(bivariate_var_data):
810+
df = pd.DataFrame(bivariate_var_data, columns=['x', 'y'])
811+
mod = VAR(df)
812+
res = mod.fit(2)
813+
cov = res.cov_params()
814+
assert isinstance(cov, pd.DataFrame)
815+
exog_names = ('const', 'L1.x', 'L1.y', 'L2.x', 'L2.y')
816+
index = pd.MultiIndex.from_product((exog_names, ('x', 'y')))
817+
assert_index_equal(cov.index, cov.columns)
818+
assert_index_equal(cov.index, index)

statsmodels/tsa/vector_ar/var_model.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from collections import defaultdict
1414

1515
import numpy as np
16+
import pandas as pd
1617
import scipy.linalg
1718
import scipy.stats as stats
1819

@@ -640,9 +641,8 @@ def fit(self, maxlags=None, method='ols', ic=None, trend='c',
640641
self.data.xnames = (self.data.xnames[:k_trend] +
641642
x_names_to_add +
642643
self.data.xnames[k_trend:])
643-
self.data.cov_names = ['.'.join((str(yn), str(xn)))
644-
for xn in self.data.xnames
645-
for yn in self.data.ynames]
644+
self.data.cov_names = pd.MultiIndex.from_product((self.data.xnames,
645+
self.data.ynames))
646646
return self._estimate_var(lags, trend=trend)
647647

648648
def _estimate_var(self, lags, offset=0, trend='c'):
@@ -2137,9 +2137,10 @@ class VARResultsWrapper(wrap.ResultsWrapper):
21372137
'stderr': 'columns_eq'}
21382138
_wrap_attrs = wrap.union_dicts(TimeSeriesResultsWrapper._wrap_attrs,
21392139
_attrs)
2140-
_methods = {}
2140+
_methods = {'conf_int': 'multivariate_confint'}
21412141
_wrap_methods = wrap.union_dicts(TimeSeriesResultsWrapper._wrap_methods,
21422142
_methods)
2143+
21432144
wrap.populate_wrapper(VARResultsWrapper, VARResults) # noqa:E305
21442145

21452146

0 commit comments

Comments
 (0)