Skip to content

Commit 876dd47

Browse files
authored
Merge pull request statsmodels#5956 from bashtage/var-cov-params
BUG: Fix mutidimensional model cov_params when using pandas
2 parents c860d30 + e6fb0ce commit 876dd47

File tree

5 files changed

+154
-57
lines changed

5 files changed

+154
-57
lines changed

statsmodels/base/data.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ class ModelData(object):
5555
appropriate form
5656
"""
5757
_param_names = None
58+
_cov_names = None
5859

5960
def __init__(self, endog, exog=None, missing='none', hasconst=None,
6061
**kwargs):
@@ -352,6 +353,26 @@ def param_names(self):
352353
def param_names(self, values):
353354
self._param_names = values
354355

356+
@property
357+
def cov_names(self):
358+
"""
359+
Labels for covariance matrices
360+
361+
In multidimensional models, each dimension of a covariance matrix
362+
differs from the number of param_names.
363+
364+
If not set, returns param_names
365+
"""
366+
# for handling names of covariance names in multidimensional models
367+
if self._cov_names is not None:
368+
return self._cov_names
369+
return self.param_names
370+
371+
@cov_names.setter
372+
def cov_names(self, value):
373+
# for handling names of covariance names in multidimensional models
374+
self._cov_names = value
375+
355376
@cache_readonly
356377
def row_labels(self):
357378
exog = self.orig_exog
@@ -427,6 +448,8 @@ def wrap_output(self, obj, how='columns', names=None):
427448
return self.attach_generic_columns_2d(obj, names)
428449
elif how == 'ynames':
429450
return self.attach_ynames(obj)
451+
elif how == 'multivariate_confint':
452+
return self.attach_mv_confint(obj)
430453
else:
431454
return obj
432455

@@ -448,6 +471,9 @@ def attach_rows(self, result):
448471
def attach_dates(self, result):
449472
return result
450473

474+
def attach_mv_confint(self, result):
475+
return result
476+
451477
def attach_generic_columns(self, result, *args, **kwargs):
452478
return result
453479

@@ -533,8 +559,7 @@ def attach_columns_eq(self, result):
533559
return DataFrame(result, index=self.xnames, columns=self.ynames)
534560

535561
def attach_cov(self, result):
536-
return DataFrame(result, index=self.param_names,
537-
columns=self.param_names)
562+
return DataFrame(result, index=self.cov_names, columns=self.cov_names)
538563

539564
def attach_cov_eq(self, result):
540565
return DataFrame(result, index=self.ynames, columns=self.ynames)
@@ -565,6 +590,11 @@ def attach_dates(self, result):
565590
return DataFrame(result, index=self.predict_dates,
566591
columns=self.ynames)
567592

593+
def attach_mv_confint(self, result):
594+
return DataFrame(result.reshape((-1, 2)),
595+
index=self.cov_names,
596+
columns=['lower', 'upper'])
597+
568598
def attach_ynames(self, result):
569599
squeezed = result.squeeze()
570600
# May be zero-dim, for example in the case of forecast one step in tsa

statsmodels/discrete/discrete_model.py

Lines changed: 65 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@
1616
W. Greene. `Econometric Analysis`. Prentice Hall, 5th. edition. 2003.
1717
"""
1818
__all__ = ["Poisson", "Logit", "Probit", "MNLogit", "NegativeBinomial",
19-
"GeneralizedPoisson", "NegativeBinomialP"]
19+
"GeneralizedPoisson", "NegativeBinomialP", "CountModel"]
2020

2121
from scipy.special import loggamma
2222

2323
import numpy as np
24-
from pandas import get_dummies
24+
from pandas import get_dummies, MultiIndex
2525

2626
from scipy.special import gammaln, digamma, polygamma
2727
from scipy import stats, special
@@ -2162,7 +2162,20 @@ class MNLogit(MultinomialModel):
21622162
Notes
21632163
-----
21642164
See developer notes for further information on `MNLogit` internals.
2165-
""" % {'extra_params' : base._missing_param_doc}
2165+
""" % {'extra_params': base._missing_param_doc}
2166+
2167+
def __init__(self, endog, exog, **kwargs):
2168+
super(MNLogit, self).__init__(endog, exog, **kwargs)
2169+
2170+
# Override cov_names since multivariate model
2171+
yname = self.endog_names
2172+
ynames = self._ynames_map
2173+
ynames = MultinomialResults._maybe_convert_ynames_int(ynames)
2174+
# use range below to ensure sortedness
2175+
ynames = [ynames[key] for key in range(int(self.J))]
2176+
idx = MultiIndex.from_product((ynames[1:], self.data.xnames),
2177+
names=(yname, None))
2178+
self.data.cov_names = idx
21662179

21672180
def pdf(self, eXB):
21682181
"""
@@ -4050,7 +4063,8 @@ def __init__(self, model, mlefit):
40504063
self.J = model.J
40514064
self.K = model.K
40524065

4053-
def _maybe_convert_ynames_int(self, ynames):
4066+
@staticmethod
4067+
def _maybe_convert_ynames_int(ynames):
40544068
# see if they're integers
40554069
issue_warning = False
40564070
msg = ('endog contains values are that not int-like. Uses string '
@@ -4213,75 +4227,108 @@ def __init__(self, model, mlefit):
42134227

42144228
class OrderedResultsWrapper(lm.RegressionResultsWrapper):
42154229
pass
4230+
4231+
42164232
wrap.populate_wrapper(OrderedResultsWrapper, OrderedResults)
42174233

4234+
42184235
class CountResultsWrapper(lm.RegressionResultsWrapper):
42194236
pass
4237+
4238+
42204239
wrap.populate_wrapper(CountResultsWrapper, CountResults)
42214240

4241+
42224242
class NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper):
42234243
pass
4244+
4245+
42244246
wrap.populate_wrapper(NegativeBinomialResultsWrapper,
42254247
NegativeBinomialResults)
42264248

4249+
42274250
class GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper):
42284251
pass
4252+
4253+
42294254
wrap.populate_wrapper(GeneralizedPoissonResultsWrapper,
42304255
GeneralizedPoissonResults)
42314256

4257+
42324258
class PoissonResultsWrapper(lm.RegressionResultsWrapper):
42334259
pass
4234-
#_methods = {
4235-
# "predict_prob" : "rows",
4236-
# }
4237-
#_wrap_methods = lm.wrap.union_dicts(
4238-
# lm.RegressionResultsWrapper._wrap_methods,
4239-
# _methods)
4260+
4261+
42404262
wrap.populate_wrapper(PoissonResultsWrapper, PoissonResults)
42414263

4264+
42424265
class L1CountResultsWrapper(lm.RegressionResultsWrapper):
42434266
pass
42444267

4268+
42454269
class L1PoissonResultsWrapper(lm.RegressionResultsWrapper):
42464270
pass
4247-
#_methods = {
4271+
# _methods = {
42484272
# "predict_prob" : "rows",
42494273
# }
4250-
#_wrap_methods = lm.wrap.union_dicts(
4274+
# _wrap_methods = lm.wrap.union_dicts(
42514275
# lm.RegressionResultsWrapper._wrap_methods,
42524276
# _methods)
4277+
4278+
42534279
wrap.populate_wrapper(L1PoissonResultsWrapper, L1PoissonResults)
42544280

4281+
42554282
class L1NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper):
42564283
pass
4284+
4285+
42574286
wrap.populate_wrapper(L1NegativeBinomialResultsWrapper,
42584287
L1NegativeBinomialResults)
42594288

4289+
42604290
class L1GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper):
42614291
pass
4292+
4293+
42624294
wrap.populate_wrapper(L1GeneralizedPoissonResultsWrapper,
42634295
L1GeneralizedPoissonResults)
42644296

4297+
42654298
class BinaryResultsWrapper(lm.RegressionResultsWrapper):
4266-
_attrs = {"resid_dev" : "rows",
4267-
"resid_generalized" : "rows",
4268-
"resid_pearson" : "rows",
4269-
"resid_response" : "rows"
4299+
_attrs = {"resid_dev": "rows",
4300+
"resid_generalized": "rows",
4301+
"resid_pearson": "rows",
4302+
"resid_response": "rows"
42704303
}
42714304
_wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs,
42724305
_attrs)
4306+
4307+
42734308
wrap.populate_wrapper(BinaryResultsWrapper, BinaryResults)
42744309

4310+
42754311
class L1BinaryResultsWrapper(lm.RegressionResultsWrapper):
42764312
pass
4313+
4314+
42774315
wrap.populate_wrapper(L1BinaryResultsWrapper, L1BinaryResults)
42784316

4317+
42794318
class MultinomialResultsWrapper(lm.RegressionResultsWrapper):
4280-
_attrs = {"resid_misclassified" : "rows"}
4319+
_attrs = {"resid_misclassified": "rows"}
42814320
_wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs,
4282-
_attrs)
4321+
_attrs)
4322+
_methods = {'conf_int': 'multivariate_confint'}
4323+
_wrap_methods = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_methods,
4324+
_methods)
4325+
4326+
42834327
wrap.populate_wrapper(MultinomialResultsWrapper, MultinomialResults)
42844328

4329+
42854330
class L1MultinomialResultsWrapper(lm.RegressionResultsWrapper):
42864331
pass
4332+
4333+
42874334
wrap.populate_wrapper(L1MultinomialResultsWrapper, L1MultinomialResults)

statsmodels/discrete/tests/test_discrete.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,19 @@
1313
import warnings
1414

1515
import numpy as np
16-
import pandas as pd
1716
from numpy.testing import (assert_, assert_raises, assert_almost_equal,
1817
assert_equal, assert_array_equal, assert_allclose,
1918
assert_array_less)
19+
import pandas as pd
20+
from pandas.testing import assert_index_equal
2021
import pytest
22+
from scipy import stats
2123

2224
from statsmodels.discrete.discrete_model import (Logit, Probit, MNLogit,
23-
Poisson, NegativeBinomial,
24-
CountModel, GeneralizedPoisson,
25-
NegativeBinomialP)
25+
Poisson, NegativeBinomial,
26+
CountModel,
27+
GeneralizedPoisson,
28+
NegativeBinomialP)
2629
from statsmodels.discrete.discrete_margins import _iscount, _isdummy
2730
import statsmodels.api as sm
2831
import statsmodels.formula.api as smf
@@ -2356,8 +2359,22 @@ def test_unchanging_degrees_of_freedom():
23562359

23572360
def test_mnlogit_float_name():
23582361
df = pd.DataFrame({"A": [0., 1.1, 0, 0, 1.1], "B": [0, 1, 0, 1, 1]})
2359-
result = smf.mnlogit(formula="A ~ B", data=df).fit()
23602362
with pytest.warns(SpecificationWarning,
23612363
match='endog contains values are that not int-like'):
2362-
summ = result.summary().as_text()
2364+
result = smf.mnlogit(formula="A ~ B", data=df).fit()
2365+
summ = result.summary().as_text()
23632366
assert 'A=1.1' in summ
2367+
2368+
2369+
def test_cov_confint_pandas():
2370+
data = sm.datasets.anes96.load(as_pandas=True)
2371+
exog = sm.add_constant(data.exog, prepend=False)
2372+
res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0)
2373+
cov = res1.cov_params()
2374+
ci = res1.conf_int()
2375+
se = np.sqrt(np.diag(cov))
2376+
se2 = (ci.iloc[:, 1] - ci.iloc[:, 0]) / (2 * stats.norm.ppf(0.975))
2377+
assert_allclose(se, se2)
2378+
assert_index_equal(ci.index, cov.index)
2379+
assert_index_equal(cov.index, cov.columns)
2380+
assert isinstance(ci.index, pd.MultiIndex)

statsmodels/tsa/vector_ar/tests/test_var.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import sys
1212

1313
import numpy as np
14+
import pandas as pd
15+
from pandas.testing import assert_index_equal
1416
import pytest
1517

1618

@@ -803,3 +805,15 @@ def test_exog(self):
803805
def test_deprecated_attributes_varresults(bivariate_var_result, attr):
804806
with pytest.warns(FutureWarning):
805807
getattr(bivariate_var_result, attr)
808+
809+
810+
def test_var_cov_params(bivariate_var_data):
811+
df = pd.DataFrame(bivariate_var_data, columns=['x', 'y'])
812+
mod = VAR(df)
813+
res = mod.fit(2)
814+
cov = res.cov_params()
815+
assert isinstance(cov, pd.DataFrame)
816+
exog_names = ('const', 'L1.x', 'L1.y', 'L2.x', 'L2.y')
817+
index = pd.MultiIndex.from_product((exog_names, ('x', 'y')))
818+
assert_index_equal(cov.index, cov.columns)
819+
assert_index_equal(cov.index, index)

0 commit comments

Comments
 (0)