Commit 4afdb22

Merge pull request statsmodels#8780 from josef-pkt/ref_getdistribution
REF: get_distribution, return 1-d instead of column frozen distribution
2 parents dda8e3d + 992d72c commit 4afdb22

File tree

10 files changed: +128 additions, -18 deletions

statsmodels/discrete/count_model.py

Lines changed: 6 additions & 3 deletions
@@ -727,7 +727,8 @@ def get_distribution(self, params, exog=None, exog_infl=None,
         w = self.predict(params, exog=exog, exog_infl=exog_infl,
                          exposure=exposure, offset=offset, which="prob-main")

-        distr = self.distribution(mu[:, None], 1 - w[:, None])
+        # distr = self.distribution(mu[:, None], 1 - w[:, None])
+        distr = self.distribution(mu, 1 - w)
         return distr

@@ -842,7 +843,8 @@ def get_distribution(self, params, exog=None, exog_infl=None,
                           exposure=exposure, offset=offset, which="mean-main")
         w = self.predict(params, exog=exog, exog_infl=exog_infl,
                          exposure=exposure, offset=offset, which="prob-main")
-        distr = self.distribution(mu[:, None], params[-1], p, 1 - w[:, None])
+        # distr = self.distribution(mu[:, None], params[-1], p, 1 - w[:, None])
+        distr = self.distribution(mu, params[-1], p, 1 - w)
         return distr

@@ -958,7 +960,8 @@ def get_distribution(self, params, exog=None, exog_infl=None,
         w = self.predict(params, exog=exog, exog_infl=exog_infl,
                          exposure=exposure, offset=offset, which="prob-main")

-        distr = self.distribution(mu[:, None], params[-1], p, 1 - w[:, None])
+        # distr = self.distribution(mu[:, None], params[-1], p, 1 - w[:, None])
+        distr = self.distribution(mu, params[-1], p, 1 - w)
         return distr

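Note: the shape change above is easiest to see with a plain frozen scipy distribution. A minimal sketch follows, using scipy.stats.poisson as a stand-in for the model's zero-inflated distribution (the stand-in is an assumption for illustration): freezing on a 1-d parameter array yields 1-d pmf and rvs output, where the old column shape produced 2-d arrays.

    import numpy as np
    from scipy import stats

    mu = np.array([1.0, 2.0, 3.0])           # one mean per observation

    distr_new = stats.poisson(mu)            # new: frozen on 1-d params
    distr_old = stats.poisson(mu[:, None])   # old: frozen on a column

    print(distr_new.pmf(1).shape)            # (3,)   one value per observation
    print(distr_old.pmf(1).shape)            # (3, 1) trailing axis leaks out
    print(distr_new.rvs(size=3).shape)       # (3,)
    print(distr_old.rvs(size=(3, 1)).shape)  # (3, 1)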
statsmodels/discrete/discrete_model.py

Lines changed: 18 additions & 8 deletions
@@ -677,7 +677,8 @@ def get_distribution(self, params, exog=None, offset=None):
             Instance of frozen scipy distribution.
         """
         mu = self.predict(params, exog=exog, offset=offset)
-        distr = stats.bernoulli(mu[:, None])
+        # distr = stats.bernoulli(mu[:, None])
+        distr = stats.bernoulli(mu)
         return distr

@@ -1659,7 +1660,7 @@ def predict(self, params, exog=None, exposure=None, offset=None,
                               exposure=exposure, offset=offset,
                               )[:, None]
             # uses broadcasting
-            return stats.poisson.pmf(y_values, mu)
+            return stats.poisson._pmf(y_values, mu)
         else:
             raise ValueError('Value of the `which` option is not recognized')

@@ -2281,7 +2282,8 @@ def get_distribution(self, params, exog=None, exposure=None, offset=None):
         """
         mu = self.predict(params, exog=exog, exposure=exposure, offset=offset)
         p = self.parameterization + 1
-        distr = genpoisson_p(mu[:, None], params[-1], p)
+        # distr = genpoisson_p(mu[:, None], params[-1], p)
+        distr = genpoisson_p(mu, params[-1], p)
         return distr

@@ -3587,8 +3589,13 @@ def predict(self, params, exog=None, exposure=None, offset=None,
                 offset=offset
             )
             if y_values is None:
-                y_values = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
-                return distr.pmf(y_values)
+                y_values = np.arange(0, np.max(self.endog) + 1)
+            else:
+                y_values = np.asarray(y_values)
+
+            assert y_values.ndim == 1
+            y_values = y_values[..., None]
+            return distr.pmf(y_values).T

         exog, offset, exposure = self._get_predict_arrays(
             exog=exog,

@@ -3757,7 +3764,8 @@ def get_distribution(self, params, exog=None, exposure=None, offset=None):
         """
         mu = self.predict(params, exog=exog, exposure=exposure, offset=offset)
         if self.loglike_method == 'geometric':
-            distr = stats.geom(1 / (1 + mu[:, None]), loc=-1)
+            # distr = stats.geom(1 / (1 + mu[:, None]), loc=-1)
+            distr = stats.geom(1 / (1 + mu), loc=-1)
         else:
             if self.loglike_method == 'nb2':
                 p = 2

@@ -3768,7 +3776,8 @@ def get_distribution(self, params, exog=None, exposure=None, offset=None):
             q = 2 - p
             size = 1. / alpha * mu**q
             prob = size / (size + mu)
-            distr = nbinom(size[:, None], prob[:, None])
+            # distr = nbinom(size[:, None], prob[:, None])
+            distr = nbinom(size, prob)

         return distr

@@ -4343,7 +4352,8 @@ def get_distribution(self, params, exog=None, exposure=None, offset=None):
         """
         mu = self.predict(params, exog=exog, exposure=exposure, offset=offset)
         size, prob = self.convert_params(params, mu)
-        distr = nbinom(size[:, None], prob[:, None])
+        # distr = nbinom(size[:, None], prob[:, None])
+        distr = nbinom(size, prob)
         return distr

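Note: the rewritten predict code evaluates a 1-d frozen distribution on a column of counts and then transposes, so probabilities come back as (nobs, n_values). A sketch of that broadcasting pattern, again with scipy.stats.poisson as an assumed stand-in:

    import numpy as np
    from scipy import stats

    mu = np.array([1.0, 2.0, 3.0])             # nobs = 3
    distr = stats.poisson(mu)                  # frozen on 1-d parameters

    y_values = np.arange(5)                    # counts 0..4
    probs = distr.pmf(y_values[..., None]).T   # (5, 1) against (3,) -> (5, 3), then T

    print(probs.shape)                                          # (3, 5)
    print(np.allclose(probs.sum(1), stats.poisson.cdf(4, mu)))  # True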
statsmodels/discrete/tests/test_conditional.py

Lines changed: 3 additions & 3 deletions
@@ -20,12 +20,12 @@ def test_logit_1d():
     for x in -1, 0, 1, 2:
         params = np.r_[x, ]
         _, grad = model._denom_grad(0, params)
-        ngrad = approx_fprime(params, lambda x: model._denom(0, x))
+        ngrad = approx_fprime(params, lambda x: model._denom(0, x)).squeeze()
         assert_allclose(grad, ngrad)

     # Check the gradient for the loglikelihood
     for x in -1, 0, 1, 2:
-        grad = approx_fprime(np.r_[x, ], model.loglike)
+        grad = approx_fprime(np.r_[x, ], model.loglike).squeeze()
         score = model.score(np.r_[x, ])
         assert_allclose(grad, score, rtol=1e-4)

@@ -117,7 +117,7 @@ def test_poisson_1d():

     # Check the gradient for the loglikelihood
     for x in -1, 0, 1, 2:
-        grad = approx_fprime(np.r_[x, ], model.loglike)
+        grad = approx_fprime(np.r_[x, ], model.loglike).squeeze()
         score = model.score(np.r_[x, ])
         assert_allclose(grad, score, rtol=1e-4)

statsmodels/discrete/tests/test_predict.py

Lines changed: 8 additions & 0 deletions
@@ -359,6 +359,7 @@ def test_distr(case):
     # res = mod.fit()
     params_dgp = params
     distr = mod.get_distribution(params_dgp)
+    assert distr.pmf(1).ndim == 1
     try:
         y2 = distr.rvs(size=(nobs, 1)).squeeze()
     except ValueError:

@@ -378,7 +379,14 @@ def test_distr(case):
     assert_allclose(res.resid_pearson, (y2 - mean) / np.sqrt(var_), rtol=1e-13)

     if not issubclass(cls_model, BinaryModel):
+        # smoke, shape, consistency test
+        probs = res.predict(which="prob", y_values=np.arange(5))
+        assert probs.shape == (len(mod.endog), 5)
+        probs2 = res.get_prediction(
+            which="prob", y_values=np.arange(5), average=True)
+        assert_allclose(probs2.predicted, probs.mean(0), rtol=1e-10)
         dia = res.get_diagnostic()
+        dia.probs_predicted
         # fig = dia.plot_probs();
         # fig.suptitle(cls_model.__name__ + repr(kwds), fontsize=16)

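Note: the added assertions pin down the public contract that which="prob" returns an (nobs, n_values) array and that get_prediction(..., average=True) averages it over observations. A usage sketch on simulated data, assuming a statsmodels version that includes this commit:

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(12345)
    x = sm.add_constant(rng.normal(size=(200, 1)))
    y = rng.poisson(np.exp(0.5 + 0.5 * x[:, 1]))

    res = sm.Poisson(y, x).fit(disp=0)
    probs = res.predict(which="prob", y_values=np.arange(5))
    print(probs.shape)                                   # (200, 5)

    pred = res.get_prediction(which="prob", y_values=np.arange(5), average=True)
    print(np.allclose(pred.predicted, probs.mean(0)))    # True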
statsmodels/discrete/truncated_model.py

Lines changed: 4 additions & 3 deletions
@@ -383,12 +383,13 @@ def predict(self, params, exog=None, exposure=None, offset=None,
             return probs
         elif which == 'prob-base':
             if y_values is not None:
-                counts = np.atleast_2d(y_values)
+                counts = np.asarray(y_values)
             else:
-                counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
+                counts = np.arange(0, np.max(self.endog)+1)
+
             probs = self.model_main.predict(
                 params, exog=exog, exposure=np.exp(exposure),
-                offset=offset, which="prob", y_values=counts)[:, None]
+                offset=offset, which="prob", y_values=counts)
             return probs
         elif which == 'var':
             mu = np.exp(linpred)

statsmodels/genmod/generalized_linear_model.py

Lines changed: 62 additions & 0 deletions
@@ -2149,6 +2149,68 @@ def get_influence(self, observed=True):
             hat_matrix_diag=hat_matrix_diag)
         return infl

+    def get_distribution(self, exog=None, exposure=None,
+                         offset=None, var_weights=1., n_trials=1.):
+        """
+        Return an instance of the predictive distribution.
+
+        Parameters
+        ----------
+        scale : scalar
+            The scale parameter.
+        exog : array_like
+            The predictor variable matrix.
+        offset : array_like or None
+            Offset variable for predicted mean.
+        exposure : array_like or None
+            Log(exposure) will be added to the linear prediction.
+        var_weights : array_like
+            1d array of variance (analytic) weights. The default is None.
+        n_trials : int
+            Number of trials for the binomial distribution. The default is 1
+            which corresponds to a Bernoulli random variable.
+
+        Returns
+        -------
+        gen
+            Instance of a scipy frozen distribution based on estimated
+            parameters.
+            Use the ``rvs`` method to generate random values.
+
+        Notes
+        -----
+        Due to the behavior of ``scipy.stats.distributions`` objects, the
+        returned random number generator must be called with ``gen.rvs(n)``
+        where ``n`` is the number of observations in the data set used
+        to fit the model. If any other value is used for ``n``, misleading
+        results will be produced.
+        """
+        # Note this is mostly a copy of GLM.get_prediction
+        # calling results.predict here avoids the exog check and transform
+
+        if isinstance(self.model.family, (families.Binomial, families.Poisson,
+                                          families.NegativeBinomial)):
+            # use scale=1, independent of QMLE scale for discrete
+            scale = 1.
+            if self.scale != 1.:
+                msg = "using scale=1, no excess dispersion in distribution"
+                warnings.warn(msg, UserWarning)
+        else:
+            scale = self.scale
+
+        mu = self.predict(exog, exposure, offset, which="mean")
+
+        kwds = {}
+        if (np.any(n_trials != 1) and
+                isinstance(self.model.family, families.Binomial)):
+
+            kwds["n_trials"] = n_trials
+
+        distr = self.model.family.get_distribution(
+            mu, scale, var_weights=var_weights, **kwds)
+        return distr
+
+
     @Appender(base.LikelihoodModelResults.remove_data.__doc__)
     def remove_data(self):
         # GLM has alias/reference in result instance

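Note: a usage sketch for the new GLMResults.get_distribution on simulated data (variable names are illustrative). For a Poisson family the returned object is a frozen scipy distribution over all observations, so its mean matches res.predict() and, per the docstring, rvs should be called with n equal to nobs:

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(987)
    nobs = 500
    x = sm.add_constant(rng.normal(size=(nobs, 1)))
    y = rng.poisson(np.exp(0.25 + 0.5 * x[:, 1]))

    res = sm.GLM(y, x, family=sm.families.Poisson()).fit()
    distr = res.get_distribution()                     # frozen scipy distribution
    y_sim = distr.rvs(nobs)                            # one draw per observation

    print(np.allclose(distr.mean(), res.predict()))    # True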
statsmodels/genmod/tests/test_glm.py

Lines changed: 10 additions & 0 deletions
@@ -255,6 +255,15 @@ def test_get_distribution(self):
         var_ = res1.predict(which="var_unscaled")
         assert_allclose(var_ * res_scale, var_endog, rtol=1e-13)

+        # check get_distribution of results instance
+        if getattr(self, "has_edispersion", False):
+            with pytest.warns(UserWarning, match="using scale=1"):
+                distr3 = res1.get_distribution()
+        else:
+            distr3 = res1.get_distribution()
+        for k in distr2.kwds:
+            assert_allclose(distr3.kwds[k], distr2.kwds[k], rtol=1e-13)
+

 class CheckComparisonMixin:

@@ -865,6 +874,7 @@ def setup_class(cls):
         res2 = Committee()
         res2.aic_R += 2  # They do not count a degree of freedom for the scale
         cls.res2 = res2
+        cls.has_edispersion = True

     # FIXME: enable or delete
     # def setup_method(self):

statsmodels/othermod/betareg.py

Lines changed: 2 additions & 0 deletions
@@ -134,6 +134,8 @@ def __init__(self, endog, exog, exog_precision=None,
         self._init_keys.extend(['exog_precision'])
         self._init_keys.extend(['link', 'link_precision'])
         self._null_drop_keys = ['exog_precision']
+        del kwds['extra_params_names']
+        self._check_kwargs(kwds)
         self.results_class = BetaResults
         self.results_class_wrapper = BetaResultsWrapper

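Note: the two added lines make BetaModel report unknown keyword arguments instead of silently swallowing them; extra_params_names is consumed internally, so it is removed before the remaining kwds are checked. A sketch of the resulting behavior on simulated data (the junk keyword is deliberately bogus):

    import numpy as np
    import pytest
    from statsmodels.othermod.betareg import BetaModel
    from statsmodels.tools.sm_exceptions import ValueWarning

    rng = np.random.default_rng(0)
    exog = np.column_stack([np.ones(100), rng.normal(size=100)])
    endog = rng.beta(2.0, 2.0, size=100)   # responses in (0, 1)

    with pytest.warns(ValueWarning, match="unknown kwargs"):
        BetaModel(endog, exog, junk=True)  # mistyped keyword now warns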
statsmodels/othermod/tests/test_beta.py

Lines changed: 11 additions & 0 deletions
@@ -2,11 +2,16 @@
 import io
 import os

+import pytest
+
 import numpy as np
 from numpy.testing import assert_allclose, assert_equal
 import pandas as pd
 import patsy
 from statsmodels.api import families
+from statsmodels.tools.sm_exceptions import (
+    ValueWarning,
+)
 from statsmodels.othermod.betareg import BetaModel
 from .results import results_betareg as resultsb

@@ -126,6 +131,12 @@ def test_precision_formula(self):
         assert_close(rslt.params, self.meth_fit.params, 1e-10)
         assert isinstance(rslt.params, pd.Series)

+        with pytest.warns(ValueWarning, match="unknown kwargs"):
+            BetaModel.from_formula(self.model, methylation,
+                                   exog_precision_formula='~ age',
+                                   link_precision=links.Identity(),
+                                   junk=False)
+
     def test_scores(self):
         model, Z = self.model, self.Z
         for link in (links.Identity(), links.Log()):

statsmodels/tools/numdiff.py

Lines changed: 4 additions & 1 deletion
@@ -158,7 +158,10 @@ def approx_fprime(x, f, epsilon=None, args=(), kwargs={}, centered=False):
                         f(*((x-ei,)+args), **kwargs))/(2 * epsilon[k])
             ei[k] = 0.0

-    return grad.squeeze().T
+    if n == 1:
+        return grad.T
+    else:
+        return grad.squeeze().T


 def _approx_fprime_scalar(x, f, epsilon=None, args=(), kwargs={},

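Note: approx_fprime now keeps a 2-d (k, 1) gradient when the parameter vector has length one instead of squeezing that axis away, which is why the conditional-model tests above add .squeeze(). A shape sketch, assuming this version of numdiff:

    import numpy as np
    from statsmodels.tools.numdiff import approx_fprime

    def f(params):
        # vector-valued function of a single parameter
        return np.exp(params[0] * np.arange(3.0))

    grad = approx_fprime(np.array([0.5]), f)
    print(grad.shape)             # (3, 1): no squeeze when n == 1
    print(grad.squeeze().shape)   # (3,): what the updated tests use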
0 commit comments

Comments
 (0)