
Commit 2733504

Merge pull request statsmodels#8502 from josef-pkt/ref_doc_diagnostic

REF/DOC Poisson diagnostic

2 parents bcaba34 + cbff096, commit 2733504

File tree

8 files changed, +367 -225 lines changed


examples/notebooks/postestimation_poisson.ipynb

+66 -54 (large diff not rendered by default)

statsmodels/discrete/_diagnostics_count.py

+194
@@ -8,6 +8,8 @@
 import numpy as np
 from scipy import stats
 
+import pandas as pd
+
 from statsmodels.stats.base import HolderTuple
 from statsmodels.discrete.discrete_model import Poisson
 from statsmodels.regression.linear_model import OLS
@@ -227,6 +229,198 @@ def test_chisquare_prob(results, probs, bin_edges=None, method=None):
     return res
 
 
+class DispersionResults(HolderTuple):
+
+    def summary_frame(self):
+        frame = pd.DataFrame({
+            "statistic": self.statistic,
+            "pvalue": self.pvalue,
+            "method": self.method,
+            "alternative": self.alternative
+        })
+
+        return frame
+
+
+def test_poisson_dispersion(results, method="all", _old=False):
+    """Score/LM type tests for Poisson variance assumptions
+
+    Null Hypothesis is
+
+    H0: var(y) = E(y) and assuming E(y) is correctly specified
+    H1: var(y) ~= E(y)
+
+    The tests are based on the constrained model, i.e. the Poisson model.
+    The tests differ in their assumed alternatives, and in their maintained
+    assumptions.
+
+    Parameters
+    ----------
+    results : Poisson results instance
+        This can be a results instance for either a discrete Poisson or a GLM
+        with family Poisson.
+    method : str
+        Not used yet. Currently results for all methods are returned.
+    _old : bool
+        Temporary keyword for backwards compatibility, will be removed
+        in future version of statsmodels.
+
+    Returns
+    -------
+    res : instance
+        The instance of DispersionResults has the hypothesis test results,
+        statistic, pvalue, method, alternative, as main attributes and a
+        summary_frame method that returns the results as pandas DataFrame.
+
+    """
+
+    if method not in ["all"]:
+        raise ValueError(f'unknown method "{method}"')
+
+    if hasattr(results, '_results'):
+        results = results._results
+
+    endog = results.model.endog
+    nobs = endog.shape[0]  # TODO: use attribute, may need to be added
+    fitted = results.predict()
+    # fitted = results.fittedvalues  # discrete has linear prediction
+    # this assumes Poisson
+    resid2 = results.resid_response**2
+    var_resid_endog = (resid2 - endog)
+    var_resid_fitted = (resid2 - fitted)
+    std1 = np.sqrt(2 * (fitted**2).sum())
+
+    var_resid_endog_sum = var_resid_endog.sum()
+    dean_a = var_resid_fitted.sum() / std1
+    dean_b = var_resid_endog_sum / std1
+    dean_c = (var_resid_endog / fitted).sum() / np.sqrt(2 * nobs)
+
+    pval_dean_a = 2 * stats.norm.sf(np.abs(dean_a))
+    pval_dean_b = 2 * stats.norm.sf(np.abs(dean_b))
+    pval_dean_c = 2 * stats.norm.sf(np.abs(dean_c))
+
+    results_all = [[dean_a, pval_dean_a],
+                   [dean_b, pval_dean_b],
+                   [dean_c, pval_dean_c]]
+    description = [['Dean A', 'mu (1 + a mu)'],
+                   ['Dean B', 'mu (1 + a mu)'],
+                   ['Dean C', 'mu (1 + a)']]
+
+    # Cameron Trived auxiliary regression page 78 count book 1989
+    endog_v = var_resid_endog / fitted
+    res_ols_nb2 = OLS(endog_v, fitted).fit(use_t=False)
+    stat_ols_nb2 = res_ols_nb2.tvalues[0]
+    pval_ols_nb2 = res_ols_nb2.pvalues[0]
+    results_all.append([stat_ols_nb2, pval_ols_nb2])
+    description.append(['CT nb2', 'mu (1 + a mu)'])
+
+    res_ols_nb1 = OLS(endog_v, fitted).fit(use_t=False)
+    stat_ols_nb1 = res_ols_nb1.tvalues[0]
+    pval_ols_nb1 = res_ols_nb1.pvalues[0]
+    results_all.append([stat_ols_nb1, pval_ols_nb1])
+    description.append(['CT nb1', 'mu (1 + a)'])
+
+    endog_v = var_resid_endog / fitted
+    res_ols_nb2 = OLS(endog_v, fitted).fit(cov_type='HC3', use_t=False)
+    stat_ols_hc1_nb2 = res_ols_nb2.tvalues[0]
+    pval_ols_hc1_nb2 = res_ols_nb2.pvalues[0]
+    results_all.append([stat_ols_hc1_nb2, pval_ols_hc1_nb2])
+    description.append(['CT nb2 HC3', 'mu (1 + a mu)'])
+
+    res_ols_nb1 = OLS(endog_v, np.ones(len(endog_v))).fit(cov_type='HC3',
+                                                          use_t=False)
+    stat_ols_hc1_nb1 = res_ols_nb1.tvalues[0]
+    pval_ols_hc1_nb1 = res_ols_nb1.pvalues[0]
+    results_all.append([stat_ols_hc1_nb1, pval_ols_hc1_nb1])
+    description.append(['CT nb1 HC3', 'mu (1 + a)'])
+
+    results_all = np.array(results_all)
+    if _old:
+        # for backwards compatibility in 0.14, remove in later versions
+        return results_all, description
+    else:
+        res = DispersionResults(
+            statistic=results_all[:, 0],
+            pvalue=results_all[:, 1],
+            method=[i[0] for i in description],
+            alternative=[i[1] for i in description],
+            name="Poisson Dispersion Test"
+        )
+        return res
+
+
+def _test_poisson_dispersion_generic(
+        results,
+        exog_new_test,
+        exog_new_control=None,
+        include_score=False,
+        use_endog=True,
+        cov_type='HC3',
+        cov_kwds=None,
+        use_t=False
+        ):
+    """A variable addition test for the variance function
+
+    This uses an artificial regression to calculate a variant of an LM or
+    generalized score test for the specification of the variance assumption
+    in a Poisson model. The performed test is a Wald test on the coefficients
+    of the `exog_new_test`.
+
+    Warning: insufficiently tested, especially for options
+    """
+
+    if hasattr(results, '_results'):
+        results = results._results
+
+    endog = results.model.endog
+    nobs = endog.shape[0]  # TODO: use attribute, may need to be added
+    # fitted = results.fittedvalues  # generic has linpred as fittedvalues
+    fitted = results.predict()
+    resid2 = results.resid_response**2
+    # the following assumes Poisson
+    if use_endog:
+        var_resid = (resid2 - endog)
+    else:
+        var_resid = (resid2 - fitted)
+
+    endog_v = var_resid / fitted
+
+    k_constraints = exog_new_test.shape[1]
+    ex_list = [exog_new_test]
+    if include_score:
+        score_obs = results.model.score_obs(results.params)
+        ex_list.append(score_obs)
+
+    if exog_new_control is not None:
+        ex_list.append(score_obs)
+
+    if len(ex_list) > 1:
+        ex = np.column_stack(ex_list)
+        use_wald = True
+    else:
+        ex = ex_list[0]  # no control variables in exog
+        use_wald = False
+
+    res_ols = OLS(endog_v, ex).fit(cov_type=cov_type, cov_kwds=cov_kwds,
+                                   use_t=use_t)
+
+    if use_wald:
+        # we have controls and need to test coefficients
+        k_vars = ex.shape[1]
+        constraints = np.eye(k_constraints, k_vars)
+        ht = res_ols.wald_test(constraints)
+        stat_ols = ht.statistic
+        pval_ols = ht.pvalue
+    else:
+        # we do not have controls and can use overall fit
+        nobs = endog_v.shape[0]
+        rsquared_noncentered = 1 - res_ols.ssr/res_ols.uncentered_tss
+        stat_ols = nobs * rsquared_noncentered
+        pval_ols = stats.chi2.sf(stat_ols, k_constraints)
+
+    return stat_ols, pval_ols
+
+
 def test_poisson_zeroinflation_jh(results_poisson, exog_infl=None):
     """score test for zero inflation or deflation in Poisson
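
For orientation, here is a minimal usage sketch of the new `test_poisson_dispersion` function and the `DispersionResults.summary_frame` method added above. The simulated data, variable names, and parameter values are illustrative assumptions, not part of the commit.

```python
# Minimal sketch (not from the commit): exercising test_poisson_dispersion
# on simulated Poisson data.  The data generation is purely illustrative.
import numpy as np
from statsmodels.discrete.discrete_model import Poisson
from statsmodels.discrete._diagnostics_count import test_poisson_dispersion

rng = np.random.default_rng(12345)
nobs = 500
exog = np.column_stack((np.ones(nobs), rng.normal(size=nobs)))
endog = rng.poisson(np.exp(exog @ np.array([0.5, 0.2])))

res_poisson = Poisson(endog, exog).fit(disp=0)
disp_test = test_poisson_dispersion(res_poisson)

# statistic, pvalue, method and alternative for all reported tests
print(disp_test.summary_frame())
```

The resulting DataFrame has one row per reported test: Dean A, B, C plus the four Cameron-Trivedi style auxiliary regressions (nb1/nb2, with and without HC3).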

statsmodels/discrete/diagnostic.py

+8 -8
@@ -13,15 +13,12 @@
 
 from statsmodels.tools.decorators import cache_readonly
 
-from statsmodels.stats._diagnostic_other import (
-    dispersion_poisson,
-    # dispersion_poisson_generic,
-    )
-
 from statsmodels.stats.diagnostic_gen import (
     test_chisquare_binning
     )
 from statsmodels.discrete._diagnostics_count import (
+    test_poisson_dispersion,
+    # _test_poisson_dispersion_generic,
     test_poisson_zeroinflation_jh,
     test_poisson_zeroinflation_broek,
     test_poisson_zeros,
@@ -37,7 +34,10 @@ class CountDiagnostic:
 
     Parameters
     ----------
-    results : PoissonResults instance
+    results : Results instance of a count model.
+    y_max : int
+        Largest count to include when computing predicted probabilities for
+        counts. Default is the largest observed count.
 
     """
 
@@ -48,7 +48,7 @@ def __init__(self, results, y_max=None):
     @cache_readonly
     def probs_predicted(self):
         if self.y_max is not None:
-            kwds = {"y_values": np.arange(self.y_max)}
+            kwds = {"y_values": np.arange(self.y_max + 1)}
         else:
             kwds = {}
         return self.results.predict(which="prob", **kwds)
@@ -147,7 +147,7 @@ def test_dispersion(self):
         -------
         dispersion results
         """
-        res = dispersion_poisson(self.results)
+        res = test_poisson_dispersion(self.results)
         return res
 
     def test_poisson_zeroinflation(self, method="prob", exog_infl=None):
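
The wrapper changes above can be exercised with a short self-contained sketch. The simulated data are an illustrative assumption; the `get_diagnostic` accessor is the one used in the test files changed by this commit.

```python
# Minimal sketch (not from the commit): dispersion test and truncated
# predicted probabilities through the Poisson diagnostic wrapper.
import numpy as np
from statsmodels.discrete.discrete_model import Poisson

rng = np.random.default_rng(0)
exog = np.column_stack((np.ones(300), rng.normal(size=300)))
endog = rng.poisson(np.exp(exog @ np.array([0.5, 0.2])))
res = Poisson(endog, exog).fit(disp=0)

dia = res.get_diagnostic(y_max=10)

# with the arange(y_max + 1) fix, counts 0..y_max are covered: 11 columns
print(dia.probs_predicted.shape)

# test_dispersion now returns a DispersionResults instance
print(dia.test_dispersion().summary_frame())
```

Passing `_old=True` to the underlying `test_poisson_dispersion` still returns the previous `(array, description)` tuple, but only as a temporary backwards-compatibility path.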

statsmodels/discrete/tests/test_diagnostic.py

+3 -2
@@ -116,8 +116,9 @@ def test_spec_tests(self):
 
         respoi = Poisson(self.endog, self.exog).fit(disp=0)
         dia = PoissonDiagnostic(respoi)
-        t_disp = dia.test_dispersion()[0]
-        assert_allclose(t_disp, res_dispersion, rtol=1e-8)
+        t_disp = dia.test_dispersion()
+        res_disp = np.column_stack(((t_disp.statistic, t_disp.pvalue)))
+        assert_allclose(res_disp, res_dispersion, rtol=1e-8)
 
         nobs = self.endog.shape[0]
         t_zi_jh = dia.test_poisson_zeroinflation(method="broek",

statsmodels/discrete/tests/test_predict.py

+1 -1
@@ -153,7 +153,7 @@ def test_diagnostic(self):
         dia = res1.get_diagnostic(y_max=21)
         res_chi2 = dia.test_chisquare_prob(bin_edges=np.arange(4))
         assert_equal(res_chi2.diff1.shape[1], 3)
-        assert_equal(dia.probs_predicted.shape[1], 21)
+        assert_equal(dia.probs_predicted.shape[1], 22)
 
         try:
             dia.plot_probs(upp_xlim=20)

statsmodels/genmod/tests/test_score_test.py

+5 -4
@@ -13,6 +13,7 @@
 from statsmodels.genmod import families
 from statsmodels.discrete.discrete_model import Poisson
 import statsmodels.stats._diagnostic_other as diao
+import statsmodels.discrete._diagnostics_count as diac
 from statsmodels.base._parameter_inference import score_test
 
 
@@ -110,19 +111,19 @@ def setup_class(cls):
         cls.model_full = GLM(y, xx, family=families.Poisson())
         cls.model_drop = GLM(y, x, family=families.Poisson())
 
-
     def test_dispersion(self):
         res_drop = self.model_drop.fit()
-        res_test = diao.dispersion_poisson(res_drop)
-        assert_allclose(res_test[0], self.res_disptest, rtol=1e-6, atol=1e-14)
+        res_test = diac.test_poisson_dispersion(res_drop)
+        res_test_ = np.column_stack((res_test.statistic, res_test.pvalue))
+        assert_allclose(res_test_, self.res_disptest, rtol=1e-6, atol=1e-14)
         # constant only dispersion
         ex = np.ones((res_drop.model.endog.shape[0], 1))
         # ex = np.column_stack((np.ones(res_drop.model.endog.shape[0]),
         #                       res_drop.predict()))  # or **2
         # dispersion_poisson_generic might not be correct
         # or not clear what the alternative hypothesis is
         # choosing different `ex` implies different alternative hypotheses
-        res_test = diao.dispersion_poisson_generic(res_drop, ex)
+        res_test = diac._test_poisson_dispersion_generic(res_drop, ex)
         assert_allclose(res_test, self.res_disptest_g, rtol=1e-6, atol=1e-14)
 
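
As a companion to the updated test, a small self-contained sketch of calling the renamed private helper `_test_poisson_dispersion_generic`. The GLM setup and the constant-only `ex` mirror the test above; the simulated data are an illustrative assumption.

```python
# Minimal sketch (not from the commit): variable-addition dispersion test via
# the private helper, with a constant-only alternative as in the test above.
import numpy as np
import statsmodels.discrete._diagnostics_count as diac
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod import families

rng = np.random.default_rng(0)
exog = np.column_stack((np.ones(200), rng.normal(size=200)))
endog = rng.poisson(np.exp(exog @ np.array([0.1, 0.3])))

res_drop = GLM(endog, exog, family=families.Poisson()).fit()

# constant-only `ex`; different choices of `ex` imply different alternatives
ex = np.ones((res_drop.model.endog.shape[0], 1))
stat, pval = diac._test_poisson_dispersion_generic(res_drop, ex)
print(stat, pval)
```

With no control variables and `include_score=False`, the helper takes the `use_wald=False` branch and reports the nobs times uncentered R-squared statistic against a chi-square reference rather than a Wald test.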
