|
8 | 8 | import numpy as np
|
9 | 9 | from scipy import stats
|
10 | 10 |
|
| 11 | +import pandas as pd |
| 12 | + |
11 | 13 | from statsmodels.stats.base import HolderTuple
|
12 | 14 | from statsmodels.discrete.discrete_model import Poisson
|
13 | 15 | from statsmodels.regression.linear_model import OLS
|
@@ -227,6 +229,198 @@ def test_chisquare_prob(results, probs, bin_edges=None, method=None):
|
227 | 229 | return res
|
228 | 230 |
|
229 | 231 |
|
class DispersionResults(HolderTuple):
    """Holder for Poisson dispersion test results.

    Attributes include ``statistic``, ``pvalue``, ``method`` and
    ``alternative``, one entry per performed test.
    """

    def summary_frame(self):
        """Return the test results as a pandas DataFrame.

        Columns are statistic, pvalue, method and alternative, with one
        row per dispersion test.
        """
        columns = ["statistic", "pvalue", "method", "alternative"]
        data = {name: getattr(self, name) for name in columns}
        return pd.DataFrame(data)
| 243 | + |
| 244 | + |
def test_poisson_dispersion(results, method="all", _old=False):
    """Score/LM type tests for Poisson variance assumptions

    Null Hypothesis is

    H0: var(y) = E(y) and assuming E(y) is correctly specified
    H1: var(y) ~= E(y)

    The tests are based on the constrained model, i.e. the Poisson model.
    The tests differ in their assumed alternatives, and in their maintained
    assumptions.

    Parameters
    ----------
    results : Poisson results instance
        This can be a results instance for either a discrete Poisson or a GLM
        with family Poisson.
    method : str
        Not used yet. Currently results for all methods are returned.
    _old : bool
        Temporary keyword for backwards compatibility, will be removed
        in future version of statsmodels.

    Returns
    -------
    res : instance
        The instance of DispersionResults has the hypothesis test results,
        statistic, pvalue, method, alternative, as main attributes and a
        summary_frame method that returns the results as pandas DataFrame.

    """

    if method not in ["all"]:
        raise ValueError(f'unknown method "{method}"')

    # unwrap a results wrapper to get at the underlying results instance
    if hasattr(results, '_results'):
        results = results._results

    endog = results.model.endog
    nobs = endog.shape[0]  # TODO: use attribute, may need to be added
    fitted = results.predict()
    # fitted = results.fittedvalues  # discrete has linear prediction
    # this assumes Poisson
    resid2 = results.resid_response**2
    var_resid_endog = (resid2 - endog)
    var_resid_fitted = (resid2 - fitted)
    std1 = np.sqrt(2 * (fitted**2).sum())

    var_resid_endog_sum = var_resid_endog.sum()
    dean_a = var_resid_fitted.sum() / std1
    dean_b = var_resid_endog_sum / std1
    dean_c = (var_resid_endog / fitted).sum() / np.sqrt(2 * nobs)

    # two-sided normal p-values for the Dean statistics
    pval_dean_a = 2 * stats.norm.sf(np.abs(dean_a))
    pval_dean_b = 2 * stats.norm.sf(np.abs(dean_b))
    pval_dean_c = 2 * stats.norm.sf(np.abs(dean_c))

    results_all = [[dean_a, pval_dean_a],
                   [dean_b, pval_dean_b],
                   [dean_c, pval_dean_c]]
    description = [['Dean A', 'mu (1 + a mu)'],
                   ['Dean B', 'mu (1 + a mu)'],
                   ['Dean C', 'mu (1 + a)']]

    # Cameron Trivedi auxiliary regression, page 78 count book 1989
    endog_v = var_resid_endog / fitted
    # nb2 alternative var = mu (1 + a mu): regressor is the fitted mean
    res_ols_nb2 = OLS(endog_v, fitted).fit(use_t=False)
    stat_ols_nb2 = res_ols_nb2.tvalues[0]
    pval_ols_nb2 = res_ols_nb2.pvalues[0]
    results_all.append([stat_ols_nb2, pval_ols_nb2])
    description.append(['CT nb2', 'mu (1 + a mu)'])

    # nb1 alternative var = mu (1 + a): constant dispersion factor, so the
    # auxiliary regression uses a constant regressor (matches 'CT nb1 HC3')
    res_ols_nb1 = OLS(endog_v, np.ones(len(endog_v))).fit(use_t=False)
    stat_ols_nb1 = res_ols_nb1.tvalues[0]
    pval_ols_nb1 = res_ols_nb1.pvalues[0]
    results_all.append([stat_ols_nb1, pval_ols_nb1])
    description.append(['CT nb1', 'mu (1 + a)'])

    # heteroscedasticity-robust (HC3) versions of the two CT regressions
    endog_v = var_resid_endog / fitted
    res_ols_nb2 = OLS(endog_v, fitted).fit(cov_type='HC3', use_t=False)
    stat_ols_hc1_nb2 = res_ols_nb2.tvalues[0]
    pval_ols_hc1_nb2 = res_ols_nb2.pvalues[0]
    results_all.append([stat_ols_hc1_nb2, pval_ols_hc1_nb2])
    description.append(['CT nb2 HC3', 'mu (1 + a mu)'])

    res_ols_nb1 = OLS(endog_v, np.ones(len(endog_v))).fit(cov_type='HC3',
                                                          use_t=False)
    stat_ols_hc1_nb1 = res_ols_nb1.tvalues[0]
    pval_ols_hc1_nb1 = res_ols_nb1.pvalues[0]
    results_all.append([stat_ols_hc1_nb1, pval_ols_hc1_nb1])
    description.append(['CT nb1 HC3', 'mu (1 + a)'])

    results_all = np.array(results_all)
    if _old:
        # for backwards compatibility in 0.14, remove in later versions
        return results_all, description
    else:
        res = DispersionResults(
            statistic=results_all[:, 0],
            pvalue=results_all[:, 1],
            method=[i[0] for i in description],
            alternative=[i[1] for i in description],
            name="Poisson Dispersion Test"
            )
        return res
| 350 | + |
| 351 | + |
def _test_poisson_dispersion_generic(
        results,
        exog_new_test,
        exog_new_control=None,
        include_score=False,
        use_endog=True,
        cov_type='HC3',
        cov_kwds=None,
        use_t=False
        ):
    """A variable addition test for the variance function

    This uses an artificial regression to calculate a variant of an LM or
    generalized score test for the specification of the variance assumption
    in a Poisson model. The performed test is a Wald test on the coefficients
    of the `exog_new_test`.

    Warning: insufficiently tested, especially for options

    Parameters
    ----------
    results : Poisson results instance
        Fitted Poisson (or GLM-Poisson) results.
    exog_new_test : ndarray
        Variables whose coefficients are tested in the auxiliary regression.
    exog_new_control : ndarray or None
        Additional control variables included in the auxiliary regression
        but not tested.
    include_score : bool
        If true, the observation-wise score of the estimated model is added
        as control variables.
    use_endog : bool
        If true, the variance residual is computed relative to endog,
        otherwise relative to the fitted mean.
    cov_type : str
        Covariance type used in the auxiliary OLS regression.
    cov_kwds : dict or None
        Keywords for the covariance estimator.
    use_t : bool
        Whether to use the t distribution in the auxiliary regression.

    Returns
    -------
    stat_ols : float
        Test statistic (Wald or nobs * uncentered R-squared).
    pval_ols : float
        P-value of the test.
    """

    # unwrap a results wrapper to get at the underlying results instance
    if hasattr(results, '_results'):
        results = results._results

    endog = results.model.endog
    # fitted = results.fittedvalues  # generic has linpred as fittedvalues
    fitted = results.predict()
    resid2 = results.resid_response**2
    # the following assumes Poisson
    if use_endog:
        var_resid = (resid2 - endog)
    else:
        var_resid = (resid2 - fitted)

    endog_v = var_resid / fitted

    k_constraints = exog_new_test.shape[1]
    ex_list = [exog_new_test]
    if include_score:
        score_obs = results.model.score_obs(results.params)
        ex_list.append(score_obs)

    if exog_new_control is not None:
        # bug fix: previously appended score_obs, which was wrong and raised
        # NameError when include_score is False
        ex_list.append(exog_new_control)

    if len(ex_list) > 1:
        ex = np.column_stack(ex_list)
        use_wald = True
    else:
        ex = ex_list[0]  # no control variables in exog
        use_wald = False

    res_ols = OLS(endog_v, ex).fit(cov_type=cov_type, cov_kwds=cov_kwds,
                                   use_t=use_t)

    if use_wald:
        # we have controls and need to test coefficients
        k_vars = ex.shape[1]
        constraints = np.eye(k_constraints, k_vars)
        ht = res_ols.wald_test(constraints)
        stat_ols = ht.statistic
        pval_ols = ht.pvalue
    else:
        # we do not have controls and can use overall fit
        nobs = endog_v.shape[0]
        rsquared_noncentered = 1 - res_ols.ssr/res_ols.uncentered_tss
        stat_ols = nobs * rsquared_noncentered
        pval_ols = stats.chi2.sf(stat_ols, k_constraints)

    return stat_ols, pval_ols
| 422 | + |
| 423 | + |
230 | 424 | def test_poisson_zeroinflation_jh(results_poisson, exog_infl=None):
|
231 | 425 | """score test for zero inflation or deflation in Poisson
|
232 | 426 |
|
|
0 commit comments