Skip to content

Commit c8841e8

Browse files
authored
Merge pull request statsmodels#4760 from kshedden/riskratio
@bashtage CLN: Provide better name for pooled risk ratio
2 parents 2e6ee26 + 21f8216 commit c8841e8

File tree

1 file changed

+111
-28
lines changed

1 file changed

+111
-28
lines changed

statsmodels/stats/contingency_tables.py

+111-28
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@
3030
import numpy as np
3131
from scipy import stats
3232
import pandas as pd
33+
import warnings
3334
from statsmodels import iolib
34-
from statsmodels.tools.sm_exceptions import SingularMatrixWarning
35+
from statsmodels.tools import sm_exceptions
3536

3637

3738
def _make_df_square(table):
@@ -148,7 +149,8 @@ def __init__(self, table, shift_zeros=True):
148149
self.table = self.table + 0.5
149150

150151
def __str__(self):
151-
s = "A %dx%d contingency table with counts:\n" % tuple(self.table.shape)
152+
s = ("A %dx%d contingency table with counts:\n" %
153+
tuple(self.table.shape))
152154
s += np.array_str(self.table)
153155
return s
154156

@@ -293,7 +295,16 @@ def test_ordinal_association(self, row_scores=None, col_scores=None):
293295

294296
@cache_readonly
295297
def marginal_probabilities(self):
296-
# docstring for cached attributes in init above
298+
"""
299+
Estimate marginal probability distributions for the rows and columns.
300+
301+
Returns
302+
-------
303+
row : ndarray
304+
Marginal row probabilities
305+
col : ndarray
306+
Marginal column probabilities
307+
"""
297308

298309
n = self.table.sum()
299310
row = self.table.sum(1) / n
@@ -307,7 +318,13 @@ def marginal_probabilities(self):
307318

308319
@cache_readonly
309320
def independence_probabilities(self):
310-
# docstring for cached attributes in init above
321+
"""
322+
Returns fitted joint probabilities under independence.
323+
324+
The returned table is outer(row, column), where row and
325+
column are the estimated marginal distributions
326+
of the rows and columns.
327+
"""
311328

312329
row, col = self.marginal_probabilities
313330
itab = np.outer(row, col)
@@ -320,37 +337,60 @@ def independence_probabilities(self):
320337

321338
@cache_readonly
322339
def fittedvalues(self):
323-
# docstring for cached attributes in init above
340+
"""
341+
Returns fitted cell counts under independence.
342+
343+
The returned cell counts are estimates under a model
344+
where the rows and columns of the table are independent.
345+
"""
324346

325347
probs = self.independence_probabilities
326348
fit = self.table.sum() * probs
327349
return fit
328350

329351
@cache_readonly
330352
def resid_pearson(self):
331-
# docstring for cached attributes in init above
353+
"""
354+
Returns Pearson residuals.
355+
356+
The Pearson residuals are calculated under a model where
357+
the rows and columns of the table are independent.
358+
"""
332359

333360
fit = self.fittedvalues
334361
resids = (self.table - fit) / np.sqrt(fit)
335362
return resids
336363

337364
@cache_readonly
338365
def standardized_resids(self):
339-
# docstring for cached attributes in init above
366+
"""
367+
Returns standardized residuals under independence.
368+
"""
340369

341370
row, col = self.marginal_probabilities
342371
sresids = self.resid_pearson / np.sqrt(np.outer(1 - row, 1 - col))
343372
return sresids
344373

345374
@cache_readonly
346375
def chi2_contribs(self):
347-
# docstring for cached attributes in init above
376+
"""
377+
Returns the contributions to the chi^2 statistic for independence.
378+
379+
The returned table contains the contribution of each cell to the chi^2
380+
test statistic for the null hypothesis that the rows and columns
381+
are independent.
382+
"""
348383

349384
return self.resid_pearson**2
350385

351386
@cache_readonly
352387
def local_log_oddsratios(self):
353-
# docstring for cached attributes in init above
388+
"""
389+
Returns local log odds ratios.
390+
391+
The local log odds ratios are the log odds ratios
392+
calculated for contiguous 2x2 sub-tables.
393+
"""
354394

355395
ta = self.table.copy()
356396
a = ta[0:-1, 0:-1]
@@ -370,13 +410,25 @@ def local_log_oddsratios(self):
370410

371411
@cache_readonly
372412
def local_oddsratios(self):
373-
# docstring for cached attributes in init above
413+
"""
414+
Returns local odds ratios.
415+
416+
See documentation for local_log_oddsratios.
417+
"""
374418

375419
return np.exp(self.local_log_oddsratios)
376420

377421
@cache_readonly
378422
def cumulative_log_oddsratios(self):
379-
# docstring for cached attributes in init above
423+
"""
424+
Returns cumulative log odds ratios.
425+
426+
The cumulative log odds ratios for a contingency table
427+
with ordered rows and columns are calculated by collapsing
428+
all cells to the left/right and above/below a given point,
429+
to obtain a 2x2 table from which a log odds ratio can be
430+
calculated.
431+
"""
380432

381433
ta = self.table.cumsum(0).cumsum(1)
382434

@@ -398,7 +450,11 @@ def cumulative_log_oddsratios(self):
398450

399451
@cache_readonly
400452
def cumulative_oddsratios(self):
401-
# docstring for cached attributes in init above
453+
"""
454+
Returns the cumulative odds ratios for a contingency table.
455+
456+
See documentation for cumulative_log_oddsratios.
457+
"""
402458

403459
return np.exp(self.cumulative_log_oddsratios)
404460

@@ -563,9 +619,8 @@ def homogeneity(self, method="stuart_maxwell"):
563619
try:
564620
statistic = n_obs * np.dot(d, np.linalg.solve(vmat, d))
565621
except np.linalg.LinAlgError:
566-
import warnings
567622
warnings.warn("Unable to invert covariance matrix",
568-
SingularMatrixWarning)
623+
sm_exceptions.SingularMatrixWarning)
569624
b = _Bunch()
570625
b.statistic = np.nan
571626
b.pvalue = np.nan
@@ -686,21 +741,27 @@ def from_data(cls, data, shift_zeros=True):
686741

687742
@cache_readonly
688743
def log_oddsratio(self):
689-
# docstring for cached attributes in init above
744+
"""
745+
Returns the log odds ratio for a 2x2 table.
746+
"""
690747

691748
f = self.table.flatten()
692749
return np.dot(np.log(f), np.r_[1, -1, -1, 1])
693750

694751
@cache_readonly
695752
def oddsratio(self):
696-
# docstring for cached attributes in init above
753+
"""
754+
Returns the odds ratio for a 2x2 table.
755+
"""
697756

698757
return (self.table[0, 0] * self.table[1, 1] /
699758
(self.table[0, 1] * self.table[1, 0]))
700759

701760
@cache_readonly
702761
def log_oddsratio_se(self):
703-
# docstring for cached attributes in init above
762+
"""
763+
Returns the standard error for the log odds ratio.
764+
"""
704765

705766
return np.sqrt(np.sum(1 / self.table))
706767

@@ -769,20 +830,28 @@ def oddsratio_confint(self, alpha=0.05, method="normal"):
769830

770831
@cache_readonly
771832
def riskratio(self):
772-
# docstring for cached attributes in init above
833+
"""
834+
Returns the risk ratio for a 2x2 table.
835+
836+
The risk ratio is calculated with respect to the rows.
837+
"""
773838

774839
p = self.table[:, 0] / self.table.sum(1)
775840
return p[0] / p[1]
776841

777842
@cache_readonly
778843
def log_riskratio(self):
779-
# docstring for cached attributes in init above
844+
"""
845+
Returns the log of the risk ratio.
846+
"""
780847

781848
return np.log(self.riskratio)
782849

783850
@cache_readonly
784851
def log_riskratio_se(self):
785-
# docstring for cached attributes in init above
852+
"""
853+
Returns the standard error of the log of the risk ratio.
854+
"""
786855

787856
n = self.table.sum(1)
788857
p = self.table[:, 0] / n
@@ -915,17 +984,19 @@ class StratifiedTable(object):
915984
An estimate of the pooled log odds ratio. This is the
916985
Mantel-Haenszel estimate of an odds ratio that is common to
917986
all the tables.
918-
log_oddsratio_se : float
987+
logodds_pooled_se : float
919988
The estimated standard error of the pooled log odds ratio,
920989
following Robins, Breslow and Greenland (Biometrics
921990
42:311-323).
922991
oddsratio_pooled : float
923992
An estimate of the pooled odds ratio. This is the
924993
Mantel-Haenszel estimate of an odds ratio that is common to
925994
all tables.
926-
risk_pooled : float
995+
riskratio_pooled : float
927996
An estimate of the pooled risk ratio. This is an estimate of
928997
a risk ratio that is common to all the tables.
998+
risk_pooled : float
999+
Same as riskratio_pooled, deprecated.
9291000
9301001
Notes
9311002
-----
@@ -1055,30 +1126,42 @@ def test_null_odds(self, correction=False):
10551126

10561127
@cache_readonly
10571128
def oddsratio_pooled(self):
1058-
# doc for cached attributes in init above
1129+
"""
1130+
The pooled odds ratio.
10591131
1132+
The value is an estimate of a common odds ratio across all of the
1133+
stratified tables.
1134+
"""
10601135
odds_ratio = np.sum(self._ad / self._n) / np.sum(self._bc / self._n)
10611136
return odds_ratio
10621137

10631138
@cache_readonly
10641139
def logodds_pooled(self):
1065-
# doc for cached attributes in init above
1140+
"""
1141+
Returns the logarithm of the pooled odds ratio.
10661142
1143+
See oddsratio_pooled for more information.
1144+
"""
10671145
return np.log(self.oddsratio_pooled)
10681146

10691147
@cache_readonly
1070-
def risk_pooled(self):
1071-
# doc for cached attributes in init above
1148+
def riskratio_pooled(self):
10721149

10731150
acd = self.table[0, 0, :] * self._cpd
10741151
cab = self.table[1, 0, :] * self._apb
10751152

10761153
rr = np.sum(acd / self._n) / np.sum(cab / self._n)
10771154
return rr
10781155

1156+
@cache_readonly
1157+
def risk_pooled(self):
1158+
# Deprecated due to name being misleading
1159+
msg = "'risk_pooled' is deprecated, use 'riskratio_pooled' instead"
1160+
warnings.warn(msg, DeprecationWarning)
1161+
return self.riskratio_pooled
1162+
10791163
@cache_readonly
10801164
def logodds_pooled_se(self):
1081-
# doc for cached attributes in init above
10821165

10831166
adns = np.sum(self._ad / self._n)
10841167
bcns = np.sum(self._bc / self._n)
@@ -1233,7 +1316,7 @@ def fmt(x):
12331316
stubs = ["Pooled odds", "Pooled log odds", "Pooled risk ratio", ""]
12341317
data = [[fmt(x) for x in [self.oddsratio_pooled, co_lcb, co_ucb]],
12351318
[fmt(x) for x in [self.logodds_pooled, clo_lcb, clo_ucb]],
1236-
[fmt(x) for x in [self.risk_pooled, "", ""]],
1319+
[fmt(x) for x in [self.riskratio_pooled, "", ""]],
12371320
['', '', '']]
12381321
tab1 = iolib.SimpleTable(data, headers, stubs, data_aligns="r",
12391322
table_dec_above='')

0 commit comments

Comments
 (0)