Merge pull request statsmodels#4760 from kshedden/riskratio

kshedden · web-flow · commit c8841e83bcbc · 2018-09-16T10:44:20.000-04:00
@bashtage CLN: Provide better name for pooled risk ratio
diff --git a/statsmodels/stats/contingency_tables.py b/statsmodels/stats/contingency_tables.py
@@ -30,8 +30,9 @@
 import numpy as np
 from scipy import stats
 import pandas as pd
+import warnings
 from statsmodels import iolib
-from statsmodels.tools.sm_exceptions import SingularMatrixWarning
+from statsmodels.tools import sm_exceptions
 
 
 def _make_df_square(table):
@@ -148,7 +149,8 @@ def __init__(self, table, shift_zeros=True):
             self.table = self.table + 0.5
 
     def __str__(self):
-        s = "A %dx%d contingency table with counts:\n" % tuple(self.table.shape)
+        s = ("A %dx%d contingency table with counts:\n" %
+             tuple(self.table.shape))
         s += np.array_str(self.table)
         return s
 
@@ -293,7 +295,16 @@ def test_ordinal_association(self, row_scores=None, col_scores=None):
 
     @cache_readonly
     def marginal_probabilities(self):
-        # docstring for cached attributes in init above
+        """
+        Estimate marginal probability distributions for the rows and columns.
+
+        Returns
+        -------
+        row : ndarray
+            Marginal row probabilities
+        col : ndarray
+            Marginal column probabilities
+        """
 
         n = self.table.sum()
         row = self.table.sum(1) / n
@@ -307,7 +318,13 @@ def marginal_probabilities(self):
 
     @cache_readonly
     def independence_probabilities(self):
-        # docstring for cached attributes in init above
+        """
+        Returns fitted joint probabilities under independence.
+
+        The returned table is outer(row, column), where row and
+        column are the estimated marginal distributions
+        of the rows and columns.
+        """
 
         row, col = self.marginal_probabilities
         itab = np.outer(row, col)
@@ -320,37 +337,60 @@ def independence_probabilities(self):
 
     @cache_readonly
     def fittedvalues(self):
-        # docstring for cached attributes in init above
+        """
+        Returns fitted cell counts under independence.
+
+        The returned cell counts are estimates under a model
+        where the rows and columns of the table are independent.
+        """
 
         probs = self.independence_probabilities
         fit = self.table.sum() * probs
         return fit
 
     @cache_readonly
     def resid_pearson(self):
-        # docstring for cached attributes in init above
+        """
+        Returns Pearson residuals.
+
+        The Pearson residuals are calculated under a model where
+        the rows and columns of the table are independent.
+        """
 
         fit = self.fittedvalues
         resids = (self.table - fit) / np.sqrt(fit)
         return resids
 
     @cache_readonly
     def standardized_resids(self):
-        # docstring for cached attributes in init above
+        """
+        Returns standardized residuals under independence.
+        """
 
         row, col = self.marginal_probabilities
         sresids = self.resid_pearson / np.sqrt(np.outer(1 - row, 1 - col))
         return sresids
 
     @cache_readonly
     def chi2_contribs(self):
-        # docstring for cached attributes in init above
+        """
+        Returns the contributions to the chi^2 statistic for independence.
+
+        The returned table contains the contribution of each cell to the chi^2
+        test statistic for the null hypothesis that the rows and columns
+        are independent.
+        """
 
         return self.resid_pearson**2
 
     @cache_readonly
     def local_log_oddsratios(self):
-        # docstring for cached attributes in init above
+        """
+        Returns local log odds ratios.
+
+        The local log odds ratios are the log odds ratios
+        calculated for contiguous 2x2 sub-tables.
+        """
 
         ta = self.table.copy()
         a = ta[0:-1, 0:-1]
@@ -370,13 +410,25 @@ def local_log_oddsratios(self):
 
     @cache_readonly
     def local_oddsratios(self):
-        # docstring for cached attributes in init above
+        """
+        Returns local odds ratios.
+
+        See documentation for local_log_oddsratios.
+        """
 
         return np.exp(self.local_log_oddsratios)
 
     @cache_readonly
     def cumulative_log_oddsratios(self):
-        # docstring for cached attributes in init above
+        """
+        Returns cumulative log odds ratios.
+
+        The cumulative log odds ratios for a contingency table
+        with ordered rows and columns are calculated by collapsing
+        all cells to the left/right and above/below a given point,
+        to obtain a 2x2 table from which a log odds ratio can be
+        calculated.
+        """
 
         ta = self.table.cumsum(0).cumsum(1)
 
@@ -398,7 +450,11 @@ def cumulative_log_oddsratios(self):
 
     @cache_readonly
     def cumulative_oddsratios(self):
-        # docstring for cached attributes in init above
+        """
+        Returns the cumulative odds ratios for a contingency table.
+
+        See documentation for cumulative_log_oddsratios.
+        """
 
         return np.exp(self.cumulative_log_oddsratios)
 
@@ -563,9 +619,8 @@ def homogeneity(self, method="stuart_maxwell"):
         try:
             statistic = n_obs * np.dot(d, np.linalg.solve(vmat, d))
         except np.linalg.LinAlgError:
-            import warnings
             warnings.warn("Unable to invert covariance matrix",
-                          SingularMatrixWarning)
+                          sm_exceptions.SingularMatrixWarning)
             b = _Bunch()
             b.statistic = np.nan
             b.pvalue = np.nan
@@ -686,21 +741,27 @@ def from_data(cls, data, shift_zeros=True):
 
     @cache_readonly
     def log_oddsratio(self):
-        # docstring for cached attributes in init above
+        """
+        Returns the log odds ratio for a 2x2 table.
+        """
 
         f = self.table.flatten()
         return np.dot(np.log(f), np.r_[1, -1, -1, 1])
 
     @cache_readonly
     def oddsratio(self):
-        # docstring for cached attributes in init above
+        """
+        Returns the odds ratio for a 2x2 table.
+        """
 
         return (self.table[0, 0] * self.table[1, 1] /
                 (self.table[0, 1] * self.table[1, 0]))
 
     @cache_readonly
     def log_oddsratio_se(self):
-        # docstring for cached attributes in init above
+        """
+        Returns the standard error for the log odds ratio.
+        """
 
         return np.sqrt(np.sum(1 / self.table))
 
@@ -769,20 +830,28 @@ def oddsratio_confint(self, alpha=0.05, method="normal"):
 
     @cache_readonly
     def riskratio(self):
-        # docstring for cached attributes in init above
+        """
+        Returns the risk ratio for a 2x2 table.
+
+        The risk ratio is calculated with respect to the rows.
+        """
 
         p = self.table[:, 0] / self.table.sum(1)
         return p[0] / p[1]
 
     @cache_readonly
     def log_riskratio(self):
-        # docstring for cached attributes in init above
+        """
+        Returns the log of the risk ratio.
+        """
 
         return np.log(self.riskratio)
 
     @cache_readonly
     def log_riskratio_se(self):
-        # docstring for cached attributes in init above
+        """
+        Returns the standard error of the log of the risk ratio.
+        """
 
         n = self.table.sum(1)
         p = self.table[:, 0] / n
@@ -915,17 +984,19 @@ class StratifiedTable(object):
         An estimate of the pooled log odds ratio.  This is the
         Mantel-Haenszel estimate of an odds ratio that is common to
         all the tables.
-    log_oddsratio_se : float
+    logodds_pooled_se : float
         The estimated standard error of the pooled log odds ratio,
         following Robins, Breslow and Greenland (Biometrics
         42:311-323).
     oddsratio_pooled : float
         An estimate of the pooled odds ratio.  This is the
         Mantel-Haenszel estimate of an odds ratio that is common to
         all tables.
-    risk_pooled : float
+    riskratio_pooled : float
         An estimate of the pooled risk ratio.  This is an estimate of
         a risk ratio that is common to all the tables.
+    risk_pooled : float
+        Same as riskratio_pooled, deprecated.
 
     Notes
     -----
@@ -1055,30 +1126,42 @@ def test_null_odds(self, correction=False):
 
     @cache_readonly
     def oddsratio_pooled(self):
-        # doc for cached attributes in init above
+        """
+        The pooled odds ratio.
 
+        The value is an estimate of a common odds ratio across all of the
+        stratified tables.
+        """
         odds_ratio = np.sum(self._ad / self._n) / np.sum(self._bc / self._n)
         return odds_ratio
 
     @cache_readonly
     def logodds_pooled(self):
-        # doc for cached attributes in init above
+        """
+        Returns the logarithm of the pooled odds ratio.
 
+        See oddsratio_pooled for more information.
+        """
         return np.log(self.oddsratio_pooled)
 
     @cache_readonly
-    def risk_pooled(self):
-        # doc for cached attributes in init above
+    def riskratio_pooled(self):
 
         acd = self.table[0, 0, :] * self._cpd
         cab = self.table[1, 0, :] * self._apb
 
         rr = np.sum(acd / self._n) / np.sum(cab / self._n)
         return rr
 
+    @cache_readonly
+    def risk_pooled(self):
+        # Deprecated due to name being misleading
+        msg = "'risk_pooled' is deprecated, use 'riskratio_pooled' instead"
+        warnings.warn(msg, DeprecationWarning)
+        return self.riskratio_pooled
+
     @cache_readonly
     def logodds_pooled_se(self):
-        # doc for cached attributes in init above
 
         adns = np.sum(self._ad / self._n)
         bcns = np.sum(self._bc / self._n)
@@ -1233,7 +1316,7 @@ def fmt(x):
         stubs = ["Pooled odds", "Pooled log odds", "Pooled risk ratio", ""]
         data = [[fmt(x) for x in [self.oddsratio_pooled, co_lcb, co_ucb]],
                 [fmt(x) for x in [self.logodds_pooled, clo_lcb, clo_ucb]],
-                [fmt(x) for x in [self.risk_pooled, "", ""]],
+                [fmt(x) for x in [self.riskratio_pooled, "", ""]],
                 ['', '', '']]
         tab1 = iolib.SimpleTable(data, headers, stubs, data_aligns="r",
                                  table_dec_above='')