30
30
import numpy as np
31
31
from scipy import stats
32
32
import pandas as pd
33
+ import warnings
33
34
from statsmodels import iolib
34
- from statsmodels .tools . sm_exceptions import SingularMatrixWarning
35
+ from statsmodels .tools import sm_exceptions
35
36
36
37
37
38
def _make_df_square (table ):
@@ -148,7 +149,8 @@ def __init__(self, table, shift_zeros=True):
148
149
self .table = self .table + 0.5
149
150
150
151
def __str__ (self ):
151
- s = "A %dx%d contingency table with counts:\n " % tuple (self .table .shape )
152
+ s = ("A %dx%d contingency table with counts:\n " %
153
+ tuple (self .table .shape ))
152
154
s += np .array_str (self .table )
153
155
return s
154
156
@@ -293,7 +295,16 @@ def test_ordinal_association(self, row_scores=None, col_scores=None):
293
295
294
296
@cache_readonly
295
297
def marginal_probabilities (self ):
296
- # docstring for cached attributes in init above
298
+ """
299
+ Estimate marginal probability distributions for the rows and columns.
300
+
301
+ Returns
302
+ -------
303
+ row : ndarray
304
+ Marginal row probabilities
305
+ col : ndarray
306
+ Marginal column probabilities
307
+ """
297
308
298
309
n = self .table .sum ()
299
310
row = self .table .sum (1 ) / n
@@ -307,7 +318,13 @@ def marginal_probabilities(self):
307
318
308
319
@cache_readonly
309
320
def independence_probabilities (self ):
310
- # docstring for cached attributes in init above
321
+ """
322
+ Returns fitted joint probabilities under independence.
323
+
324
+ The returned table is outer(row, column), where row and
325
+ column are the estimated marginal distributions
326
+ of the rows and columns.
327
+ """
311
328
312
329
row , col = self .marginal_probabilities
313
330
itab = np .outer (row , col )
@@ -320,37 +337,60 @@ def independence_probabilities(self):
320
337
321
338
@cache_readonly
322
339
def fittedvalues (self ):
323
- # docstring for cached attributes in init above
340
+ """
341
+ Returns fitted cell counts under independence.
342
+
343
+ The returned cell counts are estimates under a model
344
+ where the rows and columns of the table are independent.
345
+ """
324
346
325
347
probs = self .independence_probabilities
326
348
fit = self .table .sum () * probs
327
349
return fit
328
350
329
351
@cache_readonly
330
352
def resid_pearson (self ):
331
- # docstring for cached attributes in init above
353
+ """
354
+ Returns Pearson residuals.
355
+
356
+ The Pearson residuals are calculated under a model where
357
+ the rows and columns of the table are independent.
358
+ """
332
359
333
360
fit = self .fittedvalues
334
361
resids = (self .table - fit ) / np .sqrt (fit )
335
362
return resids
336
363
337
364
@cache_readonly
338
365
def standardized_resids (self ):
339
- # docstring for cached attributes in init above
366
+ """
367
+ Returns standardized residuals under independence.
368
+ """
340
369
341
370
row , col = self .marginal_probabilities
342
371
sresids = self .resid_pearson / np .sqrt (np .outer (1 - row , 1 - col ))
343
372
return sresids
344
373
345
374
@cache_readonly
346
375
def chi2_contribs (self ):
347
- # docstring for cached attributes in init above
376
+ """
377
+ Returns the contributions to the chi^2 statistic for independence.
378
+
379
+ The returned table contains the contribution of each cell to the chi^2
380
+ test statistic for the null hypothesis that the rows and columns
381
+ are independent.
382
+ """
348
383
349
384
return self .resid_pearson ** 2
350
385
351
386
@cache_readonly
352
387
def local_log_oddsratios (self ):
353
- # docstring for cached attributes in init above
388
+ """
389
+ Returns local log odds ratios.
390
+
391
+ The local log odds ratios are the log odds ratios
392
+ calculated for contiguous 2x2 sub-tables.
393
+ """
354
394
355
395
ta = self .table .copy ()
356
396
a = ta [0 :- 1 , 0 :- 1 ]
@@ -370,13 +410,25 @@ def local_log_oddsratios(self):
370
410
371
411
@cache_readonly
372
412
def local_oddsratios (self ):
373
- # docstring for cached attributes in init above
413
+ """
414
+ Returns local odds ratios.
415
+
416
+ See documentation for local_log_oddsratios.
417
+ """
374
418
375
419
return np .exp (self .local_log_oddsratios )
376
420
377
421
@cache_readonly
378
422
def cumulative_log_oddsratios (self ):
379
- # docstring for cached attributes in init above
423
+ """
424
+ Returns cumulative log odds ratios.
425
+
426
+ The cumulative log odds ratios for a contingency table
427
+ with ordered rows and columns are calculated by collapsing
428
+ all cells to the left/right and above/below a given point,
429
+ to obtain a 2x2 table from which a log odds ratio can be
430
+ calculated.
431
+ """
380
432
381
433
ta = self .table .cumsum (0 ).cumsum (1 )
382
434
@@ -398,7 +450,11 @@ def cumulative_log_oddsratios(self):
398
450
399
451
@cache_readonly
400
452
def cumulative_oddsratios (self ):
401
- # docstring for cached attributes in init above
453
+ """
454
+ Returns the cumulative odds ratios for a contingency table.
455
+
456
+ See documentation for cumulative_log_oddsratios.
457
+ """
402
458
403
459
return np .exp (self .cumulative_log_oddsratios )
404
460
@@ -563,9 +619,8 @@ def homogeneity(self, method="stuart_maxwell"):
563
619
try :
564
620
statistic = n_obs * np .dot (d , np .linalg .solve (vmat , d ))
565
621
except np .linalg .LinAlgError :
566
- import warnings
567
622
warnings .warn ("Unable to invert covariance matrix" ,
568
- SingularMatrixWarning )
623
+ sm_exceptions . SingularMatrixWarning )
569
624
b = _Bunch ()
570
625
b .statistic = np .nan
571
626
b .pvalue = np .nan
@@ -686,21 +741,27 @@ def from_data(cls, data, shift_zeros=True):
686
741
687
742
@cache_readonly
688
743
def log_oddsratio (self ):
689
- # docstring for cached attributes in init above
744
+ """
745
+ Returns the log odds ratio for a 2x2 table.
746
+ """
690
747
691
748
f = self .table .flatten ()
692
749
return np .dot (np .log (f ), np .r_ [1 , - 1 , - 1 , 1 ])
693
750
694
751
@cache_readonly
695
752
def oddsratio (self ):
696
- # docstring for cached attributes in init above
753
+ """
754
+ Returns the odds ratio for a 2x2 table.
755
+ """
697
756
698
757
return (self .table [0 , 0 ] * self .table [1 , 1 ] /
699
758
(self .table [0 , 1 ] * self .table [1 , 0 ]))
700
759
701
760
@cache_readonly
702
761
def log_oddsratio_se (self ):
703
- # docstring for cached attributes in init above
762
+ """
763
+ Returns the standard error for the log odds ratio.
764
+ """
704
765
705
766
return np .sqrt (np .sum (1 / self .table ))
706
767
@@ -769,20 +830,28 @@ def oddsratio_confint(self, alpha=0.05, method="normal"):
769
830
770
831
@cache_readonly
771
832
def riskratio (self ):
772
- # docstring for cached attributes in init above
833
+ """
834
+ Returns the risk ratio for a 2x2 table.
835
+
836
+ The risk ratio is calculated with respect to the rows.
837
+ """
773
838
774
839
p = self .table [:, 0 ] / self .table .sum (1 )
775
840
return p [0 ] / p [1 ]
776
841
777
842
@cache_readonly
778
843
def log_riskratio (self ):
779
- # docstring for cached attributes in init above
844
+ """
845
+ Returns the log of the risk ratio.
846
+ """
780
847
781
848
return np .log (self .riskratio )
782
849
783
850
@cache_readonly
784
851
def log_riskratio_se (self ):
785
- # docstring for cached attributes in init above
852
+ """
853
+ Returns the standard error of the log of the risk ratio.
854
+ """
786
855
787
856
n = self .table .sum (1 )
788
857
p = self .table [:, 0 ] / n
@@ -915,17 +984,19 @@ class StratifiedTable(object):
915
984
An estimate of the pooled log odds ratio. This is the
916
985
Mantel-Haenszel estimate of an odds ratio that is common to
917
986
all the tables.
918
- log_oddsratio_se : float
987
+ logodds_pooled_se : float
919
988
The estimated standard error of the pooled log odds ratio,
920
989
following Robins, Breslow and Greenland (Biometrics
921
990
42:311-323).
922
991
oddsratio_pooled : float
923
992
An estimate of the pooled odds ratio. This is the
924
993
Mantel-Haenszel estimate of an odds ratio that is common to
925
994
all tables.
926
- risk_pooled : float
995
+ riskratio_pooled : float
927
996
An estimate of the pooled risk ratio. This is an estimate of
928
997
a risk ratio that is common to all the tables.
998
+ risk_pooled : float
999
+ Same as riskratio_pooled, deprecated.
929
1000
930
1001
Notes
931
1002
-----
@@ -1055,30 +1126,42 @@ def test_null_odds(self, correction=False):
1055
1126
1056
1127
@cache_readonly
1057
1128
def oddsratio_pooled (self ):
1058
- # doc for cached attributes in init above
1129
+ """
1130
+ The pooled odds ratio.
1059
1131
1132
+ The value is an estimate of a common odds ratio across all of the
1133
+ stratified tables.
1134
+ """
1060
1135
odds_ratio = np .sum (self ._ad / self ._n ) / np .sum (self ._bc / self ._n )
1061
1136
return odds_ratio
1062
1137
1063
1138
@cache_readonly
1064
1139
def logodds_pooled (self ):
1065
- # doc for cached attributes in init above
1140
+ """
1141
+ Returns the logarithm of the pooled odds ratio.
1066
1142
1143
+ See oddsratio_pooled for more information.
1144
+ """
1067
1145
return np .log (self .oddsratio_pooled )
1068
1146
1069
1147
@cache_readonly
1070
- def risk_pooled (self ):
1071
- # doc for cached attributes in init above
1148
+ def riskratio_pooled (self ):
1072
1149
1073
1150
acd = self .table [0 , 0 , :] * self ._cpd
1074
1151
cab = self .table [1 , 0 , :] * self ._apb
1075
1152
1076
1153
rr = np .sum (acd / self ._n ) / np .sum (cab / self ._n )
1077
1154
return rr
1078
1155
1156
+ @cache_readonly
1157
+ def risk_pooled (self ):
1158
+ # Deprecated due to name being misleading
1159
+ msg = "'risk_pooled' is deprecated, use 'riskratio_pooled' instead"
1160
+ warnings .warn (msg , DeprecationWarning )
1161
+ return self .riskratio_pooled
1162
+
1079
1163
@cache_readonly
1080
1164
def logodds_pooled_se (self ):
1081
- # doc for cached attributes in init above
1082
1165
1083
1166
adns = np .sum (self ._ad / self ._n )
1084
1167
bcns = np .sum (self ._bc / self ._n )
@@ -1233,7 +1316,7 @@ def fmt(x):
1233
1316
stubs = ["Pooled odds" , "Pooled log odds" , "Pooled risk ratio" , "" ]
1234
1317
data = [[fmt (x ) for x in [self .oddsratio_pooled , co_lcb , co_ucb ]],
1235
1318
[fmt (x ) for x in [self .logodds_pooled , clo_lcb , clo_ucb ]],
1236
- [fmt (x ) for x in [self .risk_pooled , "" , "" ]],
1319
+ [fmt (x ) for x in [self .riskratio_pooled , "" , "" ]],
1237
1320
['' , '' , '' ]]
1238
1321
tab1 = iolib .SimpleTable (data , headers , stubs , data_aligns = "r" ,
1239
1322
table_dec_above = '' )
0 commit comments