-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkruskal_wallis.py
135 lines (98 loc) · 3.98 KB
/
kruskal_wallis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 4 10:45:00 2022
@author: l1021338
"""
import numpy as np
import warnings
import scipy.stats as stats
from itertools import combinations
from statsmodels.stats.multitest import multipletests
def kw_dunn(groups, to_compare=None, alpha=0.05, method='bonf'):
"""
Kruskal-Wallis 1-way ANOVA with Dunn's multiple comparison test
Arguments:
---------------
groups: sequence
arrays corresponding to k mutually independent samples from
continuous populations
to_compare: sequence
tuples specifying the indices of pairs of groups to compare, e.g.
[(0, 1), (0, 2)] would compare group 0 with 1 & 2. by default, all
possible pairwise comparisons between groups are performed.
alpha: float
family-wise error rate used for correcting for multiple comparisons
(see statsmodels.stats.multitest.multipletests for details)
method: string
method used to adjust p-values to account for multiple corrections (see
statsmodels.stats.multitest.multipletests for options)
Returns:
---------------
H: float
Kruskal-Wallis H-statistic
p_omnibus: float
p-value corresponding to the global null hypothesis that the medians of
the groups are all equal
Z_pairs: float array
Z-scores computed for the absolute difference in mean ranks for each
pairwise comparison
p_corrected: float array
corrected p-values for each pairwise comparison, corresponding to the
null hypothesis that the pair of groups has equal medians. note that
these are only meaningful if the global null hypothesis is rejected.
reject: bool array
True for pairs where the null hypothesis can be rejected for the given
alpha
Reference:
---------------
Gibbons, J. D., & Chakraborti, S. (2011). Nonparametric Statistical
Inference (5th ed., pp. 353-357). Boca Raton, FL: Chapman & Hall.
"""
# omnibus test (K-W ANOVA)
# -------------------------------------------------------------------------
groups = [np.array(gg) for gg in groups]
k = len(groups)
n = np.array([len(gg) for gg in groups])
if np.any(n < 5):
warnings.warn("Sample sizes < 5 are not recommended (K-W test assumes "
"a chi square distribution)")
allgroups = np.concatenate(groups)
N = len(allgroups)
ranked = stats.rankdata(allgroups)
# correction factor for ties
T = stats.tiecorrect(ranked)
if T == 0:
raise ValueError('All numbers are identical in kruskal')
# sum of ranks for each group
j = np.insert(np.cumsum(n), 0, 0)
R = np.empty(k, dtype=np.float)
for ii in range(k):
R[ii] = ranked[j[ii]:j[ii + 1]].sum()
# the Kruskal-Wallis H-statistic
H = (12. / (N * (N + 1.))) * ((R ** 2.) / n).sum() - 3 * (N + 1)
# apply correction factor for ties
H /= T
df_omnibus = k - 1
p_omnibus = stats.distributions.chi2.sf(H, df_omnibus)
# multiple comparisons
# -------------------------------------------------------------------------
# by default we compare every possible pair of groups
if to_compare is None:
to_compare = tuple(combinations(range(k), 2))
ncomp = len(to_compare)
Z_pairs = np.empty(ncomp, dtype=np.float)
p_uncorrected = np.empty(ncomp, dtype=np.float)
Rmean = R / n
for pp, (ii, jj) in enumerate(to_compare):
# standardized score
Zij = (np.abs(Rmean[ii] - Rmean[jj]) /
np.sqrt((1. / 12.) * N * (N + 1) * (1. / n[ii] + 1. / n[jj])))
Z_pairs[pp] = Zij
# corresponding p-values obtained from upper quantiles of the standard
# normal distribution
p_uncorrected = stats.norm.sf(Z_pairs) * 2.
# correction for multiple comparisons
reject, p_corrected, alphac_sidak, alphac_bonf = multipletests(
p_uncorrected, method=method)
return H, p_omnibus, Z_pairs, p_corrected, reject