forked from zhanxw/rvtests
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsnp_hwe.c
112 lines (91 loc) · 3.34 KB
/
snp_hwe.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// obtained from http://www.sph.umich.edu/csg/abecasis/Exact/snp_hwe.c
// 2012-11-29 Xiaowei
/*
// This code implements an exact SNP test of Hardy-Weinberg Equilibrium as described in
// Wigginton, JE, Cutler, DJ, and Abecasis, GR (2005) A Note on Exact Tests of
// Hardy-Weinberg Equilibrium. American Journal of Human Genetics. 76: 000 - 000
//
// Written by Jan Wigginton
*/
#include <vector>
/**
* NOTE (by zhanxw)
* !! Make sure not all parameters equal to 0, or the program will crash.
*/
inline double SNPHWE(int obs_hets, int obs_hom1, int obs_hom2)
{
if (obs_hom1 < 0 || obs_hom2 < 0 || obs_hets < 0)
{
printf("FATAL ERROR - SNP-HWE: Current genotype configuration (%d %d %d ) includes a"
" negative count", obs_hets, obs_hom1, obs_hom2);
exit(EXIT_FAILURE);
}
int obs_homc = obs_hom1 < obs_hom2 ? obs_hom2 : obs_hom1;
int obs_homr = obs_hom1 < obs_hom2 ? obs_hom1 : obs_hom2;
int rare_copies = 2 * obs_homr + obs_hets;
int genotypes = obs_hets + obs_homc + obs_homr;
// double * het_probs = (double *) malloc((size_t) (rare_copies + 1) * sizeof(double));
// if (het_probs == NULL)
// {
// printf("FATAL ERROR - SNP-HWE: Unable to allocate array for heterozygote probabilities" );
// exit(EXIT_FAILURE);
// }
static std::vector<double> het_probs;
het_probs.resize((size_t) (rare_copies + 1));
int i;
for (i = 0; i <= rare_copies; i++)
het_probs[i] = 0.0;
/* start at midpoint */
int mid = 1.0 * rare_copies * (2 * genotypes - rare_copies) / (2 * genotypes);
/* check to ensure that midpoint and rare alleles have same parity */
if ((rare_copies & 1) ^ (mid & 1))
mid++;
int curr_hets = mid;
int curr_homr = (rare_copies - mid) / 2;
int curr_homc = genotypes - curr_hets - curr_homr;
het_probs[mid] = 1.0;
double sum = het_probs[mid];
for (curr_hets = mid; curr_hets > 1; curr_hets -= 2)
{
het_probs[curr_hets - 2] = het_probs[curr_hets] * curr_hets * (curr_hets - 1.0)
/ (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0));
sum += het_probs[curr_hets - 2];
/* 2 fewer heterozygotes for next iteration -> add one rare, one common homozygote */
curr_homr++;
curr_homc++;
}
curr_hets = mid;
curr_homr = (rare_copies - mid) / 2;
curr_homc = genotypes - curr_hets - curr_homr;
for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2)
{
het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 * curr_homr * curr_homc
/((curr_hets + 2.0) * (curr_hets + 1.0));
sum += het_probs[curr_hets + 2];
/* add 2 heterozygotes for next iteration -> subtract one rare, one common homozygote */
curr_homr--;
curr_homc--;
}
for (i = 0; i <= rare_copies; i++)
het_probs[i] /= sum;
/* alternate p-value calculation for p_hi/p_lo
double p_hi = het_probs[obs_hets];
for (i = obs_hets + 1; i <= rare_copies; i++)
p_hi += het_probs[i];
double p_lo = het_probs[obs_hets];
for (i = obs_hets - 1; i >= 0; i--)
p_lo += het_probs[i];
double p_hi_lo = p_hi < p_lo ? 2.0 * p_hi : 2.0 * p_lo;
*/
double p_hwe = 0.0;
/* p-value calculation for p_hwe */
for (i = 0; i <= rare_copies; i++)
{
if (het_probs[i] > het_probs[obs_hets])
continue;
p_hwe += het_probs[i];
}
p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe;
// free(het_probs);
return p_hwe;
}