-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathGF_AF_functions.py
101 lines (71 loc) · 3.32 KB
/
GF_AF_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
@author: dpava
"""
''' This module contains functions to calculate genotype and allele frequencies tailored to the needs of the JIDA web application.'''
### Genotype frequency function ###
def gtfreq(GT00, GT01, GT10, GT11):
    """Calculate the frequency of each genotype from the genotype counts.

    Parameters
    ----------
    GT00 : int
        Number of samples with genotype 0|0.
    GT01 : int
        Number of samples with genotype 0|1.
    GT10 : int
        Number of samples with genotype 1|0.
    GT11 : int
        Number of samples with genotype 1|1.

    Description
    -----------
    Each genotype (GT) count is divided by the total sample count, which is
    the sum of the four GT counts. The function assumes diploid samples, so
    it takes exactly four counts, in the order 0|0, 0|1, 1|0, 1|1.

    When all four counts are zero there are no samples to divide by; in that
    case every frequency is reported as 0.0 instead of raising
    ZeroDivisionError.

    Returns
    -------
    dict
        Maps the genotype strings "0|0", "0|1", "1|0" and "1|1" to their
        frequency, rounded to 3 decimal places. Because each value is rounded
        independently, the four values may not add up to exactly 1.
    """
    # Pair every genotype label with its count, in the documented order.
    counts = {
        "0|0": GT00,
        "0|1": GT01,
        "1|0": GT10,
        "1|1": GT11,
    }

    # The total sample count is the sum of all genotype counts.
    sample_count = GT00 + GT01 + GT10 + GT11

    # No samples: frequencies are undefined, so report zeros rather than
    # dividing by zero.
    if sample_count == 0:
        return {genotype: 0.0 for genotype in counts}

    # Frequency of a genotype = its count / total samples, to 3 decimals.
    return {
        genotype: round(count / sample_count, 3)
        for genotype, count in counts.items()
    }
# -----------------------------------------------------------------------------------------------------------------------------------------------------------#
### Allele frequency function ###
def allefreq(GT00, GT01, GT10, GT11):
    """Calculate the frequency of the 0 and 1 alleles from genotype counts.

    Parameters
    ----------
    GT00 : int
        Number of samples with genotype 0|0.
    GT01 : int
        Number of samples with genotype 0|1.
    GT10 : int
        Number of samples with genotype 1|0.
    GT11 : int
        Number of samples with genotype 1|1.

    Description
    -----------
    The function assumes diploid samples, so it takes exactly four genotype
    (GT) counts, in the order 0|0, 0|1, 1|0, 1|1. Every sample carries two
    alleles: a homozygous sample contributes two copies of its allele and a
    heterozygous sample (0|1 or 1|0) contributes one copy of each. The number
    of copies of an allele is divided by the total number of alleles, which
    is twice the sample count (the sum of all GT counts).

    When all four counts are zero there are no alleles to divide by; in that
    case both frequencies are reported as 0.0 instead of raising
    ZeroDivisionError.

    Returns
    -------
    dict
        Maps the allele numbers 0 and 1 (as ints) to their frequency, rounded
        to 3 decimal places.
    """
    # The total sample count is the sum of all genotype counts.
    sample_count = GT00 + GT01 + GT10 + GT11

    # No samples: frequencies are undefined, so report zeros rather than
    # dividing by zero.
    if sample_count == 0:
        return {0: 0.0, 1: 0.0}

    # Each diploid sample carries two alleles.
    allele_total = sample_count * 2

    # Heterozygous samples contribute one copy to each allele.
    heterozygous = GT01 + GT10

    return {
        0: round(((GT00 * 2) + heterozygous) / allele_total, 3),
        1: round(((GT11 * 2) + heterozygous) / allele_total, 3),
    }