-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathploidy_check.py
45 lines (35 loc) · 1.04 KB
/
ploidy_check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import matplotlib.pyplot as plt
# Input VCF and output figure locations used when the script is executed.
vcf_file = "At7_variants_vs_Col.vcf"
fig_file = "variant_frequencies.png"
cov_fig_file = "variant_coverages.png"


def _parse_read_counts(line):
    """Return ``(first, second)`` read counts of one VCF data line, or ``None``.

    The counts are read from the second colon-separated entry of the last
    tab-separated column, split on commas.
    NOTE(review): this is presumably the AD (allelic depth, ref,alt) entry of
    a ``GT:AD:...`` sample column -- confirm against the FORMAT column of the
    VCF files this script is run on.

    ``None`` is returned (so the caller can skip the record) when the sample
    column has no second entry, when it does not hold exactly two counts
    (e.g. multi-allelic sites), or when a count is not numeric (e.g. ``.``).
    """
    sample_entries = line.strip().split('\t')[-1].split(':')
    if len(sample_entries) < 2:
        return None
    counts = sample_entries[1].split(',')
    if len(counts) != 2:
        return None
    try:
        return float(counts[0]), float(counts[1])
    except ValueError:
        return None


def collect_variant_stats(lines, min_coverage=20, het_min=0.05, het_max=0.95):
    """Collect per-variant coverage and heterozygous allele frequencies.

    Args:
        lines: iterable of VCF text lines (an open file handle works).
        min_coverage: a variant contributes an allele frequency only if its
            total read count is strictly greater than this value.
        het_min: lowest first-allele frequency still treated as heterozygous.
        het_max: highest first-allele frequency still treated as heterozygous.

    Returns:
        ``(coverage, values)`` where ``coverage`` holds the summed read count
        of every usable two-count record and ``values`` holds
        ``first / (first + second)`` for the heterozygous, sufficiently
        covered subset.
    """
    coverage = []
    values = []
    for line in lines:
        if line.startswith('#'):
            continue  # meta-information and column header lines
        counts = _parse_read_counts(line)
        if counts is None:
            continue
        first, second = counts
        total = first + second
        coverage.append(total)
        # Checking the coverage first also guarantees total > 0 for the
        # default threshold, so the division below cannot fail on records
        # with zero reads.
        if total <= min_coverage or total <= 0:
            continue
        # Classify on the allele *frequency*. Comparing the ratio
        # first/second against these cut-offs (as was done before) divides by
        # zero when the second count is 0 and labels balanced 50/50 sites as
        # homozygous, which removes them from the histogram.
        frequency = first / total
        if het_min <= frequency <= het_max:
            values.append(frequency)
    return coverage, values


def _save_histogram(data, out_file, xlabel, bins, xlim=None):
    """Draw a lime histogram of ``data`` and save it to ``out_file`` at 300 dpi."""
    fig, ax = plt.subplots()
    ax.hist(data, bins=bins, color="lime")
    if xlim is not None:
        ax.set_xlim(*xlim)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("number of variants")
    fig.savefig(out_file, dpi=300)
    plt.close(fig)  # release the figure once it has been written


def main():
    """Read ``vcf_file`` and write the coverage and allele-frequency plots."""
    with open(vcf_file, "r") as handle:
        coverage, values = collect_variant_stats(handle)
    _save_histogram(
        coverage, cov_fig_file, "sequencing coverage", bins=10000, xlim=(0, 300)
    )
    _save_histogram(values, fig_file, "allele frequency", bins=100)


if __name__ == "__main__":
    main()