-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathvardict2vcf.py
executable file
·43 lines (36 loc) · 1.24 KB
/
vardict2vcf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""
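Takes a VarDict output file and writes it out as VCF-style records (no VCF header lines are written).
WARNING: VarDict sometimes omits newlines/tabs, so some calls are truncated. Because the omissions are inconsistent, this script does not try to repair them: any row with fewer than 34 whitespace-separated fields is reported as 'SKIPPED' and dropped.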
"""
import sys
import os
def parseVar(varInput, vcf):
    """Write one VCF-style record to ``vcf`` for every complete row of ``varInput``.

    Each input line is split on whitespace. Lines that yield fewer than 34
    fields are not converted: ``SKIPPED`` and the field count are printed to
    stdout and the line is dropped. For every other line, eight columns are
    written: field 1, field 3, ``.``, field 5, field 6, field 20, the literal
    ``None``, and an INFO-style string ``AF=<field 14>;TC=<field 7>;AC=<field 8>``
    (field numbers are 0-based indexes into the split line).

    Args:
        varInput: Path of the VarDict output file to read.
        vcf: Writable text file-like object; only its ``write`` method is used.
            It is not closed here.

    Returns:
        None.
    """
    # Use a context manager so the input handle is closed deterministically,
    # including when a write to ``vcf`` raises part-way through the file.
    with open(varInput, 'r') as handle:
        for line in handle:
            each = line.rstrip().split()
            if len(each) < 34:
                # Truncated/malformed row: report it and move on.
                print('SKIPPED')
                print(len(each))
                continue

            # NOTE(review): VarDict's documented column order starts
            # Sample, Gene, Chr, Start, End, ... which would put the
            # chromosome at index 2, not 1 -- confirm against real inputs.
            chrm = each[1]
            start = each[3]
            refAllele = each[5]
            altAllele = each[6]
            coverage = each[7]
            altDepth = each[8]
            VAF = each[14]
            mapQual = each[20]
            info = 'AF=' + str(VAF) + ';TC=' + str(coverage) + ';AC=' + str(altDepth)

            # NOTE(review): the separator is a tab padded with a space on each
            # side and the seventh column is the literal 'None'. Both are kept
            # byte-for-byte because downstream consumers may rely on them;
            # confirm whether strict tab-only VCF output is wanted.
            vcf.write('{0} \t {1} \t {2} \t {3} \t {4} \t {5} \t {6} \t {7} \n'.format(
                chrm, start, '.', refAllele, altAllele, mapQual, 'None', info))
# Script entry: the first command-line argument is the path of the VarDict
# output file to convert.
var = sys.argv[1]
# The result is written to "<input path>.vcf", i.e. alongside the input file
# (an earlier variant used only the input's basename, which would place the
# output in the current working directory instead).
# The context manager guarantees the output is flushed and closed even if
# parseVar raises part-way through.
with open(var + '.vcf', 'w+') as vcf:
    # Echo the opened file object so the user can see the output path.
    print(str(vcf))
    parseVar(var, vcf)