1
1
import sys , yaml
2
2
import os , subprocess
3
+ import shlex
3
4
4
5
def die(msg):
    """Report a fatal error on stderr and terminate the script.

    The output is the script name (``sys.argv[0]``), a colon and a space,
    then ``msg`` followed by the line terminator.

    Args:
        msg: Text describing why the script cannot continue.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    sys.stderr.write("%s: " % sys.argv[0])
    sys.stderr.write(msg)
    sys.stderr.write("\n ")
    # sys.exit() is always available; the bare exit() builtin is only
    # injected by the site module and is missing under ``python -S``.
    sys.exit(1)
9
10
10
def chr_lists(autosomes, sexs, prefix=""):
    """Expand the configured chromosome specs into lists of contig names.

    Args:
        autosomes: Inclusive numeric range written as ``"start:end"``,
            e.g. ``"1:22"``.
        sexs: Comma-separated sex chromosome names, e.g. ``"X,Y"``.
        prefix: String prepended to every returned name. Defaults to the
            empty string so two-argument callers keep working.

    Returns:
        A tuple ``(autosome_names, sex_names)``; both are lists of ``str``.

    Raises:
        ValueError: If ``autosomes`` is not exactly two integers separated
            by ``":"``.
    """
    a_start, a_end = map(int, autosomes.split(":"))
    # Both ends of the range are inclusive, hence the + 1.
    autosomes_list = [prefix + str(number) for number in range(a_start, a_end + 1)]
    sex_list = [prefix + name for name in sexs.split(",")]
    return autosomes_list, sex_list
18
19
19
20
def pipe_commands (command_list ):
20
21
print ("process " + str (0 ) + " is: " + command_list [0 ])
21
- print (command_list [0 ].split ())
22
- plist = [subprocess .Popen (command_list [0 ].split (),stdout = subprocess .PIPE )]
22
+ command = shlex .split (command_list [0 ])
23
+ print (command )
24
+ plist = [subprocess .Popen (command ,stdout = subprocess .PIPE )]
23
25
#print("first process is " + str(plist[0].pid))
24
26
for i in range (1 ,len (command_list )):
25
27
print ("process " + str (i ) + " is: " + command_list [i ])
26
- print (command_list [i ]. split () )
28
+ command = shlex . split (command_list [i ])
27
29
#print("input is :")
28
30
#print(plist[i-1].stdout)
29
- plist .append (subprocess .Popen (command_list [ i ]. split () , stdin = plist [i - 1 ].stdout ,stdout = subprocess .PIPE ))
31
+ plist .append (subprocess .Popen (command , stdin = plist [i - 1 ].stdout ,stdout = subprocess .PIPE ))
30
32
#print("prev process is " + str(plist[i-1].pid))
31
33
#print("curr process is " + str(plist[i].pid))
32
34
#plist[i-1].stdout.close()
@@ -61,13 +63,15 @@ def check_plist(plist,name):
61
63
# Load the run configuration from the YAML file named on the command line.
cfile = sys.argv[1]
with open(cfile) as f:
    # safe_load only builds plain Python objects. yaml.load() without an
    # explicit Loader can construct arbitrary objects from the file and is
    # rejected outright by PyYAML >= 6.
    config = yaml.safe_load(f)

# "contigPrefix" is optional: a missing or empty entry means contig names
# are used without a prefix. Looking the key up explicitly (rather than
# wrapping the call in a bare except) lets genuine configuration errors in
# "autosomes" / "sexChr" surface instead of being retried and masked.
contig_prefix = config.get("contigPrefix") or ""
autosomes, sexs = chr_lists(config["autosomes"], config["sexChr"], contig_prefix)
chrs = autosomes + sexs

# "M" and "F" are comma-separated integer lists in the config.
mlist = [int(value) for value in config["M"].split(",")]
flist = [int(value) for value in config["F"].split(",")]
70
-
74
+
71
75
#print(flist)
72
76
try :
73
77
index = flist .index (2 )
@@ -78,16 +82,16 @@ def check_plist(plist,name):
78
82
diploid = "M"
79
83
except ValueError :
80
84
die ("diploid sex chr not found" )
81
-
85
+
82
86
testChrs = [autosomes [- 1 ],sexs [index ]]
83
87
plist = []
84
88
for chrom in testChrs :
85
89
print ("finding common var pos for " + chrom )
86
- command_list = ["bcftools view -q 0.2:nonmajor -v snps -R " + config ["regionsFile" ] + " " + config ["snpVCFpath" ] + chrom + config ["snpVCFname" ]]
90
+ command_list = ["bcftools view -i \' INFO/AF[0] > 0.2 \' -v snps -R " + config ["regionsFile" ] + " " + config ["snpVCFpath" ] + chrom + config ["snpVCFname" ]]
87
91
command_list .append ("grep -v ^#" )
88
92
p = pipe_commands (command_list )
89
93
plist .append (p )
90
- bedfile = open ("commonVar.chr " + chrom + ".bed" ,"w" )
94
+ bedfile = open ("commonVar." + chrom + ".bed" ,"w" )
91
95
while True :
92
96
line = p [- 1 ].stdout .readline ().decode ("utf8" )
93
97
if line != '' :
@@ -96,29 +100,29 @@ def check_plist(plist,name):
96
100
else :
97
101
break
98
102
bedfile .close ()
99
-
100
- #print(plist)
103
+
104
+ #print(plist)
101
105
for i in range (0 ,len (plist )):
102
106
check_plist (plist [i ],testChrs [i ])
103
-
107
+
104
108
# Start one mpileup -> call -> stats pipeline per test chromosome and keep
# the final stage's stdout so the per-sample counts can be read later.
plist = []
stats = []
for chrom in testChrs:
    # pipe_commands tokenises each command string with shlex.split, so every
    # path is quoted here; otherwise a path containing whitespace would be
    # split into several arguments. Quoting leaves ordinary paths unchanged.
    command_list = [
        "bcftools mpileup -Ou -b " + shlex.quote(config["bamList"])
        + " -B -f " + shlex.quote(config["refGenome"])
        + " -R " + shlex.quote("commonVar." + chrom + ".bed") + " -I -Ou",
        "bcftools call -Ou -m",
        "bcftools stats -s -",
        "grep ^PSC",
        "cut -f3,4,5,6,14",
    ]
    p = pipe_commands(command_list)
    plist.append(p)
    # p[-1] is the last entry returned for this pipeline (the "cut" stage).
    stats.append(p[-1].stdout)
115
-
119
+
116
120
# Run check_plist on every pipeline together with the chromosome it was
# started for (plist and testChrs are filled in the same order).
for i, procs in enumerate(plist):
    check_plist(procs, testChrs[i])
    #print(stats[i].readline().decode("utf8"))

# The report is written next to the BAM list file.
out = open(config["bamList"] + ".guessSex.txt", "w")

# Column names: sample, four count columns for each of the two test
# chromosomes, then the two Het/Hom ratio columns and the sex column.
count_suffixes = ("-nHomRef", "-nHomAlt", "-nHet", "-none")
header = ["sample"]
header += [testChrs[0] + suffix for suffix in count_suffixes]
header += [testChrs[1] + suffix for suffix in count_suffixes]
header += [testChrs[0] + "-Het/Hom", testChrs[1] + "-Het/Hom", "sex"]
123
127
out .write ("\t " .join (header ) + "\n " )
124
128
sex_list = []
@@ -148,7 +152,7 @@ def check_plist(plist,name):
148
152
out .write ("\t " .join (dataA ) + "\t " + "\t " .join (dataS [1 :len (dataA )]) + "\t " + str (hetA ) + "\t " + str (hetS ) + "\t " + sex + "\n " )
149
153
sex_list .append (sex )
150
154
out .write ("\n sexList: " + ',' .join (sex_list ) + "\n " )
151
- out .close ()
155
+ out .close ()
152
156
153
157
#module load samtools/1.7
154
158
#PATH=$PATH:/home/rhalperin/bin/samtools-1.5/bin/
@@ -158,6 +162,3 @@ def check_plist(plist,name):
158
162
159
163
#bcftools view -q 0.2:nonmajor -v snps -R $BED ${VCFPATH}${TCHR}${VCFEXT} | grep -v ^# | awk '{ print $1 "\t" ($2-1) "\t" $2}' >commonVar.chr${TCHR}.bed
160
164
#bcftools mpileup -Ou -b $BAMLIST -B -f $REF -R commonVar.chr${TCHR}.bed -I -Ou | bcftools call -Ou -m | bcftools stats -i '%QUAL>20' -s - | grep -B 1 ^PSC | cut -f3,4,5,6,14 >${BAMLIST}.callCounts.chr${TCHR}.txt &
161
-
162
-
163
-
0 commit comments