Skip to content

Commit 46e504b

Browse files
author
Rebecca Halperin
committed
enable chr contig prefix
1 parent d64f108 commit 46e504b

File tree

3 files changed

+45
-39
lines changed

3 files changed

+45
-39
lines changed

scripts/guessSex.py

+25-24
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,34 @@
11
import sys, yaml
22
import os, subprocess
3+
import shlex
34

45
def die(msg):
56
sys.stderr.write("%s: " % sys.argv[0])
67
sys.stderr.write(msg)
78
sys.stderr.write("\n")
89
exit(1)
910

10-
def chr_lists(autosomes, sexs):
11+
def chr_lists(autosomes, sexs, prefix):
1112
a_start, a_end = map(int, autosomes.split(":"))
1213

13-
sex_list = sexs.split(",")
14+
sex_list = list(map(lambda x: prefix + x, sexs.split(",")))
1415

15-
autosomes_list = list(range(a_start, a_end + 1))
16+
autosomes_list = list(map(lambda x: prefix + str(x), range(a_start, a_end + 1)))
1617

17-
return list(map(str, autosomes_list)), sex_list
18+
return autosomes_list, sex_list
1819

1920
def pipe_commands(command_list):
2021
print("process " + str(0) + " is: " + command_list[0])
21-
print(command_list[0].split())
22-
plist = [subprocess.Popen(command_list[0].split(),stdout=subprocess.PIPE)]
22+
command=shlex.split(command_list[0])
23+
print(command)
24+
plist = [subprocess.Popen(command,stdout=subprocess.PIPE)]
2325
#print("first process is " + str(plist[0].pid))
2426
for i in range(1,len(command_list)):
2527
print("process " + str(i) + " is: " + command_list[i])
26-
print(command_list[i].split())
28+
command=shlex.split(command_list[i])
2729
#print("input is :")
2830
#print(plist[i-1].stdout)
29-
plist.append(subprocess.Popen(command_list[i].split(), stdin=plist[i-1].stdout,stdout=subprocess.PIPE))
31+
plist.append(subprocess.Popen(command, stdin=plist[i-1].stdout,stdout=subprocess.PIPE))
3032
#print("prev process is " + str(plist[i-1].pid))
3133
#print("curr process is " + str(plist[i].pid))
3234
#plist[i-1].stdout.close()
@@ -61,13 +63,15 @@ def check_plist(plist,name):
6163
cfile=sys.argv[1]
6264
with open(cfile) as f:
6365
config = yaml.load(f)
64-
65-
autosomes, sexs = chr_lists(config["autosomes"], config["sexChr"])
66+
try:
67+
autosomes, sexs = chr_lists(config["autosomes"], config["sexChr"], config['contigPrefix'])
68+
except:
69+
autosomes, sexs = chr_lists(config["autosomes"], config["sexChr"],'')
6670
chrs = autosomes + sexs
6771

6872
mlist = list(map(int, config["M"].split(",")))
6973
flist = list(map(int, config["F"].split(",")))
70-
74+
7175
#print(flist)
7276
try:
7377
index = flist.index(2)
@@ -78,16 +82,16 @@ def check_plist(plist,name):
7882
diploid = "M"
7983
except ValueError:
8084
die("diploid sex chr not found")
81-
85+
8286
testChrs=[autosomes[-1],sexs[index]]
8387
plist=[]
8488
for chrom in testChrs:
8589
print("finding common var pos for " + chrom)
86-
command_list=["bcftools view -q 0.2:nonmajor -v snps -R " + config["regionsFile"] + " " + config["snpVCFpath"] + chrom + config["snpVCFname"]]
90+
command_list=["bcftools view -i \'INFO/AF[0] > 0.2\' -v snps -R " + config["regionsFile"] + " " + config["snpVCFpath"] + chrom + config["snpVCFname"]]
8791
command_list.append("grep -v ^#")
8892
p=pipe_commands(command_list)
8993
plist.append(p)
90-
bedfile = open("commonVar.chr" + chrom + ".bed","w")
94+
bedfile = open("commonVar." + chrom + ".bed","w")
9195
while True:
9296
line = p[-1].stdout.readline().decode("utf8")
9397
if line != '':
@@ -96,29 +100,29 @@ def check_plist(plist,name):
96100
else:
97101
break
98102
bedfile.close()
99-
100-
#print(plist)
103+
104+
#print(plist)
101105
for i in range(0,len(plist)):
102106
check_plist(plist[i],testChrs[i])
103-
107+
104108
plist=[]
105109
stats=[]
106110
for chrom in testChrs:
107-
command_list=["bcftools mpileup -Ou -b " + config["bamList"] + " -B -f " + config["refGenome"] + " -R commonVar.chr" + chrom + ".bed -I -Ou"]
111+
command_list=["bcftools mpileup -Ou -b " + config["bamList"] + " -B -f " + config["refGenome"] + " -R commonVar." + chrom + ".bed -I -Ou"]
108112
command_list.append("bcftools call -Ou -m")
109113
command_list.append("bcftools stats -s -")
110114
command_list.append("grep ^PSC")
111115
command_list.append("cut -f3,4,5,6,14")
112116
p=pipe_commands(command_list)
113117
plist.append(p)
114118
stats.append(p[-1].stdout)
115-
119+
116120
for i in range(0,len(plist)):
117121
check_plist(plist[i],testChrs[i])
118122
#print(stats[i].readline().decode("utf8"))
119123
out=open(config["bamList"] + ".guessSex.txt","w")
120124
header=["sample",testChrs[0] + "-nHomRef",testChrs[0] + "-nHomAlt",testChrs[0] + "-nHet",testChrs[0]+"-none"]
121-
header.extend([testChrs[1] + "-nHomRef",testChrs[1] + "-nHomAlt",testChrs[1] + "-nHet",testChrs[1]+"-none"])
125+
header.extend([testChrs[1] + "-nHomRef",testChrs[1] + "-nHomAlt",testChrs[1] + "-nHet",testChrs[1]+"-none"])
122126
header.extend([testChrs[0] + "-Het/Hom",testChrs[1] + "-Het/Hom","sex"])
123127
out.write("\t".join(header) + "\n")
124128
sex_list=[]
@@ -148,7 +152,7 @@ def check_plist(plist,name):
148152
out.write("\t".join(dataA) + "\t" + "\t".join(dataS[1:len(dataA)]) + "\t" + str(hetA) + "\t" + str(hetS) + "\t" + sex + "\n")
149153
sex_list.append(sex)
150154
out.write("\nsexList: " + ','.join(sex_list) + "\n")
151-
out.close()
155+
out.close()
152156

153157
#module load samtools/1.7
154158
#PATH=$PATH:/home/rhalperin/bin/samtools-1.5/bin/
@@ -158,6 +162,3 @@ def check_plist(plist,name):
158162

159163
#bcftools view -q 0.2:nonmajor -v snps -R $BED ${VCFPATH}${TCHR}${VCFEXT} | grep -v ^# | awk '{ print $1 "\t" ($2-1) "\t" $2}' >commonVar.chr${TCHR}.bed
160164
#bcftools mpileup -Ou -b $BAMLIST -B -f $REF -R commonVar.chr${TCHR}.bed -I -Ou | bcftools call -Ou -m | bcftools stats -i '%QUAL>20' -s - | grep -B 1 ^PSC | cut -f3,4,5,6,14 >${BAMLIST}.callCounts.chr${TCHR}.txt &
161-
162-
163-

scripts/runNormalMetrics.py

+15-13
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,14 @@ def die(msg):
77
sys.stderr.write("\n")
88
exit(1)
99

10-
def chr_lists(autosomes, sexs):
10+
def chr_lists(autosomes, sexs, prefix):
1111
a_start, a_end = map(int, autosomes.split(":"))
1212

13-
sex_list = sexs.split(",")
13+
sex_list = list(map(lambda x: prefix + x, sexs.split(",")))
1414

15-
autosomes_list = list(range(a_start, a_end + 1))
15+
autosomes_list = list(map(lambda x: prefix + str(x), range(a_start, a_end + 1)))
1616

17-
return list(map(str, autosomes_list)), sex_list
18-
17+
return autosomes_list, sex_list
1918

2019
if __name__ == "__main__":
2120
if len(sys.argv) < 2:
@@ -28,9 +27,12 @@ def chr_lists(autosomes, sexs):
2827
with open(cfile) as f:
2928
config = yaml.load(f)
3029

31-
chro = sys.argv[2]
32-
33-
autosomes, sexs = chr_lists(config["autosomes"], config["sexChr"])
30+
try:
31+
chro = config['contigPrefix'] + sys.argv[2]
32+
autosomes, sexs = chr_lists(config["autosomes"], config["sexChr"], config['contigPrefix'])
33+
except:
34+
chro = sys.argv[2]
35+
autosomes, sexs = chr_lists(config["autosomes"], config["sexChr"],'')
3436
chrs = autosomes + sexs
3537

3638
sexlist = config["sexList"].split(",")
@@ -50,15 +52,15 @@ def chr_lists(autosomes, sexs):
5052
ploidyStr=''.join(ploidy)
5153
except ValueError:
5254
ploidyStr=('2' * nsample)
53-
55+
5456
command=config["gvmPath"] + "/gvm -c " + cfile + " -C " + chro + " -P -E -N --ploidystr=" + ploidyStr
5557
print("Running:\n" + command)
5658
process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True)
5759
stdout, stderr = process.communicate()
5860
print(stdout)
5961
if not process.returncode == 0:
6062
die("gvm failed:\n" + stderr)
61-
63+
6264
command1="sort -n -k2 " + config["outfile"] + chro + ".txt"
6365
command2="bgzip -c >" + config["outfile"] + chro + ".txt.gz"
6466
print("Running:\n" + command1 + " | " + command2)
@@ -74,13 +76,13 @@ def chr_lists(autosomes, sexs):
7476
p2.wait()
7577
if not p2.returncode == 0:
7678
print(p2.stderr)
77-
die("bgzip failed")
78-
79+
die("bgzip failed")
80+
7981
command="tabix -b 2 -e 2 " + config["outfile"] + chro + ".txt.gz"
8082
print("Running:\n" + command)
8183
process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True)
8284
stdout, stderr = process.communicate()
8385
print(stdout)
8486
if not process.returncode == 0:
85-
print(stderr)
87+
print(stderr)
8688
die("tabix failed")

src/chr2idx.m

+5-2
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,16 @@
3232
%profile('-memory','on');
3333
%profile on;
3434

35-
sexChr=regexprep(inputParam.sexChr,'''','')
36-
sexChr=regexp(sexChr,',','split')
35+
sexChr=regexprep(inputParam.sexChr,'''','');
36+
sexChr=regexp(sexChr,',','split');
3737
if max(cellfun('length',sexChr))==0
3838
chrList=cellstr(num2str(inputParam.autosomes','%-d'))
3939
else
4040
chrList=[cellstr(num2str(inputParam.autosomes','%-d')); sexChr'];
4141
end
42+
if isfield(inputParam,'contigPrefix')
43+
chrList=strcat(inputParam.contigPrefix,chrList)
44+
end
4245

4346
[~,out]=system(['samtools view -H `head -n1 ' inputParam.bamList ' ` | grep ^@SQ']);
4447

0 commit comments

Comments
 (0)