Skip to content

Commit c79c7f9

Browse files
committed
Don't pool unary controls for pooled peaks
1 parent a9f0c19 commit c79c7f9

File tree

2 files changed

+67
-46
lines changed

2 files changed

+67
-46
lines changed

dnanexus/encode_macs2/src/encode_macs2.py

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def main(rep1_ta, rep2_ta, ctl1_ta, ctl2_ta, rep1_xcor, rep2_xcor, rep1_paired_e
5656

5757
rep1_ta_file = dxpy.DXFile(rep1_ta)
5858
rep2_ta_file = dxpy.DXFile(rep2_ta)
59+
unary_control = ctl1_ta == ctl2_ta
5960
ctl1_ta_file = dxpy.DXFile(ctl1_ta)
6061
ctl2_ta_file = dxpy.DXFile(ctl2_ta)
6162
rep1_xcor_file = dxpy.DXFile(rep1_xcor)
@@ -93,29 +94,39 @@ def main(rep1_ta, rep2_ta, ctl1_ta, ctl2_ta, rep1_xcor, rep2_xcor, rep1_paired_e
9394

9495
pool_applet = dxpy.find_one_data_object(
9596
classname='applet', name='pool', zero_ok=False, more_ok=False, return_handler=True)
96-
pool_controls_subjob = pool_applet.run({"inputs": [ctl1_ta, ctl2_ta]})
9797
pool_replicates_subjob = pool_applet.run({"inputs": [rep1_ta, rep2_ta]})
98-
99-
pooled_controls = pool_controls_subjob.get_output_ref("pooled")
10098
pooled_replicates = pool_replicates_subjob.get_output_ref("pooled")
10199

102-
rep1_control = ctl1_ta #default
103-
rep2_control = ctl2_ta #default
104-
ratio_ctl_reads = float(ntags_ctl1)/float(ntags_ctl2)
105-
if ratio_ctl_reads < 1:
106-
ratio_ctl_reads = 1/ratio_ctl_reads
107-
ratio_cutoff = 1.2
108-
if ratio_ctl_reads > ratio_cutoff:
109-
print "Number of reads in controls differ by > factor of %f. Using pooled controls." %(ratio_cutoff)
110-
rep1_control = pooled_controls
111-
rep2_control = pooled_controls
100+
rep1_control = ctl1_ta #default. May be changed later.
101+
rep2_control = ctl2_ta #default. May be changed later.
102+
103+
if unary_control:
104+
print "Only one control supplied. Using it for both replicate 1 and 2 and for the pool."
105+
control_for_pool = rep1_control
112106
else:
113-
if ntags_ctl1 < ntags_rep1:
114-
print "Fewer reads in control replicate 1 than experiment replicate 1. Using pooled controls for replicate 1."
115-
rep1_control = pooled_controls
116-
if ntags_ctl2 < ntags_rep2:
117-
print "Fewer reads in control replicate 2 than experiment replicate 2. Using pooled controls for replicate 2."
118-
rep2_control = pooled_controls
107+
pool_controls_subjob = pool_applet.run({"inputs": [ctl1_ta, ctl2_ta]})
108+
pooled_controls = pool_controls_subjob.get_output_ref("pooled")
109+
#always use the pooled controls for the pool
110+
control_for_pool = pooled_controls
111+
112+
#use the pooled controls for the reps depending on the ration of rep to control reads
113+
ratio_ctl_reads = float(ntags_ctl1)/float(ntags_ctl2)
114+
if ratio_ctl_reads < 1:
115+
ratio_ctl_reads = 1/ratio_ctl_reads
116+
ratio_cutoff = 1.2
117+
if ratio_ctl_reads > ratio_cutoff:
118+
print "Number of reads in controls differ by > factor of %f. Using pooled controls." %(ratio_cutoff)
119+
rep1_control = pooled_controls
120+
rep2_control = pooled_controls
121+
else:
122+
if ntags_ctl1 < ntags_rep1:
123+
print "Fewer reads in control replicate 1 than experiment replicate 1. Using pooled controls for replicate 1."
124+
rep1_control = pooled_controls
125+
elif ntags_ctl2 < ntags_rep2:
126+
print "Fewer reads in control replicate 2 than experiment replicate 2. Using pooled controls for replicate 2."
127+
rep2_control = pooled_controls
128+
else:
129+
print "Using distinct controls for replicate 1 and 2."
119130

120131
pseudoreplicator_applet = dxpy.find_one_data_object(
121132
classname='applet', name='pseudoreplicator', zero_ok=False, more_ok=False, return_handler=True)
@@ -150,7 +161,7 @@ def main(rep1_ta, rep2_ta, ctl1_ta, ctl2_ta, rep1_xcor, rep2_xcor, rep1_paired_e
150161
rep2_xcor, **common_args)
151162

152163
pooled_peaks_subjob = macs2( pooled_replicates,
153-
pooled_controls,
164+
control_for_pool,
154165
pooled_replicates_xcor_subjob.get_output_ref("CC_scores_file"), **common_args)
155166

156167
rep1pr1_peaks_subjob = macs2( rep1_pr_subjob.get_output_ref("pseudoreplicate1"),
@@ -170,11 +181,11 @@ def main(rep1_ta, rep2_ta, ctl1_ta, ctl2_ta, rep1_xcor, rep2_xcor, rep1_paired_e
170181
rep2_pr2_xcor_subjob.get_output_ref("CC_scores_file"), **common_args)
171182

172183
pooledpr1_peaks_subjob = macs2( pool_pr1_subjob.get_output_ref("pooled"),
173-
pooled_controls,
184+
control_for_pool,
174185
pool_pr1_xcor_subjob.get_output_ref("CC_scores_file"), **common_args)
175186

176187
pooledpr2_peaks_subjob = macs2( pool_pr2_subjob.get_output_ref("pooled"),
177-
pooled_controls,
188+
control_for_pool,
178189
pool_pr2_xcor_subjob.get_output_ref("CC_scores_file"), **common_args)
179190

180191
output = {

dnanexus/encode_spp/src/encode_spp.py

Lines changed: 34 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def main(rep1_ta, rep2_ta, ctl1_ta, ctl2_ta, rep1_xcor, rep2_xcor, npeaks, nodup
9191

9292
rep1_ta_file = dxpy.DXFile(rep1_ta)
9393
rep2_ta_file = dxpy.DXFile(rep2_ta)
94+
unary_control = ctl1_ta == ctl2_ta
9495
ctl1_ta_file = dxpy.DXFile(ctl1_ta)
9596
ctl2_ta_file = dxpy.DXFile(ctl2_ta)
9697
rep1_xcor_file = dxpy.DXFile(rep1_xcor)
@@ -129,31 +130,40 @@ def main(rep1_ta, rep2_ta, ctl1_ta, ctl2_ta, rep1_xcor, rep2_xcor, npeaks, nodup
129130
pool_applet = dxpy.find_one_data_object(
130131
classname='applet', name='pool', project=dxpy.PROJECT_CONTEXT_ID,
131132
zero_ok=False, more_ok=False, return_handler=True)
132-
pool_controls_subjob = pool_applet.run({"inputs": [ctl1_ta, ctl2_ta]})
133133
pool_replicates_subjob = pool_applet.run({"inputs": [rep1_ta, rep2_ta]})
134-
135-
pooled_controls = pool_controls_subjob.get_output_ref("pooled")
136134
pooled_replicates = pool_replicates_subjob.get_output_ref("pooled")
135+
pooled_replicates_xcor_subjob = xcor_only(pooled_replicates, paired_end)
137136

138-
rep1_control = ctl1_ta #default
139-
rep2_control = ctl2_ta #default
140-
ratio_ctl_reads = float(ntags_ctl1)/float(ntags_ctl2)
141-
if ratio_ctl_reads < 1:
142-
ratio_ctl_reads = 1/ratio_ctl_reads
143-
ratio_cutoff = 1.2
144-
if ratio_ctl_reads > ratio_cutoff:
145-
print "Number of reads in controls differ by > factor of %f. Using pooled controls." %(ratio_cutoff)
146-
rep1_control = pooled_controls
147-
rep2_control = pooled_controls
148-
else:
149-
if ntags_ctl1 < ntags_rep1:
150-
print "Fewer reads in control replicate 1 than experiment replicate 1. Using pooled controls for replicate 1."
151-
rep1_control = pooled_controls
152-
if ntags_ctl2 < ntags_rep2:
153-
print "Fewer reads in control replicate 2 than experiment replicate 2. Using pooled controls for replicate 2."
154-
rep2_control = pooled_controls
137+
rep1_control = ctl1_ta #default. May be changed later.
138+
rep2_control = ctl2_ta #default. May be changed later.
155139

156-
pooled_replicates_xcor_subjob = xcor_only(pooled_replicates, paired_end)
140+
if unary_control:
141+
print "Only one control supplied. Using it for both replicate 1 and 2 and for the pool."
142+
control_for_pool = rep1_control
143+
else:
144+
pool_controls_subjob = pool_applet.run({"inputs": [ctl1_ta, ctl2_ta]})
145+
pooled_controls = pool_controls_subjob.get_output_ref("pooled")
146+
#always use the pooled controls for the pool
147+
control_for_pool = pooled_controls
148+
149+
#use the pooled controls for the reps depending on the ration of rep to control reads
150+
ratio_ctl_reads = float(ntags_ctl1)/float(ntags_ctl2)
151+
if ratio_ctl_reads < 1:
152+
ratio_ctl_reads = 1/ratio_ctl_reads
153+
ratio_cutoff = 1.2
154+
if ratio_ctl_reads > ratio_cutoff:
155+
print "Number of reads in controls differ by > factor of %f. Using pooled controls." %(ratio_cutoff)
156+
rep1_control = pooled_controls
157+
rep2_control = pooled_controls
158+
else:
159+
if ntags_ctl1 < ntags_rep1:
160+
print "Fewer reads in control replicate 1 than experiment replicate 1. Using pooled controls for replicate 1."
161+
rep1_control = pooled_controls
162+
elif ntags_ctl2 < ntags_rep2:
163+
print "Fewer reads in control replicate 2 than experiment replicate 2. Using pooled controls for replicate 2."
164+
rep2_control = pooled_controls
165+
else:
166+
print "Using distinct controls for replicate 1 and 2."
157167

158168
rep1_peaks_subjob = spp(rep1_ta,
159169
rep1_control,
@@ -170,7 +180,7 @@ def main(rep1_ta, rep2_ta, ctl1_ta, ctl2_ta, rep1_xcor, rep2_xcor, npeaks, nodup
170180
as_file=as_file)
171181

172182
pooled_peaks_subjob = spp(pooled_replicates,
173-
pooled_controls,
183+
control_for_pool,
174184
pooled_replicates_xcor_subjob.get_output_ref("CC_scores_file"),
175185
chrom_sizes=chrom_sizes,
176186
bigbed=True,
@@ -241,13 +251,13 @@ def main(rep1_ta, rep2_ta, ctl1_ta, ctl2_ta, rep1_xcor, rep2_xcor, npeaks, nodup
241251
bigbed=False)
242252

243253
pooledpr1_peaks_subjob = spp(pool_pr1_subjob.get_output_ref("pooled"),
244-
pooled_controls,
254+
control_for_pool,
245255
pool_pr1_xcor_subjob.get_output_ref("CC_scores_file"),
246256
chrom_sizes=chrom_sizes,
247257
bigbed=False)
248258

249259
pooledpr2_peaks_subjob = spp(pool_pr2_subjob.get_output_ref("pooled"),
250-
pooled_controls,
260+
control_for_pool,
251261
pool_pr2_xcor_subjob.get_output_ref("CC_scores_file"),
252262
chrom_sizes=chrom_sizes,
253263
bigbed=False)

0 commit comments

Comments
 (0)