forked from galaxyproject/tools-iuc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtb_profiler_profile.xml
279 lines (270 loc) · 17.4 KB
/
tb_profiler_profile.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
<tool id="tb_profiler_profile" name="TB-Profiler Profile" version="@TOOL_VERSION@+galaxy0" profile="20.09" license="AGPL-3.0-or-later">
<!-- One-line summary of the tool. -->
<description>Infer strain types and drug resistance markers from sequences</description>
<!-- @TOOL_VERSION@ is substituted into the tool version attribute and the tb-profiler requirement. -->
<macros>
    <token name="@TOOL_VERSION@">6.4.0</token>
</macros>
<!-- Cross-reference to the matching bio.tools entry. -->
<xrefs>
    <xref type="bio.tools">tb-profiler</xref>
</xrefs>
<!-- Packages resolved for the job environment; delly is pinned separately from tb-profiler. -->
<requirements>
    <requirement type="package" version="1.2.6">delly</requirement>
    <requirement type="package" version="@TOOL_VERSION@">tb-profiler</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
    ## Resolve the select parameters once so every branch below compares plain strings.
    #set input_type = str($fastq_or_bam.input_select)
    #set report_format = str($output_format)

    ## r1_ext / r2_ext stay None unless the corresponding FASTQ is actually staged;
    ## the -1 / -2 arguments further down are emitted only for staged files.
    #set r1_ext = None
    #set r2_ext = None

    ## Stage the inputs under fixed names in the working directory.
    ## The dataset extension is appended to each link name.
    #if $input_type == "paired_fastq"
        #set r1_ext = $fastq_or_bam.read1.extension
        ln -s '$fastq_or_bam.read1' fastq_r1.'$r1_ext' &&
        ## read2 is an optional parameter: only dereference it when a dataset was supplied.
        #if $fastq_or_bam.read2
            #set r2_ext = $fastq_or_bam.read2.extension
            ln -s '$fastq_or_bam.read2' fastq_r2.'$r2_ext' &&
        #end if
    #else if $input_type == "paired_collection_fastq"
        #set r1_ext = $fastq_or_bam.fastq_collection.forward.extension
        #set r2_ext = $fastq_or_bam.fastq_collection.reverse.extension
        ln -s '$fastq_or_bam.fastq_collection.forward' fastq_r1.'$r1_ext' &&
        ln -s '$fastq_or_bam.fastq_collection.reverse' fastq_r2.'$r2_ext' &&
    #else if $input_type == "single_fastq"
        #set r1_ext = $fastq_or_bam.fastq.extension
        ln -s '$fastq_or_bam.fastq' fastq_r1.'$r1_ext' &&
    #else if $input_type == "bam"
        ln -s '$fastq_or_bam.bam_input' input.bam &&
    #end if

    tb-profiler profile
        --platform '${platform.value}'
        ${spoligotype}
        #if $r1_ext
            -1 fastq_r1.'$r1_ext'
        #end if
        #if $r2_ext
            ## Must use the extension of the second mate: the link above was created with r2_ext.
            -2 fastq_r2.'$r2_ext'
        #end if
        #if $input_type == "bam"
            --bam input.bam
        #end if
        --threads "\${GALAXY_SLOTS:-1}"
        #if str($advanced.options) == 'yes'
            --mapper '${advanced.mapper}'
            --caller '${advanced.caller}'
            ## Each filter takes its two thresholds as a single "hard,soft" pair.
            --depth '${advanced.vf.min_depth_hard},${advanced.vf.min_depth_soft}'
            --af '${advanced.vf.min_af_hard},${advanced.vf.min_af_soft}'
            --strand '${advanced.vf.min_read_hard},${advanced.vf.min_read_soft}'
            --sv-depth '${advanced.vf.min_sv_depth_hard},${advanced.vf.min_sv_depth_soft}'
            --sv-af '${advanced.vf.min_sv_af_hard},${advanced.vf.min_sv_af_soft}'
            --sv-len '${advanced.vf.max_sv_len_hard},${advanced.vf.max_sv_len_soft}'
            ${advanced.suspect}
            ${advanced.no_trim}
            ${advanced.no_coverage_qc}
            ${advanced.no_samclip}
            ${advanced.no_delly}
        #end if
        #if $report_format == "pdf"
            --pdf
        #else if $report_format == "txt"
            --txt
        #end if

    ## Move the results out of the tb-profiler output directories into the Galaxy datasets.
    && mv results/tbprofiler.results.json '${results_json}'
    #if $input_type != "bam"
        ## A BAM output dataset only exists when the tool did the mapping itself.
        && mv bam/tbprofiler.bam '${output_bam}'
    #end if
    ## The targets VCF is written compressed; emit it as plain-text VCF.
    && bcftools view -Ov -o '${output_vcf}' vcf/tbprofiler.targets.vcf.gz
    #if $report_format == "pdf"
        && mv results/tbprofiler.results.pdf '${output_pdf}'
    #else if $report_format == "txt"
        && mv results/tbprofiler.results.txt '${output_txt}'
    #end if
]]></command>
<inputs>
    <!-- Sequencing platform, passed straight through to tb-profiler. -->
    <param name="platform" type="select" label="Platform">
        <option value="illumina" selected="true">Illumina</option>
        <option value="nanopore">Nanopore</option>
        <option value="pacbio">PacBio</option>
    </param>
    <!-- Input source: separate FASTQ datasets, a paired collection, one FASTQ, or an existing BAM. -->
    <conditional name="fastq_or_bam">
        <param name="input_select" type="select" label="Input File Type">
            <option value="paired_fastq">Paired Fastq</option>
            <option value="paired_collection_fastq">Paired Collection Fastq</option>
            <option value="single_fastq">Single Fastq</option>
            <option value="bam">BAM</option>
        </param>
        <when value="paired_fastq">
            <!-- Accepts the same FASTQ datatypes as the collection and single-end branches. -->
            <param name="read1" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Read1" help="First read file (default: None)"/>
            <param name="read2" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" optional="true" label="Read2" help="Second read file (default: None)"/>
        </when>
        <when value="paired_collection_fastq">
            <param name="fastq_collection" type="data_collection" collection_type="paired" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Reads (collection)" />
        </when>
        <when value="single_fastq">
            <param name="fastq" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Reads" />
        </when>
        <when value="bam">
            <param name="bam_input" type="data" format="bam" label="Bam" help="Warning!!!: The BAM files must have been created using the ensembl version of the genome."/>
        </when>
    </conditional>
    <param argument="--spoligotype" type="boolean" checked="false" truevalue="--spoligotype" falsevalue="" label="Perform in-silico spoligotyping"
           help="Uses spacers of CRISPR region to call a spoligotype."/>
    <!-- Selects which human-readable report is produced next to the JSON results. -->
    <param name="output_format" type="select" label="Output format">
        <option value="txt" selected="true">Text</option>
        <option value="pdf">PDF</option>
    </param>
    <conditional name="advanced">
        <param name="options" type="select" label="Select advanced options">
            <option value="yes">Yes</option>
            <option value="no" selected="true">No</option>
        </param>
        <when value="no">
        </when>
        <when value="yes">
            <!-- The parameter name is derived from the argument, so it is still "mapper" / "caller". -->
            <param argument="--mapper" type="select" label="Mapper" help="Mapping tools to use (default: bwa)">
                <option value="bwa" selected="true">bwa</option>
                <option value="minimap2">minimap2</option>
                <option value="bowtie2">bowtie2</option>
                <option value="bwa-mem2">bwa-mem2</option>
            </param>
            <param argument="--caller" type="select" label="Variant caller" help="Variant calling tool to use">
                <option value="freebayes" selected="true">freebayes</option>
                <option value="bcftools">bcftools</option>
                <option value="gatk">gatk</option>
                <option value="pilon">pilon</option>
                <option value="lofreq">lofreq</option>
            </param>
            <!-- Each filter is a hard/soft pair; the command template joins the two values as "hard,soft". -->
            <section name="vf" title="Variant Filters">
                <param name="min_depth_hard" argument="--depth" type="integer" value="0" min="0" label="Min Depth (hard cutoff)"
                       help="Minimum depth required to call variant. Bases with depth below this cutoff will be marked as missing (default: 0)"/>
                <param name="min_depth_soft" argument="--depth" type="integer" value="10" min="0" label="Min Depth (soft cutoff)"
                       help="Minimum depth required to call variant. Variants at positions lower than this depth and above the hard cutoff will be marked as QC fail (default: 10)"/>
                <param name="min_af_hard" argument="--af" type="float" value="0" min="0.0" max="1.0" label="Minimum allele frequency to call variants (hard cutoff)"
                       help="Minimum allele frequency to call variants (default: 0)" />
                <param name="min_af_soft" argument="--af" type="float" value="0.1" min="0.0" max="1.0" label="Minimum allele frequency to call variants (soft cutoff)"
                       help="Variants with an allele frequency lower than this (and higher than the hard cutoff) will be marked as QC fail (default 0.1)" />
                <param name="min_read_hard" argument="--strand" type="integer" value="0" min="0" label="Minimum read number per strand (hard cutoff)"
                       help="Minimum read number per strand to call variants (default: 0)" />
                <param name="min_read_soft" argument="--strand" type="integer" value="3" min="0" label="Minimum read number per strand (soft cutoff)"
                       help="Minimum read number per strand to report variants (default: 3). Variants with number of reads on each strand below this (and above the hard cutoff) will be marked as QC fail" />
                <param name="min_sv_depth_hard" argument="--sv-depth" type="integer" value="0" min="0" label="Min Depth for SV (hard cutoff)"
                       help="Minimum depth required to call structural variants (default: 0)" />
                <param name="min_sv_depth_soft" argument="--sv-depth" type="integer" value="10" min="0" label="Min Depth for SV (soft cutoff)"
                       help="Minimum depth required to call structural variants. Variants at positions lower than this depth and above the hard cutoff will be marked as QC fail" />
                <param name="min_sv_af_hard" argument="--sv-af" type="float" value="0.5" min="0.0" max="1.0" label="Min SV allele frequency (hard cutoff)"
                       help="Minimum allele frequency to call structural variants (default: 0.5)" />
                <param name="min_sv_af_soft" argument="--sv-af" type="float" value="0.9" min="0.0" max="1.0" label="Min SV allele frequency (soft cutoff)"
                       help="Variants with an allele frequency lower than this (and higher than the hard cutoff) will be marked as QC fail (default 0.9)" />
                <param name="max_sv_len_hard" argument="--sv-len" type="integer" value="100000" min="0" label="Max SV length (hard cutoff)"
                       help="Maximum length (length) of structural variants to call (default: 100000)" />
                <param name="max_sv_len_soft" argument="--sv-len" type="integer" value="50000" min="0" label="Max SV length (soft cutoff)"
                       help="Structural variants with length higher than this (and lower than the hard cutoff) will be marked as QC fail (default: 50000)" />
            </section>
            <!-- Boolean switches: each one contributes its flag to the command line only when checked. -->
            <param argument="--suspect" type="boolean" checked="false" truevalue="--suspect" falsevalue="" label="Use ML predictions of resistance from the SUSPECT tool suite"
                   help="The SUSPECT tools offer ML-based predictions of RIF, PZA and BDQ resistance. Note that this uses a web service thus requires an Internet connection."/>
            <param argument="--no_trim" type="boolean" checked="false" truevalue="--no_trim" falsevalue="" label="Do not trim reads using trimmomatic" />
            <param argument="--no_coverage_qc" type="boolean" truevalue="--no_coverage_qc" falsevalue="" label="Don't collect flagstats" />
            <param argument="--no_samclip" type="boolean" truevalue="--no_samclip" falsevalue="" label="Don't remove clipped reads from variant calling" />
            <param argument="--no_delly" type="boolean" truevalue="--no_delly" falsevalue="" label="Don't run delly for SV calling" />
        </when>
    </conditional>
</inputs>
<outputs>
    <!-- Always produced: JSON results, collected from results/tbprofiler.results.json in the job working directory. -->
    <data name="results_json" format="json" from_work_dir="results/tbprofiler.results.json" label="${tool.name} on ${on_string}: Results.json"/>
    <!-- Always produced: variant calls. -->
    <data name="output_vcf" format="vcf" label="${tool.name} VCF on ${on_string}" />
    <!-- Alignment output; filtered out when the input itself is a BAM. -->
    <data name="output_bam" format="bam" label="${tool.name} BAM on ${on_string}">
        <filter>fastq_or_bam['input_select'] != 'bam'</filter>
    </data>
    <!-- Exactly one of the two reports below is kept, depending on the selected output format. -->
    <data name="output_pdf" format="pdf" label="${tool.name} PDF report on ${on_string}">
        <filter>output_format == 'pdf'</filter>
    </data>
    <data name="output_txt" format="txt" label="${tool.name} report on ${on_string}">
        <filter>output_format == 'txt'</filter>
    </data>
</outputs>
<tests>
    <!--
        Column separators in the expected report lines are written as the character
        reference &#009;. A literal tab inside an attribute value is turned into a
        space by XML attribute-value normalisation, so it cannot express a tab.
    -->
    <!-- Test 1: single-end FASTQ, default options, text report. -->
    <test expect_num_outputs="4">
        <param name="platform" value="illumina" />
        <conditional name="fastq_or_bam">
            <param name="input_select" value="single_fastq"/>
            <param name="fastq" ftype="fastq.gz" value="rif_resistant.fastq.gz" />
        </conditional>
        <param name="output_format" value="txt" />
        <conditional name="advanced">
            <param name="options" value="no" />
        </conditional>
        <output name="output_txt">
            <assert_contents>
                <has_line line="Drug-resistance: RR-TB" />
                <has_line line="lineage2.2.2&#009;1.000&#009;East-Asian (Beijing)&#009;RD105;RD207" />
                <has_line line="Rifampicin&#009;R&#009;rpoB p.Asp435Val (1.00)" />
                <has_line line="761110&#009;Rv0667&#009;rpoB&#009;missense_variant&#009;p.Asp435Val&#009;43&#009;1.000&#009;rifampicin&#009;Assoc w R" />
            </assert_contents>
        </output>
    </test>
    <!-- Test 2: BAM input; no BAM output is expected, hence three outputs. -->
    <test expect_num_outputs="3">
        <param name="platform" value="illumina" />
        <conditional name="fastq_or_bam">
            <param name="input_select" value="bam"/>
            <param name="bam_input" ftype="bam" value="rif_resistant.bam" />
        </conditional>
        <param name="output_format" value="txt" />
        <conditional name="advanced">
            <param name="options" value="no" />
        </conditional>
        <output name="output_txt">
            <assert_contents>
                <has_line line="Drug-resistance: RR-TB" />
                <has_line line="lineage2.2.2&#009;1.000&#009;East-Asian (Beijing)&#009;RD105;RD207" />
                <has_line line="Rifampicin&#009;R&#009;rpoB p.Asp435Val (1.00)" />
                <has_line line="761110&#009;Rv0667&#009;rpoB&#009;missense_variant&#009;p.Asp435Val&#009;43&#009;1.000&#009;rifampicin&#009;Assoc w R" />
            </assert_contents>
        </output>
    </test>
    <!-- Test 3: advanced options with raised depth cutoffs; expects a soft_fail filter entry in the JSON. -->
    <test expect_num_outputs="4">
        <param name="platform" value="illumina" />
        <conditional name="fastq_or_bam">
            <param name="input_select" value="single_fastq"/>
            <param name="fastq" ftype="fastq.gz" value="rif_resistant.fastq.gz" />
        </conditional>
        <param name="output_format" value="txt" />
        <conditional name="advanced">
            <param name="options" value="yes" />
            <section name="vf">
                <param name="min_depth_hard" value="40" />
                <param name="min_depth_soft" value="50" />
            </section>
        </conditional>
        <output name="output_txt">
            <assert_contents>
                <has_line line="lineage2.2.2&#009;1.000&#009;East-Asian (Beijing)&#009;RD105;RD207" />
                <has_line line="761110&#009;Rv0667&#009;rpoB&#009;missense_variant&#009;p.Asp435Val&#009;43&#009;1.000&#009;rifampicin&#009;Assoc w R" />
            </assert_contents>
        </output>
        <output name="results_json">
            <assert_contents>
                <has_text text='"filter": "soft_fail",' />
            </assert_contents>
        </output>
    </test>
    <!-- Test 4: in-silico spoligotyping enabled. -->
    <test expect_num_outputs="4">
        <param name="platform" value="illumina" />
        <conditional name="fastq_or_bam">
            <param name="input_select" value="single_fastq"/>
            <param name="fastq" ftype="fastq.gz" value="rpoB_and_crispr.fastq.gz" />
        </conditional>
        <param name="spoligotype" value="true" />
        <param name="output_format" value="txt" />
        <conditional name="advanced">
            <param name="options" value="no" />
        </conditional>
        <output name="output_txt">
            <assert_contents>
                <has_line line="Octal: 000000000000771" />
                <has_line line="Drug-resistance: RR-TB" />
                <has_line line="Rifampicin&#009;R&#009;rpoB p.Asp435Val (1.00)" />
                <has_line line="761110&#009;Rv0667&#009;rpoB&#009;missense_variant&#009;p.Asp435Val&#009;35&#009;1.000&#009;rifampicin&#009;Assoc w R" />
            </assert_contents>
        </output>
    </test>
</tests>
<help><![CDATA[
Summary
=======
The pipeline aligns reads to the H37Rv reference using BWA, BWA-MEM2, bowtie2 or minimap2 and then calls variants
(using freebayes, bcftools, GATK4, pilon or LoFreq). These variants are then compared to a drug-resistance database.
TB-Profiler also predicts the number of reads supporting drug resistance variants as an insight into
hetero-resistance (not applicable for MinION data).
Produces a JSON output file by default.
In the Advanced options, you can select the mapper and variant caller to use, as well as set a number of filtering parameters.
Each of these typically has a "hard" and a "soft" cutoff. If a variant value is below the "hard" cutoff it is discarded, if
it is between the "hard" and "soft" values it is removed from the final predictions but added to the "qc_fail_variants" section
of the output.
One of the advanced options is to use the SUSPECT tool suite to predict resistance to `rifampicin (RIF)`_, `pyrazinamide (PZA)`_
and `bedaquiline (BDQ)`_.

.. _rifampicin (RIF): https://www.nature.com/articles/s41598-020-74648-y
.. _pyrazinamide (PZA): https://www.nature.com/articles/s41598-020-58635-x
.. _bedaquiline (BDQ): https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0217169
]]> </help>
<!-- Publication to cite for this tool, referenced by DOI. -->
<citations>
    <citation type="doi">10.1186/s13073-019-0650-x</citation>
</citations>
</tool>