Skip to content

Commit

Permalink
Merge pull request #100 from Pathogen-Genomics-Cymru/tbtamr
Browse files Browse the repository at this point in the history
Tbtamr
  • Loading branch information
WhalleyT authored Sep 3, 2024
2 parents 1dfead9 + 11c5820 commit 53c33e1
Show file tree
Hide file tree
Showing 10 changed files with 169 additions and 15 deletions.
1 change: 0 additions & 1 deletion .github/workflows/build-push-quay.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ on:
push:
branches:
- main
- recursive_decontamination
paths:
- '**/Dockerfile*'
- "bin/"
Expand Down
4 changes: 4 additions & 0 deletions config/containers.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ process {
container = "quay.io/pathogen-genomics-cymru/tbprofiler:0.9.9"
}

// Container image applied to every process that carries the 'tbtamr' label
withLabel:tbtamr {
container = "quay.io/pathogen-genomics-cymru/tbtamr:0.9.9"
}

withName:downloadContamGenomes {
shell = ['/bin/bash','-u']
errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
Expand Down
1 change: 1 addition & 0 deletions docker/Dockerfile.tbprofiler-0.9.9
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
FROM ubuntu:focal


#copy the reference genome to pre-compute our index
COPY resources/tuberculosis.fasta /data/tuberculosis.fasta

Expand Down
39 changes: 39 additions & 0 deletions docker/Dockerfile.tbtamr-0.9.9
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
FROM ubuntu:jammy

WORKDIR /

# LABEL instructions tag the image with metadata that might be important to the user
LABEL base.image="ubuntu:jammy"
LABEL dockerfile.version="0.9.9"
LABEL software="tbtamr"
LABEL description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database."
LABEL maintainer3="Tom Whalley"
LABEL maintainer3.email="[email protected]"

# root prefix under which micromamba creates and looks up its environments
ENV MAMBA_ROOT_PREFIX="/opt/conda"

# Install the OS-level tools and clean the apt caches in the SAME layer:
# a cleanup performed in a later RUN cannot shrink an earlier layer.
# The paths are spelled out (no {a,b} brace expansion) because RUN uses
# /bin/sh, which does not expand braces.
RUN apt-get update \
    && apt-get install -y apt-utils wget bzip2 curl git \
    && apt-get autoremove --yes \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Fetch micromamba (unpacked to /bin/micromamba because WORKDIR is /),
# initialise it for bash and keep a copy of the generated bashrc without the
# non-interactive early-return line.
# Keep comments OUT of the continued command: once the lines are joined, a
# trailing '# ...' comments out every '&&' step that follows it.
RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest| tar -xvj bin/micromamba \
    && touch /root/.bashrc \
    && ./bin/micromamba shell init -s bash -p /opt/conda \
    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc


# install tbtamr and its helper tools via bioconda; install into 'base' conda env
RUN micromamba install --yes --name base --channel conda-forge --channel bioconda jq requests xlsxwriter tbtamr
RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
RUN micromamba install --yes --name base --channel conda-forge --channel bioconda samtools
# freebayes is pinned: original note says "STDERR in current version of freebayes"
RUN micromamba install --yes --name base --channel conda-forge --channel bioconda freebayes==1.3.6
RUN micromamba clean --all --yes

# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time
ENV PATH="/opt/conda/bin:${PATH}"

WORKDIR /data

# run tbtamr's setup step at build time
# NOTE(review): the original comment here read "wants full path to reference" - confirm what `tbtamr setup` needs
RUN tbtamr setup
11 changes: 6 additions & 5 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ Mandatory and conditional parameters:
This is the Langmead lab pre-built major-allele-SNP reference; see https://github.com/BenLangmead/bowtie-majref)
--bowtie_index_name Name of the bowtie index, e.g. hg19_1kgmaj
--vcfmix Run VFCMIX "yes" or "no". Should be set to "no" for synthetic samples
--resistance_profiler Tool to profile resistance with. At the moment options are "tb-profiler" or "none"
--resistance_profiler Tool to profile resistance with. At the moment options are "tb-profiler", "tbtamr" or "none"
--afanc_myco_db Path to the Afanc database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_3.0.tar.gz
--permissive One of "yes" or "no". If "yes", continue to clockwork flags will be ignored and alignment will be performed anyway.
If there are not enough reads and/or not a reference found the programme will still exit.
Expand Down Expand Up @@ -85,10 +85,11 @@ nextflow run main.nf -profile docker --filetype bam --input_dir bam_dir --unmix_
}


resistance_profilers = ["tb-profiler", "none"]
resistance_profilers = ["tb-profiler", "tbtamr", "none"]

if(!resistance_profilers.contains(params.resistance_profiler)){
exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler" or "none" to skip.'
exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler", "tbtamr" \
or "none" to skip.'
}


Expand Down Expand Up @@ -197,13 +198,13 @@ workflow {
clockwork(preprocessing_output)

// VCFPREDICT SUB-WORKFLOW

sample_and_fastqs = clockwork.out.sample_and_fastqs
mpileup_vcf = clockwork.out.mpileup_vcf
minos_vcf = clockwork.out.minos_vcf
reference = clockwork.out.reference
bam = clockwork.out.bam

vcfpredict(bam, mpileup_vcf, minos_vcf, reference)
vcfpredict(sample_and_fastqs, bam, mpileup_vcf, minos_vcf, reference)

}

Expand Down
45 changes: 45 additions & 0 deletions modules/vcfpredictModules.nf
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,51 @@ process tbprofiler {
"""
}

/*
 * tbtamr: run the tbtamr resistance profiler on a sample's paired FASTQ files
 * and merge its JSON result into the sample's running report JSON.
 *
 * Input  : tuple (sample_name, fq1, fq2, report_json, isSampleTB)
 * Output : tuple (sample_name, <sample>.tbtamr-out.json, <sample>_report.json),
 *          emitted as `tbprofiler_json` so downstream code can treat either
 *          resistance profiler's output the same way.
 * Runs only when isSampleTB contains CREATE_ANTIBIOGRAM_<sample_name>.
 */
process tbtamr {
    tag { sample_name }
    label 'medium_memory'
    label 'medium_cpu'
    label 'tbtamr'

    // the pattern must match the file this process actually writes (*.tbtamr-out.json);
    // otherwise nothing is copied into the antibiogram directory
    publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.tbtamr-out.json', overwrite: 'true'
    publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}'

    input:
    tuple val(sample_name), path(fq1), path(fq2), path(report_json), val(isSampleTB)

    output:
    tuple val(sample_name), path("${sample_name}.tbtamr-out.json"), path("${sample_name}_report.json"), emit: tbprofiler_json

    when:
    isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/

    script:
    error_log = "${sample_name}_err.json"
    tbtamr_json = "${sample_name}.tbtamr-out.json"

    // NOTE(review): the script assumes the staged report_json is named
    // <sample_name>_report.json (it is copied under that name below) - confirm against caller
    """
    tbtamr run -r1 $fq1 -r2 $fq2
    mv tbtamr/tbtamr.json ${tbtamr_json}
    cp ${sample_name}_report.json ${sample_name}_report_previous.json
    echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log}
    #tidy up report so we can combine: drop the first and last line, rename the Seq_ID key
    sed -i '1d;\$d' ${tbtamr_json}
    sed -i 's/Seq_ID/resistance_profiler/g' ${tbtamr_json}
    #merge status, previous report and tbtamr result (later objects win on key clashes)
    jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${tbtamr_json} > ${report_json}
    """

    stub:
    """
    touch ${sample_name}.tbtamr-out.json
    touch ${sample_name}_report.json
    """
}

process add_allelic_depth {
tag { sample_name }
label 'low_memory'
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ params {
// run VCFMIX 'yes' or 'no' (set to no for synthetic samples)
vcfmix = 'yes'

resistance_profiler = "tb-profiler"
resistance_profiler = "tbtamr"

update_tbprofiler = "no"

Expand Down
55 changes: 55 additions & 0 deletions singularity/Singularity.tbtamr-0.9.9
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
Bootstrap: docker
From: ubuntu:jammy
Stage: spython-base

%labels
base.image="ubuntu:jammy"
dockerfile.version="0.9.9"
software="tbtamr"
description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database."
maintainer3="Tom Whalley"
maintainer3.email="[email protected]"
%post

mkdir -p /
cd /

# Root prefix for micromamba. It must be EXPORTED: a plain assignment is not
# inherited by the micromamba child processes started below.
export MAMBA_ROOT_PREFIX="/opt/conda"

apt-get update && apt-get install -y apt-utils wget bzip2 curl git

# Fetch micromamba (unpacked to /bin/micromamba because we are in /),
# initialise it for bash and keep a copy of the generated bashrc without the
# non-interactive early-return line.
# Keep comments OUT of the continued command: in a shell script a '\' at the
# end of a comment does not continue the line, so the next line would start
# with '&&' and fail to parse.
curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest| tar -xvj bin/micromamba \
&& touch /root/.bashrc \
&& ./bin/micromamba shell init -s bash -p /opt/conda \
&& grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc \
&& apt-get autoremove --yes \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*


# install tbtamr and its helper tools via bioconda; install into 'base' conda env
micromamba install --yes --name base --channel conda-forge --channel bioconda jq requests xlsxwriter tbtamr
micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
micromamba install --yes --name base --channel conda-forge --channel bioconda samtools
# freebayes is pinned: original note says "STDERR in current version of freebayes"
micromamba install --yes --name base --channel conda-forge --channel bioconda freebayes==1.3.6
micromamba clean --all --yes

# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time
export PATH="/opt/conda/bin:${PATH}"

mkdir -p /data
cd /data

# run tbtamr's setup step at build time
# NOTE(review): the original comment here read "wants full path to reference" - confirm what `tbtamr setup` needs
tbtamr setup
%environment
export MAMBA_ROOT_PREFIX="/opt/conda"
export PATH="/opt/conda/bin:${PATH}"
%runscript
cd /data
exec /bin/bash "$@"
%startscript
cd /data
exec /bin/bash "$@"
2 changes: 1 addition & 1 deletion workflows/clockwork.nf
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ workflow clockwork {
gvcf(alignToRef.out.alignToRef_bam.join(minos.out.minos_vcf, by: 0))

emit:

sample_and_fastqs = input_seqs_json.map{it[0,1,2]}
mpileup_vcf = callVarsMpileup.out.mpileup_vcf.join(minos.out.minos_report, by: 0)
minos_vcf = minos.out.minos_vcf.join(alignToRef.out.alignToRef_report, by: 0)
reference = getRefFromJSON.out
Expand Down
24 changes: 17 additions & 7 deletions workflows/vcfpredict.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ include {tbprofiler} from '../modules/vcfpredictModules.nf' params(params)
include {tbprofiler_update_db} from '../modules/vcfpredictModules.nf' params(params)
include {add_allelic_depth} from '../modules/vcfpredictModules.nf' params(params)
include {finalJson} from '../modules/vcfpredictModules.nf' params(params)
include {tbtamr} from '../modules/vcfpredictModules.nf' params(params)

// define workflow component
workflow vcfpredict {

take:
sample_and_fastqs
clockwork_bam
clockwork_bcftools_tuple
minos_vcf_tuple
Expand All @@ -26,24 +28,32 @@ workflow vcfpredict {

}

//get just the vcf
sample_name = minos_vcf_tuple.map{it[0]}
minos_vcf = minos_vcf_tuple.map{it[1]}
do_we_resistance_profile = minos_vcf_tuple.map{it[2]}
report_json = minos_vcf_tuple.map{it[3]}
bam = clockwork_bam.map{it[2]}
fastq_and_report = sample_and_fastqs.combine(report_json).combine(do_we_resistance_profile)

if ( params.resistance_profiler == "tb-profiler"){
//get just the vcf
sample_name = minos_vcf_tuple.map{it[0]}
minos_vcf = minos_vcf_tuple.map{it[1]}
do_we_resistance_profile = minos_vcf_tuple.map{it[2]}
report_json = minos_vcf_tuple.map{it[3]}
bam = clockwork_bam.map{it[2]}

//if we are local and want to match our references, run this
if (params.update_tbprofiler == "yes"){
tbprofiler_update_db(reference_fasta)
}

//add allelic depth back in: was calculated in mpileup but lost in minos
add_allelic_depth(sample_name, minos_vcf, bam, reference_fasta, do_we_resistance_profile)
//run tb-profiler
tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile)
profiling_json = tbprofiler.out.tbprofiler_json
} else if (params.resistance_profiler == "tbtamr"){
tbtamr(fastq_and_report)
profiling_json = tbtamr.out.tbprofiler_json
}

if (params.vcfmix == "yes" && params.resistance_profiler != "none"){
finalJson(vcfmix.out.vcfmix_json.join(tbprofiler.out.tbprofiler_json, by: 0))
finalJson(vcfmix.out.vcfmix_json.join(profiling_json, by: 0))
}
}

0 comments on commit 53c33e1

Please sign in to comment.