diff --git a/.github/workflows/build-push-quay.yml b/.github/workflows/build-push-quay.yml
index 646ddee..a1c2b72 100644
--- a/.github/workflows/build-push-quay.yml
+++ b/.github/workflows/build-push-quay.yml
@@ -3,7 +3,6 @@ on:
   push:
     branches:
       - main
-      - recursive_decontamination
     paths:
       - '**/Dockerfile*'
       - "bin/"
diff --git a/config/containers.config b/config/containers.config
index 8cf6a35..c1580cf 100644
--- a/config/containers.config
+++ b/config/containers.config
@@ -12,6 +12,10 @@ process {
         container = "quay.io/pathogen-genomics-cymru/tbprofiler:0.9.9"
     }
 
+    withLabel:tbtamr {
+        container = "quay.io/pathogen-genomics-cymru/tbtamr:0.9.9"
+    }
+
     withName:downloadContamGenomes {
         shell = ['/bin/bash','-u']
         errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
diff --git a/docker/Dockerfile.tbprofiler-0.9.9 b/docker/Dockerfile.tbprofiler-0.9.9
index b1a8853..42c3832 100644
--- a/docker/Dockerfile.tbprofiler-0.9.9
+++ b/docker/Dockerfile.tbprofiler-0.9.9
@@ -1,5 +1,6 @@
 FROM ubuntu:focal
 
+
 #copy the reference genome to pre-compute our index
 COPY resources/tuberculosis.fasta /data/tuberculosis.fasta
 
diff --git a/docker/Dockerfile.tbtamr-0.9.9 b/docker/Dockerfile.tbtamr-0.9.9
new file mode 100644
index 0000000..79c960f
--- /dev/null
+++ b/docker/Dockerfile.tbtamr-0.9.9
@@ -0,0 +1,39 @@
+FROM ubuntu:jammy
+
+WORKDIR /
+
+# LABEL instructions tag the image with metadata that might be important to the user
+LABEL base.image="ubuntu:jammy"
+LABEL dockerfile.version="0.9.9"
+LABEL software="tbtamr"
+LABEL description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database."
+LABEL maintainer3="Tom Whalley"
+LABEL maintainer3.email="twhalley93@gmail.com"
+
+#set env for root prefix
+ENV MAMBA_ROOT_PREFIX="/opt/conda"
+
+RUN apt-get update && apt-get install -y apt-utils wget bzip2 curl git
+
+# NOTE: the grep line below has been modified. Keep comments off the continued lines: a trailing '# ... \' comments out every '&&' step after it
+RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest| tar -xvj bin/micromamba \
+    && touch /root/.bashrc \
+    && ./bin/micromamba shell init -s bash -p /opt/conda \
+    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc \
+    && apt-get autoremove --yes && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# install tbtamr via bioconda; install into 'base' conda env
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda jq requests xlsxwriter tbtamr
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda samtools
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda freebayes==1.3.6 #STDERR in current version of freebayes
+RUN micromamba clean --all --yes
+
+# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time
+ENV PATH="/opt/conda/bin:${PATH}"
+
+WORKDIR /data
+
+#wants full path to reference
+RUN tbtamr setup
diff --git a/main.nf b/main.nf
index 6b1aaea..6006704 100644
--- a/main.nf
+++ b/main.nf
@@ -50,7 +50,7 @@ Mandatory and conditional parameters:
                         This is the Langmead lab pre-built major-allele-SNP reference; see https://github.com/BenLangmead/bowtie-majref)
 --bowtie_index_name     Name of the bowtie index, e.g. hg19_1kgmaj
 --vcfmix                Run VFCMIX "yes" or "no". Should be set to "no" for synthetic samples
---resistance_profiler   Tool to profile resistance with. At the moment options are "tb-profiler" or "none"
+--resistance_profiler   Tool to profile resistance with. At the moment options are "tb-profiler", "tbtamr" or "none"
 --afanc_myco_db         Path to the Afanc database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_3.0.tar.gz
 --permissive            One of "yes" or "no". If "yes", continue to clockwork flags will be ignored and alignment will be performed anyway. If there are not enough
                         reads and/or not a reference found the programme will still exit.
@@ -85,10 +85,10 @@ nextflow run main.nf -profile docker --filetype bam --input_dir bam_dir --unmix_
 }
 
 
-resistance_profilers = ["tb-profiler", "none"]
+resistance_profilers = ["tb-profiler", "tbtamr", "none"]
 
 if(!resistance_profilers.contains(params.resistance_profiler)){
-    exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler" or "none" to skip.'
+    exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler", "tbtamr" or "none" to skip.'
 }
 
 
@@ -197,13 +197,13 @@ workflow {
     clockwork(preprocessing_output)
 
     // VCFPREDICT SUB-WORKFLOW
-
+    sample_and_fastqs = clockwork.out.sample_and_fastqs
     mpileup_vcf = clockwork.out.mpileup_vcf
     minos_vcf = clockwork.out.minos_vcf
     reference = clockwork.out.reference
     bam = clockwork.out.bam
 
-    vcfpredict(bam, mpileup_vcf, minos_vcf, reference)
+    vcfpredict(sample_and_fastqs, bam, mpileup_vcf, minos_vcf, reference)
 
 }
 
diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf
index 4a8c31f..cdd1ba9 100644
--- a/modules/vcfpredictModules.nf
+++ b/modules/vcfpredictModules.nf
@@ -111,6 +111,53 @@ process tbprofiler {
     """
 }
 
+// Runs tbtamr on the paired FASTQs and merges its JSON into the sample report.
+// The result is emitted as tbprofiler_json so vcfpredict can feed either profiler's output to finalJson.
+process tbtamr {
+    tag { sample_name }
+    label 'medium_memory'
+    label 'medium_cpu'
+    label 'tbtamr'
+
+    publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.tbtamr-out.json', overwrite: 'true'
+    publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}'
+
+    input:
+    tuple val(sample_name), path(fq1), path(fq2), path(report_json), val(isSampleTB)
+
+    output:
+    tuple val(sample_name), path("${sample_name}.tbtamr-out.json"), path("${sample_name}_report.json"), emit: tbprofiler_json
+
+    when:
+    isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/
+
+    script:
+    error_log = "${sample_name}_err.json"
+    tbtamr_json = "${sample_name}.tbtamr-out.json"
+
+    """
+    tbtamr run -r1 $fq1 -r2 $fq2
+
+    mv tbtamr/tbtamr.json ${tbtamr_json}
+
+    cp ${sample_name}_report.json ${sample_name}_report_previous.json
+
+    echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log}
+
+    #tidy up report so we can combine
+    sed -i '1d;\$d' ${tbtamr_json}
+    sed -i 's/Seq_ID/resistance_profiler/g' ${tbtamr_json}
+
+    jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${tbtamr_json} > ${report_json}
+    """
+
+    stub:
+    """
+    touch ${sample_name}.tbtamr-out.json
+    touch ${sample_name}_report.json
+    """
+}
+
 process add_allelic_depth {
     tag { sample_name }
     label 'low_memory'
diff --git a/nextflow.config b/nextflow.config
index 4b3061a..ed167cb 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -34,7 +34,7 @@ params {
     // run VCFMIX 'yes' or 'no' (set to no for synthetic samples)
     vcfmix = 'yes'
 
-    resistance_profiler = "tb-profiler"
+    resistance_profiler = "tbtamr"
 
     update_tbprofiler = "no"
 
diff --git a/singularity/Singularity.tbtamr-0.9.9 b/singularity/Singularity.tbtamr-0.9.9
new file mode 100644
index 0000000..8908ce6
--- /dev/null
+++ b/singularity/Singularity.tbtamr-0.9.9
@@ -0,0 +1,55 @@
+Bootstrap: docker
+From: ubuntu:jammy
+Stage: spython-base
+
+%labels
+base.image="ubuntu:jammy"
+dockerfile.version="0.9.9"
+software="tbtamr"
+description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database."
+maintainer3="Tom Whalley"
+maintainer3.email="twhalley93@gmail.com"
+%post
+
+mkdir -p /
+cd /
+
+# LABEL instructions tag the image with metadata that might be important to the user
+
+#set env for root prefix (exported so the micromamba child processes below see it)
+export MAMBA_ROOT_PREFIX="/opt/conda"
+
+apt-get update && apt-get install -y apt-utils wget bzip2 curl git
+
+# NOTE: the grep line below has been modified. Keep comments off the continued lines: a comment ends the command, leaving the next '&&' line as a syntax error
+curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest| tar -xvj bin/micromamba \
+&& touch /root/.bashrc \
+&& ./bin/micromamba shell init -s bash -p /opt/conda \
+&& grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc \
+&& apt-get autoremove --yes && apt-get clean \
+&& rm -rf /var/lib/apt/lists/*
+
+# install tbtamr via bioconda; install into 'base' conda env
+micromamba install --yes --name base --channel conda-forge --channel bioconda jq requests xlsxwriter tbtamr
+micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
+micromamba install --yes --name base --channel conda-forge --channel bioconda samtools
+micromamba install --yes --name base --channel conda-forge --channel bioconda freebayes==1.3.6 #STDERR in current version of freebayes
+micromamba clean --all --yes
+
+# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time
+PATH="/opt/conda/bin:${PATH}"
+
+mkdir -p /data
+cd /data
+
+#wants full path to reference
+tbtamr setup
+%environment
+export MAMBA_ROOT_PREFIX="/opt/conda"
+export PATH="/opt/conda/bin:${PATH}"
+%runscript
+cd /data
+exec /bin/bash "$@"
+%startscript
+cd /data
+exec /bin/bash "$@"
diff --git a/workflows/clockwork.nf b/workflows/clockwork.nf
index 39ba0f4..c980389 100644
--- a/workflows/clockwork.nf
+++ b/workflows/clockwork.nf
@@ -37,7 +37,7 @@ workflow clockwork {
         gvcf(alignToRef.out.alignToRef_bam.join(minos.out.minos_vcf, by: 0))
 
     emit:
-
+        sample_and_fastqs = input_seqs_json.map{it[0,1,2]}
         mpileup_vcf = callVarsMpileup.out.mpileup_vcf.join(minos.out.minos_report, by: 0)
         minos_vcf = minos.out.minos_vcf.join(alignToRef.out.alignToRef_report, by: 0)
         reference = getRefFromJSON.out
diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf
index 8fec00f..609b9d1 100644
--- a/workflows/vcfpredict.nf
+++ b/workflows/vcfpredict.nf
@@ -7,11 +7,13 @@ include {tbprofiler} from '../modules/vcfpredictModules.nf' params(params)
 include {tbprofiler_update_db} from '../modules/vcfpredictModules.nf' params(params)
 include {add_allelic_depth} from '../modules/vcfpredictModules.nf' params(params)
 include {finalJson} from '../modules/vcfpredictModules.nf' params(params)
+include {tbtamr} from '../modules/vcfpredictModules.nf' params(params)
 
 // define workflow component
 workflow vcfpredict {
 
     take:
+        sample_and_fastqs
         clockwork_bam
         clockwork_bcftools_tuple
         minos_vcf_tuple
@@ -26,24 +28,33 @@ workflow vcfpredict {
 
         }
 
+        //get just the vcf
+        sample_name = minos_vcf_tuple.map{it[0]}
+        minos_vcf = minos_vcf_tuple.map{it[1]}
+        do_we_resistance_profile = minos_vcf_tuple.map{it[2]}
+        report_json = minos_vcf_tuple.map{it[3]}
+        bam = clockwork_bam.map{it[2]}
+        //join on sample name (not combine, which is a cartesian product) so each sample's reads stay paired with its own report and flag
+        fastq_and_report = sample_and_fastqs.join(minos_vcf_tuple.map{ [it[0], it[3], it[2]] }, by: 0)
+
         if ( params.resistance_profiler == "tb-profiler"){
-            //get just the vcf
-            sample_name = minos_vcf_tuple.map{it[0]}
-            minos_vcf = minos_vcf_tuple.map{it[1]}
-            do_we_resistance_profile = minos_vcf_tuple.map{it[2]}
-            report_json = minos_vcf_tuple.map{it[3]}
-            bam = clockwork_bam.map{it[2]}
+
             //if we are local and want to match our references, run this
             if (params.update_tbprofiler == "yes"){
                 tbprofiler_update_db(reference_fasta)
             }
             //add allelic depth back in: was calculated in mpileup but lost in minos
             add_allelic_depth(sample_name, minos_vcf, bam, reference_fasta, do_we_resistance_profile)
+
             //run tb-profiler
             tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile)
+            profiling_json = tbprofiler.out.tbprofiler_json
+        } else if (params.resistance_profiler == "tbtamr"){
+            tbtamr(fastq_and_report)
+            profiling_json = tbtamr.out.tbprofiler_json
         }
 
         if (params.vcfmix == "yes" && params.resistance_profiler != "none"){
-            finalJson(vcfmix.out.vcfmix_json.join(tbprofiler.out.tbprofiler_json, by: 0))
+            finalJson(vcfmix.out.vcfmix_json.join(profiling_json, by: 0))
         }
 }