Merge pull request #100 from Pathogen-Genomics-Cymru/tbtamr

Tbtamr
Pathogen-Genomics-Cymru · Sep 3, 2024 · 53c33e1 · 53c33e1
2 parents 1dfead9 + 11c5820
commit 53c33e1
Show file tree

Hide file tree

Showing 10 changed files with 169 additions and 15 deletions.
diff --git a/.github/workflows/build-push-quay.yml b/.github/workflows/build-push-quay.yml
@@ -3,7 +3,6 @@ on:
   push:
     branches:
       - main
-      - recursive_decontamination
     paths:
       - '**/Dockerfile*'
       - "bin/"

diff --git a/config/containers.config b/config/containers.config
@@ -12,6 +12,10 @@ process {
         container = "quay.io/pathogen-genomics-cymru/tbprofiler:0.9.9"
     }
 
+    withLabel:tbtamr {
+        container = "quay.io/pathogen-genomics-cymru/tbtamr:0.9.9"
+    }
+
     withName:downloadContamGenomes {
         shell = ['/bin/bash','-u']
         errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }

diff --git a/docker/Dockerfile.tbprofiler-0.9.9 b/docker/Dockerfile.tbprofiler-0.9.9
@@ -1,5 +1,6 @@
 FROM ubuntu:focal
 
+
 #copy the reference genome to pre-compute our index
 COPY resources/tuberculosis.fasta /data/tuberculosis.fasta
 

diff --git a/docker/Dockerfile.tbtamr-0.9.9 b/docker/Dockerfile.tbtamr-0.9.9
@@ -0,0 +1,39 @@
+FROM ubuntu:jammy
+
+WORKDIR /
+
+# LABEL instructions tag the image with metadata that might be important to the user
+LABEL base.image="ubuntu:jammy"
+LABEL dockerfile.version="0.9.9"
+LABEL software="tbtamr"
+LABEL description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database."
+LABEL maintainer3="Tom Whalley"
+LABEL maintainer3.email="[email protected]"
+
+# set env for root prefix
+ENV MAMBA_ROOT_PREFIX="/opt/conda"
+
+# install OS tools and drop the apt caches in the same layer so they never reach the image
+RUN apt-get update && apt-get install -y apt-utils wget bzip2 curl git \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# fetch micromamba to /bin/micromamba; no inline "#" comments inside this chain: once the lines are joined a comment swallows every later && step
+RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest| tar -xvj bin/micromamba \
+    && touch /root/.bashrc \
+    && ./bin/micromamba shell init -s bash -p /opt/conda  \
+    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc  > /opt/conda/bashrc
+
+# install tbtamr and helper tools via bioconda into the 'base' conda env; freebayes is pinned (STDERR in current version of freebayes)
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda jq requests xlsxwriter tbtamr
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda samtools
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda freebayes==1.3.6
+RUN micromamba clean --all --yes
+
+# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time
+ENV PATH="/opt/conda/bin:${PATH}"
+
+WORKDIR /data
+
+# wants full path to reference
+RUN tbtamr setup
diff --git a/main.nf b/main.nf
@@ -50,7 +50,7 @@ Mandatory and conditional parameters:
                       This is the Langmead lab pre-built major-allele-SNP reference; see https://github.com/BenLangmead/bowtie-majref)
 --bowtie_index_name   Name of the bowtie index, e.g. hg19_1kgmaj
 --vcfmix              Run VFCMIX "yes" or "no". Should be set to "no" for synthetic samples
---resistance_profiler Tool to profile resistance with. At the moment options are "tb-profiler" or "none"
+--resistance_profiler Tool to profile resistance with. At the moment options are "tb-profiler", "tbtamr" or "none"
 --afanc_myco_db       Path to the Afanc database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_3.0.tar.gz
 --permissive          One of "yes" or "no". If "yes", continue to clockwork flags will be ignored and alignment will be performed anyway.
                       If there are not enough reads and/or not a reference found the programme will still exit.
@@ -85,10 +85,11 @@ nextflow run main.nf -profile docker --filetype bam --input_dir bam_dir --unmix_
 }
 
 
-resistance_profilers = ["tb-profiler", "none"]
+resistance_profilers = ["tb-profiler", "tbtamr", "none"]
 
 if(!resistance_profilers.contains(params.resistance_profiler)){
-    exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler" or "none" to skip.'
+    exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler", "tbtamr" ' +
+            'or "none" to skip.' // concatenated: a backslash continuation would embed the indentation in the message
     }
 
 
@@ -197,13 +198,13 @@ workflow {
       clockwork(preprocessing_output)
 
       // VCFPREDICT SUB-WORKFLOW
-
+      sample_and_fastqs = clockwork.out.sample_and_fastqs
       mpileup_vcf = clockwork.out.mpileup_vcf
       minos_vcf = clockwork.out.minos_vcf
       reference = clockwork.out.reference
       bam = clockwork.out.bam
 
-      vcfpredict(bam, mpileup_vcf, minos_vcf, reference)
+      vcfpredict(sample_and_fastqs, bam, mpileup_vcf, minos_vcf, reference)
 
 }
 

diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf
@@ -111,6 +111,51 @@ process tbprofiler {
     """
 }
 
+process tbtamr {
+    tag { sample_name }
+    label 'medium_memory'
+    label 'medium_cpu'
+    label 'tbtamr'
+    // publish the raw tbtamr JSON under antibiogram/ and the error/report JSONs at the sample root
+    publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.tbtamr-out.json', overwrite: 'true'
+    publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}'
+
+    input:
+    tuple val(sample_name), path(fq1), path(fq2), path(report_json), val(isSampleTB)
+
+    output:
+    tuple val(sample_name), path("${sample_name}.tbtamr-out.json"), path("${sample_name}_report.json"), emit: tbprofiler_json
+    // only run when the flag carries CREATE_ANTIBIOGRAM_<sample_name> for this sample
+    when:
+    isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/
+
+    script:
+    error_log = "${sample_name}_err.json"
+    tbtamr_json = "${sample_name}.tbtamr-out.json"
+
+    """
+    tbtamr run -r1 $fq1 -r2 $fq2
+
+    mv tbtamr/tbtamr.json ${tbtamr_json}
+    #keep a copy of the incoming report; presumably the final redirect below writes to the same file name
+    cp ${sample_name}_report.json ${sample_name}_report_previous.json
+
+    echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log}
+
+    #tidy up report so we can combine: drop first and last line, rename the Seq_ID key
+    sed -i '1d;\$d' ${tbtamr_json}
+    sed -i 's/Seq_ID/resistance_profiler/g' ${tbtamr_json}
+
+    jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json  ${tbtamr_json} > ${report_json}
+    """
+
+    stub:
+    """
+    touch ${sample_name}.tbtamr-out.json
+    touch ${sample_name}_report.json
+    """
+}
+
 process add_allelic_depth {
     tag { sample_name }
     label 'low_memory'

diff --git a/nextflow.config b/nextflow.config
@@ -34,7 +34,7 @@ params {
   // run VCFMIX 'yes' or 'no' (set to no for synthetic samples)
   vcfmix = 'yes'
 
-  resistance_profiler = "tb-profiler"
+  resistance_profiler = "tbtamr"
 
   update_tbprofiler = "no"
 

diff --git a/singularity/Singularity.tbtamr-0.9.9 b/singularity/Singularity.tbtamr-0.9.9
@@ -0,0 +1,55 @@
+Bootstrap: docker
+From: ubuntu:jammy
+Stage: spython-base
+
+%labels
+base.image="ubuntu:jammy"
+dockerfile.version="0.9.9"
+software="tbtamr"
+description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database."
+maintainer3="Tom Whalley"
+maintainer3.email="[email protected]"
+%post
+
+mkdir -p /
+cd /
+
+# exported (not just assigned) so the micromamba processes below inherit the root prefix
+export MAMBA_ROOT_PREFIX="/opt/conda"
+
+# install OS tools, then drop the apt caches
+apt-get update && apt-get install -y apt-utils wget bzip2 curl git \
+&& apt-get clean \
+&& rm -rf /var/lib/apt/lists/*
+
+# fetch micromamba to /bin/micromamba; keep "#" comments off the continued lines:
+# a trailing comment ends the command, so a following "&&" line is a shell syntax error
+curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest| tar -xvj bin/micromamba \
+&& touch /root/.bashrc \
+&& ./bin/micromamba shell init -s bash -p /opt/conda  \
+&& grep -v '[ -z "\$PS1" ] && return' /root/.bashrc  > /opt/conda/bashrc
+
+# install tbtamr and helper tools via bioconda into the 'base' conda env; freebayes is pinned (STDERR in current version of freebayes)
+micromamba install --yes --name base --channel conda-forge --channel bioconda jq requests xlsxwriter tbtamr
+micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
+micromamba install --yes --name base --channel conda-forge --channel bioconda samtools
+micromamba install --yes --name base --channel conda-forge --channel bioconda freebayes==1.3.6
+micromamba clean --all --yes
+
+# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time
+export PATH="/opt/conda/bin:${PATH}"
+
+mkdir -p /data
+cd /data
+
+#wants full path to reference
+tbtamr setup
+%environment
+export MAMBA_ROOT_PREFIX="/opt/conda"
+export PATH="/opt/conda/bin:${PATH}"
+%runscript
+cd /data
+exec /bin/bash "$@"
+%startscript
+cd /data
+exec /bin/bash "$@"
diff --git a/workflows/clockwork.nf b/workflows/clockwork.nf
@@ -37,7 +37,7 @@ workflow clockwork {
       gvcf(alignToRef.out.alignToRef_bam.join(minos.out.minos_vcf, by: 0))
 
     emit:
-
+      sample_and_fastqs = input_seqs_json.map{it[0,1,2]}
       mpileup_vcf = callVarsMpileup.out.mpileup_vcf.join(minos.out.minos_report, by: 0)
       minos_vcf = minos.out.minos_vcf.join(alignToRef.out.alignToRef_report, by: 0)
       reference = getRefFromJSON.out

diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf
@@ -7,11 +7,13 @@ include {tbprofiler} from '../modules/vcfpredictModules.nf' params(params)
 include {tbprofiler_update_db} from '../modules/vcfpredictModules.nf' params(params)
 include {add_allelic_depth} from '../modules/vcfpredictModules.nf' params(params) 
 include {finalJson} from '../modules/vcfpredictModules.nf' params(params) 
+include {tbtamr} from '../modules/vcfpredictModules.nf' params(params)
 
 // define workflow component
 workflow vcfpredict {
 
     take:
+      sample_and_fastqs
       clockwork_bam
       clockwork_bcftools_tuple
       minos_vcf_tuple
@@ -26,24 +28,32 @@ workflow vcfpredict {
 
       }
 
+      //split the minos tuple (sample name, vcf, profiling flag, report json) into per-field channels
+      sample_name = minos_vcf_tuple.map{it[0]}
+      minos_vcf = minos_vcf_tuple.map{it[1]}
+      do_we_resistance_profile = minos_vcf_tuple.map{it[2]}
+      report_json  = minos_vcf_tuple.map{it[3]}
+      bam = clockwork_bam.map{it[2]}
+      fastq_and_report = sample_and_fastqs.join(minos_vcf_tuple.map{[it[0], it[3], it[2]]}, by: 0) //join on sample name: combine() is a cartesian product and would pair fastqs with other samples' reports
+
       if ( params.resistance_profiler == "tb-profiler"){
-        //get just the vcf
-        sample_name = minos_vcf_tuple.map{it[0]}
-        minos_vcf = minos_vcf_tuple.map{it[1]}
-        do_we_resistance_profile = minos_vcf_tuple.map{it[2]}
-        report_json  = minos_vcf_tuple.map{it[3]}
-        bam = clockwork_bam.map{it[2]}
 
+        //if we are local and want to match our references, run this
         if (params.update_tbprofiler == "yes"){
         tbprofiler_update_db(reference_fasta)
         }
 
         //add allelic depth back in: was calculated in mpileup but lost in minos
         add_allelic_depth(sample_name, minos_vcf, bam, reference_fasta, do_we_resistance_profile)
+        //run tb-profiler
         tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile)
+        profiling_json = tbprofiler.out.tbprofiler_json
+      } else if (params.resistance_profiler == "tbtamr"){
+        tbtamr(fastq_and_report)
+        profiling_json = tbtamr.out.tbprofiler_json
       }
 
       if (params.vcfmix == "yes" && params.resistance_profiler != "none"){
-          finalJson(vcfmix.out.vcfmix_json.join(tbprofiler.out.tbprofiler_json, by: 0))
+          finalJson(vcfmix.out.vcfmix_json.join(profiling_json, by: 0))
       }
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,7 +3,6 @@ on: @@
       push:
         branches:
           - main
-          - recursive_decontamination
         paths:
           - '**/Dockerfile*'
           - "bin/"
@@ Expand Down @@