diff --git a/.github/workflows/build-push-quay.yml b/.github/workflows/build-push-quay.yml
index 9043ae6..a1c2b72 100644
--- a/.github/workflows/build-push-quay.yml
+++ b/.github/workflows/build-push-quay.yml
@@ -2,12 +2,11 @@ name: build-push-quay
 on:
   push:
     branches:
-      - v0.9.6
-      - 0.9.7-dev
-      - climb
+      - main
     paths:
       - '**/Dockerfile*'
       - "bin/"
+      - "resources/"
 
   workflow_dispatch:
 
@@ -46,6 +45,7 @@ jobs:
       - name: Copy folders to docker
         run: |
           cp -r bin docker/bin
+          cp -r resources docker/resources
 
       - name: Get image name
         id: image_name
diff --git a/README.md b/README.md
index 5c44791..a076e2a 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ Pipeline cleans and QCs reads with fastp and FastQC, classifies with Kraken2 & A
 
 Note that while Mykrobe is included within this pipeline, it runs as an independent process and is not used for any downstream reporting.
 
-**WARNING**: There are currently known errors with vcfmix and gnomonicus, as such `errorStrategy 'ignore'` has been added to the processes vcfpredict:vcfmix and vcfpredict:gnomonicus to stop the pipeline from crashing. Please check the stdout from nextflow to see whether these processes have ran successfully.
+**WARNING**: There are currently known errors with vcfmix, as such `errorStrategy 'ignore'` has been added to the process vcfpredict:vcfmix to stop the pipeline from crashing. Please check the stdout from Nextflow to see whether this process has run successfully.
 
 ## Quick Start ##
 This is a Nextflow DSL2 pipeline, it requires a version of Nextflow that supports DSL2 and the stub-run feature. It is recommended to run the pipeline with `NXF_VER=20.11.0-edge`, as the pipeline has been tested using this version. E.g. to download
@@ -29,6 +29,8 @@ NXF_VER=20.11.0-edge nextflow run main.nf -profile docker --filetype bam --input
 --output_dir . --kraken_db /path/to/database --bowtie2_index /path/to/index --bowtie_index_name hg19_1kgmaj
 ```
 
+There is also a pre-configured climb profile to run Lodestone on a CLIMB Jupyter Notebook Server. Add ```-profile climb``` to your command invocation. The input directory can point to an S3 bucket natively (e.g. ```--input_dir s3://my-team/bucket```). By default this will run the workflow in Docker containers and take advantage of Kubernetes pods. The Kraken2, Bowtie2 and Afanc databases point to the ```pluspf16```, ```hg19_1kgmaj_bt2``` and ```Mycobacteriaciae_DB_7.0``` directories by default. These are hosted in a public S3 bucket on CLIMB.
+
 ### Executors ###
 By default, the pipeline will just run on the local machine. To run on a cluster, modifications will have to be made to the `nextflow.config` to add in the executor. E.g. for a SLURM cluster add `process.executor = 'slurm'`. For more information on executor options see the Nextflow docs: https://www.nextflow.io/docs/latest/executor.html
 
@@ -63,10 +65,8 @@ Directory containing Bowtie2 index (obtain from ftp://ftp.ccb.jhu.edu/pub/data/b
Name of the bowtie index, e.g. hg19_1kgmaj
* **vcfmix**
Run [vcfmix](https://github.com/AlexOrlek/VCFMIX), yes or no. Set to no for synthetic samples
-* **gnomonicus**
-Run [gnomonicus](https://github.com/oxfordmmm/gnomonicus), yes or no
-* **amr_cat**
-Path to AMR catalogue for gnomonicus
+* **resistance_profiler**
+Run resistance profiling for Mycobacterium tuberculosis. Either ["tb-profiler"](https://tbdr.lshtm.ac.uk/) or "none". See the example invocation below.
* **afanc_myco_db**
Path to the [afanc](https://github.com/ArthurVM/Afanc) database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_7.0.tar.gz
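A worked invocation using these parameters might look as follows (a sketch based on the Quick Start example above, extended with the new `--resistance_profiler` flag; all paths are placeholders):

```
NXF_VER=20.11.0-edge nextflow run main.nf -profile docker --filetype fastq \
--input_dir fq_dir --pattern "*_R{1,2}.fq.gz" --unmix_myco no \
--output_dir . --kraken_db /path/to/database --bowtie2_index /path/to/index \
--bowtie_index_name hg19_1kgmaj --vcfmix yes --resistance_profiler tb-profiler
```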
@@ -125,12 +125,10 @@ process clockwork:alignToRef\
 25. (Fail) If < 50% of the reference genome was covered at 10-fold depth
 
 process clockwork:minos\
-26. (Warn) If sample is not TB, then it is not passed to gnomonicus
-
-## Running on CLIMB Jupyter Hub
-There is a pre-configured climb profile to run Lodestone on a CLIMB Jupyter Notebook Server. Add ```profile climb``` to your command invocation. The input directory can point to an S3 bucket natively (e.g. ```--input_dir s3://my-team/bucket```). By default this will run the workflow in Docker containers and take advantage of kubernetes pods. The Kraken2, Bowtie2 and Afanc databases will by default point to the ```pluspf16```, ```hg19_1kgmaj_bt2``` and ```Mycobacteriaciae_DB_7.0``` respectively. These are mounted on a public shared volume.
+26. (Warn) If sample is not TB, then it is not passed to a resistance profiler
 
 ## Acknowledgements ##
 For a list of direct authors of this pipeline, please see the contributors list. All of the software dependencies of this pipeline are recorded in the version.json
 
 The preprocessing sub-workflow is based on the preprocessing nextflow DSL1 pipeline written by Stephen Bush, University of Oxford. The clockwork sub-workflow uses aspects of the variant calling workflow from https://github.com/iqbal-lab-org/clockwork, lead author Martin Hunt, Iqbal Lab at EMBL-EBI
+
diff --git a/config/containers.config b/config/containers.config
new file mode 100644
index 0000000..e961b71
--- /dev/null
+++ b/config/containers.config
@@ -0,0 +1,47 @@
+params{
+    container_enabled = "true"
+    update_tbprofiler = "false"
+}
+
+
+process {
+
+
+    withLabel:low_cpu {cpus = 2}
+    withLabel:normal_cpu { cpus = 8 }
+    withLabel:low_memory { memory = '5GB' }
+    withLabel:medium_memory { memory = '10GB' }
+    withLabel:high_memory { memory = '18GB' }
+
+    withLabel:getversion {
+        container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8"
+    }
+
+    withLabel:preprocessing {
+        container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8"
+    }
+
+    withLabel:tbprofiler {
+        container = "quay.io/pathogen-genomics-cymru/tbprofiler:0.9.8"
+    }
+
+    withName:downloadContamGenomes {
+        shell = ['/bin/bash','-u']
+        errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
+        maxRetries = 5
+    }
+
+    withLabel:retry_afanc {
+        shell = ['/bin/bash','-u']
+        errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' }
+        maxRetries = 5
+    }
+
+    withLabel:clockwork {
+        container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8"
+    }
+
+    withLabel:vcfpredict {
+        container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8"
+    }
+}
\ No newline at end of file
diff --git a/docker/Dockerfile.tbprofiler-0.9.8 b/docker/Dockerfile.tbprofiler-0.9.8
new file mode 100644
index 0000000..686c9c4
--- /dev/null
+++ b/docker/Dockerfile.tbprofiler-0.9.8
@@ -0,0 +1,54 @@
+FROM mambaorg/micromamba:1.3.0 as app
+
+#copy the reference genome to pre-compute our index
+COPY resources/tuberculosis.fasta /data/tuberculosis.fasta
+
+USER root
+WORKDIR /
+
+ARG TBPROFILER_VER="5.0.1"
+
+# this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/
+# commits are found on https://github.com/jodyphelan/tbdb/commits/master
+# this was the latest commit as of 2023-10-26
+ARG TBDB_VER="e25540b"
+
+# LABEL instructions tag the image with metadata that might be important to the user
+LABEL base.image="micromamba:1.3.0"
+LABEL dockerfile.version="1"
+LABEL software="tbprofiler"
+LABEL software.version="${TBPROFILER_VER}"
+LABEL description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database."
+LABEL website="https://github.com/jodyphelan/TBProfiler/"
+LABEL license="https://github.com/jodyphelan/TBProfiler/blob/master/LICENSE"
+LABEL maintainer="John Arnn"
+LABEL maintainer.email="jarnn@utah.gov"
+LABEL maintainer2="Curtis Kapsak"
+LABEL maintainer2.email="kapsakcj@gmail.com"
+
+# Install dependencies via apt-get; cleanup apt garbage
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    wget \
+    ca-certificates \
+    procps && \
+    apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+# install tb-profiler via bioconda; install into 'base' conda env
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \
+    tb-profiler=${TBPROFILER_VER}
+
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda samtools
+RUN micromamba install --yes --name base --channel conda-forge jq
+RUN micromamba clean --all --yes
+
+# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time
+ENV PATH="/opt/conda/bin:${PATH}"
+
+# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json
+# can also run 'tb-profiler list_db' to find the same version info
+# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER}
+RUN tb-profiler update_tbdb --commit ${TBDB_VER}
+
+WORKDIR /data
+RUN tb-profiler update_tbdb --match_ref tuberculosis.fasta
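For local testing of this image, something like the following should work (a sketch: it mirrors the CI workflow's copy step above and the tag expected by config/containers.config; the `docker/` build context is an assumption inferred from that copy step):

```
cp -r bin resources docker/
docker build -f docker/Dockerfile.tbprofiler-0.9.8 \
    -t quay.io/pathogen-genomics-cymru/tbprofiler:0.9.8 docker/
```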
diff --git a/docker/Dockerfile.vcfpredict-0.9.8 b/docker/Dockerfile.vcfpredict-0.9.8
index 68d928e..3139c59 100644
--- a/docker/Dockerfile.vcfpredict-0.9.8
+++ b/docker/Dockerfile.vcfpredict-0.9.8
@@ -3,19 +3,16 @@ FROM ubuntu:20.04
 LABEL maintainer="pricea35@cardiff.ac.uk" \
 about.summary="container for the vcf predict workflow"
 
+#add run-vcf to container
+COPY bin/ /opt/bin/
+ENV PATH=/opt/bin:$PATH
+
 ENV PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" \
 PYTHON="python3 python3-pip python3-dev"
 
 ENV vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 \
-gumpy_version=1.0.15 \
-piezo_version=0.3 \
-gnomonicus_version=1.1.2 \
-tuberculosis_amr_catalogues=12d38733ad2e238729a3de9f725081e1d4872968
-
-COPY bin/ /opt/bin/
-ENV PATH=/opt/bin:$PATH
-
+#apt updates
 RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \
 && apt-get install -y $PACKAGES $PYTHON \
@@ -27,25 +24,4 @@ RUN apt-get update \
 && pip3 install awscli \
 && pip3 install . \
 && cp -r data /usr/local/lib/python3.8/dist-packages \
-&& cd ..
-
-RUN curl -fsSL https://github.com/oxfordmmm/gumpy/archive/refs/tags/v${gumpy_version}.tar.gz | tar -xz \
-&& cd gumpy-${gumpy_version} \
-&& pip3 install . \
-&& cd ..
-
-RUN curl -fsSL https://github.com/oxfordmmm/piezo/archive/refs/tags/v${piezo_version}.tar.gz | tar -xz \
-&& cd piezo-${piezo_version} \
-&& pip3 install . \
-&& cd ..
-
-RUN curl -fsSL https://github.com/oxfordmmm/gnomonicus/archive/refs/tags/v${gnomonicus_version}.tar.gz | tar -xz \
-&& cd gnomonicus-${gnomonicus_version} \
-&& pip3 install . \
-&& cd ..
-
-RUN git clone https://github.com/oxfordmmm/tuberculosis_amr_catalogues.git \
-&& cd tuberculosis_amr_catalogues \
-&& git checkout ${tuberculosis_amr_catalogues} \
-&& cd ..
-
+&& cd ..
\ No newline at end of file
diff --git a/main.nf b/main.nf
index 837d3ef..0cd98f2 100644
--- a/main.nf
+++ b/main.nf
@@ -36,24 +36,23 @@ Produces as output one directory per sample, containing the relevant reports & a
 Mandatory and conditional parameters:
 ------------------------------------------------------------------------
 --input_dir           Directory containing fastq OR bam files. Workflow will process one or the other, so don't mix
---filetype            File type in input_dir. One of either "fastq" or "bam". fastq files can be gzipped and do not
+--filetype            File type in input_dir. One of either "fastq" or "bam". fastq files can be gzipped and do not
                       have to literally take the form "*.fastq"; see --pattern
 --pattern             Regex to match files in input_dir, e.g. "*_R{1,2}.fq.gz". Only mandatory if --filetype is "fastq"
 --output_dir          Output directory, in which will be created subdirectories matching base name of fastq/bam files
---unmix_myco          Do you want to disambiguate mixed-mycobacterial samples by read alignment? One of "yes" or "no"
-                      If "yes" workflow will remove reads mapping to any minority mycobacterial genomes but in doing so
+--unmix_myco          Do you want to disambiguate mixed-mycobacterial samples by read alignment? One of "yes" or "no"
+                      If "yes" workflow will remove reads mapping to any minority mycobacterial genomes but in doing so
                       WILL ALMOST CERTAINLY ALSO reduce coverage of the principal species
-                      If "no" then mixed-mycobacterial samples will be left alone. Mixtures of mycobacteria + non-mycobacteria
+                      If "no" then mixed-mycobacterial samples will be left alone. Mixtures of mycobacteria + non-mycobacteria
                       will still be disambiguated
 --kraken_db           Directory containing Kraken2 database files (obtain from https://benlangmead.github.io/aws-indexes/k2)
 --bowtie2_index       Directory containing Bowtie2 index (obtain from ftp://ftp.ccb.jhu.edu/pub/data/bowtie2_indexes/hg19_1kgmaj_bt2.zip
                       This is the Langmead lab pre-built major-allele-SNP reference; see https://github.com/BenLangmead/bowtie-majref)
 --bowtie_index_name   Name of the bowtie index, e.g. hg19_1kgmaj
---vcfmix              Run VFCMIX "yes" or "no". Should be set to "no" for synthetic samples
---gnomonicus          Run gnomonicus "yes" or "no"
+--vcfmix              Run VCFMIX "yes" or "no". Should be set to "no" for synthetic samples
+--resistance_profiler Tool to profile resistance with.
+                      At the moment options are "tb-profiler" or "none"
---amr_cat             Path to the AMR catalogue (https://github.com/oxfordmmm/tuberculosis_amr_catalogues is at /tuberculosis_amr_catalogues in the vcfpredict container)
---afanc_myco_db       Path to the Afanc database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_3.0.tar.gz
+--afanc_myco_db       Path to the Afanc database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_7.0.tar.gz
 
 Optional parameters:
@@ -63,17 +63,17 @@ Optional parameters:
 ------------------------------------------------------------------------
                       default: null
                       using this parameter will apply an additional sanity test to your sample
-                      if you DO NOT use this parameter (default option), pipeline will determine principal species from
+                      if you DO NOT use this parameter (default option), pipeline will determine principal species from
                       the reads and consider any other species a contaminant
-                      if you DO use this parameter, pipeline will expect this to be the principal species. It will fail
-                      the sample if reads from this species are not actually the majority
+                      if you DO use this parameter, pipeline will expect this to be the principal species. It will fail
+                      the sample if reads from this species are not actually the majority
 
 
 Profiles:
 ------------------------------------------------------------------------
 singularity           to run with singularity
-docker                to run with docker
+docker                to run with docker
 
 
 Examples:
@@ -86,6 +86,21 @@ nextflow run main.nf -profile docker --filetype bam --input_dir bam_dir --unmix_
 
 }
 
+resistance_profilers = ["tb-profiler", "none"]
+
+if(!resistance_profilers.contains(params.resistance_profiler)){
+    exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler" or "none" to skip.'
+    }
+
+//tbprofiler container already has the reference genome in the DB, so skip the update if using containers
+if((params.resistance_profiler == "tb-profiler") && (params.container_enabled == "true")) {
+  update_tbprofiler = false
+} else {
+  update_tbprofiler = true
+}
+
+resistance_profiler = params.resistance_profiler
+
 // confirm that mandatory parameters have been set and that the conditional parameter, --pattern, has been used appropriately
 if ( params.input_dir == "" ) {
     exit 1, "error: --input_dir is mandatory (run with --help to see parameters)"
@@ -118,18 +133,17 @@ M Y C O B A C T E R I A L   P I P E L I N E
 
 Parameters used:
 ------------------------------------------------------------------------
---input_dir           ${params.input_dir}
---filetype            ${params.filetype}
---pattern             ${params.pattern}
---output_dir          ${params.output_dir}
---unmix_myco          ${params.unmix_myco}
---kraken_db           ${params.kraken_db}
+--input_dir           ${params.input_dir}
+--filetype            ${params.filetype}
+--pattern             ${params.pattern}
+--output_dir          ${params.output_dir}
+--unmix_myco          ${params.unmix_myco}
+--kraken_db           ${params.kraken_db}
 --bowtie2_index       ${params.bowtie2_index}
 --bowtie_index_name   ${params.bowtie_index_name}
---species             ${params.species}
---vcfmix              ${params.vcfmix}
---gnomonicus          ${params.gnomonicus}
---amr_cat             ${params.amr_cat}
+--resistance_profiler ${params.resistance_profiler}
+--species             ${params.species}
+--vcfmix              ${params.vcfmix}
 --afanc_myco_db       ${params.afanc_myco_db}
 
 Runtime data:
@@ -198,9 +212,10 @@ workflow {
 
         mpileup_vcf = clockwork.out.mpileup_vcf
         minos_vcf = clockwork.out.minos_vcf
-        genbank = channel.fromPath(params.gnomonicus_genbank)
+        reference = clockwork.out.reference
+        bam = clockwork.out.bam
 
-        vcfpredict(mpileup_vcf, minos_vcf, genbank)
+        vcfpredict(bam, mpileup_vcf, minos_vcf, reference)
 
     }
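For synthetic or non-TB batches the profiler can be skipped outright; the guard above accepts exactly two values. A hypothetical invocation (placeholder paths as in the README example):

```
NXF_VER=20.11.0-edge nextflow run main.nf -profile docker --filetype bam \
--input_dir bam_dir --unmix_myco no --output_dir . --kraken_db /path/to/database \
--bowtie2_index /path/to/index --bowtie_index_name hg19_1kgmaj \
--vcfmix no --resistance_profiler none
```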
printf "CREATE_ANTIBIOGRAM_${sample_name}"; else echo '{"gnomonicus-warning":"sample is not TB so cannot produce antibiogram using gnomonicus"}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi + if [[ \$top_hit =~ ^"Mycobacterium tuberculosis" ]]; then printf "CREATE_ANTIBIOGRAM_${sample_name}"; else echo '{"resistance-profiling-warning":"sample is not TB so cannot produce antibiogram using resistance profiling tools"}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi """ stub: @@ -296,7 +295,7 @@ process gvcf { cp ${sample_name}_report.json ${sample_name}_report_previous.json - if [ ${params.vcfmix} == "no" ] && [ ${params.gnomonicus} == "no" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi + if [ ${params.vcfmix} == "no" ] && [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi """ stub: diff --git a/modules/preprocessingModules.nf b/modules/preprocessingModules.nf index 9d7177a..b59d0cc 100644 --- a/modules/preprocessingModules.nf +++ b/modules/preprocessingModules.nf @@ -337,7 +337,7 @@ process afanc { tag { sample_name } label 'preprocessing' label 'normal_cpu' - label 'medium_memory' + label 'high_memory' label 'retry_afanc' publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP", mode: 'copy', pattern: '*_afanc_report.json' @@ -398,6 +398,7 @@ process mykrobe { label 'medium_memory' publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP", mode: 'copy', pattern: '*_mykrobe_report.json' + publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP", mode: 'copy', pattern: '*_mykrobe_report.csv' input: tuple val(sample_name), path(fq1), path(fq2), val(run_mykrobe), path(software_json) @@ -410,10 +411,10 @@ process mykrobe { tuple val(sample_name), path(fq1), path(fq2), stdout, emit: mykrobe_fqs script: - mykrobe_report = "${sample_name}_mykrobe_report.json" + mykrobe_report = "${sample_name}_mykrobe_report" """ - mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json --output ${mykrobe_report} -1 $fq1 $fq2 + mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json_and_csv --output ${mykrobe_report} -1 $fq1 $fq2 printf ${sample_name} """ @@ -421,7 +422,7 @@ process mykrobe { mykrobe_report = "${sample_name}_mykrobe_report.json" """ - touch ${mykrobe_report} + touch ${mykrobe_report}.json printf ${sample_name} """ } @@ -434,7 +435,7 @@ process bowtie2 { tag { sample_name } label 'preprocessing' label 'normal_cpu' - label 'low_memory' + label 'medium_memory' publishDir "${params.output_dir}/$sample_name/output_reads", mode: 'copy', pattern: '*.fq.gz', overwrite: 'true' @@ -733,6 +734,7 @@ process reMykrobe { label 'low_memory' publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP_and_postContamRemoval", mode: 'copy', pattern: '*_mykrobe_report.json' + publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP_and_postContamRemoval", mode: 'copy', pattern: '*_mykrobe_report.csv' input: tuple val(sample_name), path(fq1), 
diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf
index cee38b6..042b403 100644
--- a/modules/vcfpredictModules.nf
+++ b/modules/vcfpredictModules.nf
@@ -33,7 +33,7 @@ process vcfmix {
 
     jq -s ".[0] * .[1]" ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}
 
-    if [ ${params.gnomonicus} == "no" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}; fi
+    if [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}; fi
     """
 
     stub:
@@ -48,6 +48,84 @@ process vcfmix {
     """
 }
 
+process tbprofiler_update_db {
+    label 'low_memory'
+    label 'low_cpu'
+    label 'tbprofiler'
+
+    input:
+    path(reference)
+
+    script:
+    """
+    tb-profiler update_tbdb --match_ref $reference
+    """
+}
+
+process tbprofiler {
+    label 'medium_memory'
+    label 'medium_cpu'
+    label 'tbprofiler'
+
+    publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.tbprofiler-out.json', overwrite: 'true'
+    publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}'
+
+    input:
+    val(sample_name)
+    path(minos_vcf)
+    path(report_json)
+    val(isSampleTB)
+
+    output:
+    tuple val(sample_name), path("${sample_name}.tbprofiler-out.json"), path("${sample_name}_report.json"), emit: tbprofiler_json
+
+    when:
+    isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/
+
+    script:
+    error_log = "${sample_name}_err.json"
+    tbprofiler_json = "${sample_name}.tbprofiler-out.json"
+
+    """
+    bgzip ${minos_vcf}
+    tb-profiler profile --vcf ${minos_vcf}.gz --threads ${task.cpus}
+    mv results/tbprofiler.results.json ${tbprofiler_json}
+
+    cp ${sample_name}_report.json ${sample_name}_report_previous.json
+
+    echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log}
+
+    jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${tbprofiler_json} > ${report_json}
+    """
+}
+
+process add_allelic_depth {
+    label 'low_memory'
+    label 'low_cpu'
+    label 'tbprofiler'
+
+    input:
+    val(sample_name)
+    path(minos_vcf)
+    path(bam)
+    path(reference)
+    val(isSampleTB)
+
+    output:
+    path("${sample_name}_allelic_depth.minos.vcf")
+
+    when:
+    isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/
+
+    script:
+    """
+    samtools faidx $reference
+    samtools dict $reference -o ${reference.baseName}.dict
+    gatk VariantAnnotator -R $reference -I $bam -V $minos_vcf -A DepthPerAlleleBySample -O ${sample_name}_allelic_depth.minos.vcf
+    """
+
+}
+
 process gnomonicus {
 
     tag {sample_name}
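Throughout these modules the per-sample report is grown by deep-merging JSON documents. A quick illustration of the idiom (standard jq behaviour: `-s` slurps the inputs into an array and `*` merges objects recursively, with keys from the right-hand file winning):

```
echo '{"a":1,"nested":{"x":1}}' > first.json
echo '{"b":2,"nested":{"y":2}}' > second.json
jq -s '.[0] * .[1]' first.json second.json
# => { "a": 1, "nested": { "x": 1, "y": 2 }, "b": 2 }
```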
diff --git a/nextflow.config b/nextflow.config
index 21122da..43a0d71 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,13 +1,3 @@
-// config for lodestone
-
-manifest {
-    name = "pathogen-genomics-cymru/lodestone"
-}
-
-
-trace.overwrite = true
-report.overwrite = true
-
 params {
 
     // help message
@@ -43,13 +33,9 @@ params {
 
     // run VCFMIX 'yes' or 'no' (set to no for synthetic samples)
     vcfmix = 'yes'
-
-    // run gnomonicus 'yes' or 'no'
-    gnomonicus = 'yes'
-
-    // path to AMR catalogue for gnomon
-    // https://github.com/oxfordmmm/tuberculosis_amr_catalogues available at path /tuberculosis_amr_catalogues in container
-    amr_cat = "/tuberculosis_amr_catalogues/catalogues/NC_000962.3/NC_000962.3_WHO-UCN-GTB-PCI-2021.7_v1.0_GARC1_RUS.csv"
+
+    resistance_profiler = "tb-profiler"
+    update_tbprofiler = "true"
 
     // path to singularity recipes directory (needed to strip software versions in getversion)
     sing_dir = "${baseDir}/singularity"
@@ -63,231 +49,78 @@ params {
 
     //path to resources directory
     resource_dir = "${baseDir}/resources"
     refseq = "${resource_dir}/assembly_summary_refseq.txt"
-    gnomonicus_genbank = "${resource_dir}/H37rV_v3.gbk"
-
+    container_enabled = "false"
 }
 
 profiles {
-    climb {
-
-        //this is pre-defined in the CLIMB nextflow.config; however it has been added to allow
-        //-profile climb to still work outside of CLIMB system (e.g. to access S3 buckets)
-        aws {
-            profile = "climb"
-            client {
-                endpoint = 'https://s3.climb.ac.uk'
-                s3PathStyleAccess = true
-            }
-        }
-
-        docker.enabled = true
-        fixOwnership = true
-        runOptions = "-u \$(id -u):\$(id -g)"
-
-        // define containers for each process
-        process {
-            k8s {
-                pullPolicy = "always"
-            }
-            withLabel:low_cpu {cpus = 2}
-            withLabel:normal_cpu { cpus = 8 }
-            withLabel:low_memory { memory = '5GB' }
-            withLabel:medium_memory { memory = '10GB' }
-            withLabel:high_memory { memory = '18GB' }
-
-            withLabel:preprocessing {
-                container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7r9"
-            }
+    climb {
+        includeConfig 'config/containers.config'
+
+        //add in docker configs as the above config file is generic for any containerised run
+        docker.enabled = true
+        fixOwnership = true
+        runOptions = "-u \$(id -u):\$(id -g)"
+
+        process {
+            withLabel:getversion{
+                executor = "local"
+                container = null
+            }
 
-            withLabel:getversion{
-                executor = "local"
-            }
-
-            withLabel:afanc_parse{
-                executor = "local"
-            }
-            withName:downloadContamGenomes {
-                shell = ['/bin/bash','-u']
-                errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
-                maxRetries = 5
-            }
-
-            withLabel:retry_afanc {
-                shell = ['/bin/bash','-u']
-                errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' }
-                maxRetries = 5
-            }
-
-            withLabel:clockwork {
-                container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7r3"
-            }
-
-            withLabel:vcfpredict {
-                container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7r3"
-            }
-        }
-        params{
-            bowtie2_index = "s3://microbial-bioin-sp3/bowtie_hg19"
-            bowtie_index_name = "hg19_1kgmaj"
-            kraken_db = "s3://microbial-bioin-sp3/kraken_pluspf_16gb/"
-            afanc_myco_db = "s3://microbial-bioin-sp3/Mycobacteriaciae_DB_7.0/"
-
-            resource_dir = "s3://microbial-bioin-sp3/lodestone_resources"
-            refseq = "${resource_dir}/assembly_summary_refseq.txt"
-            gnomonicus_genbank = "${resource_dir}/H37rV_v3.gbk"
+            withLabel:afanc_parse{
+                executor = "local"
+                container = null
+            }
+        }
+
+        k8s {
+            computeResourceType = 'Job'
+        }
+
+        //params specific to paths on the climb system
+        params{
+            bowtie2_index = "s3://microbial-bioin-sp3/bowtie_hg19"
+            bowtie_index_name = "hg19_1kgmaj"
+            kraken_db = "s3://microbial-bioin-sp3/kraken_pluspf_16gb/"
+            afanc_myco_db = "s3://microbial-bioin-sp3/Mycobacteriaciae_DB_7.0/"
+            resource_dir = "s3://microbial-bioin-sp3/lodestone_resources"
+            refseq = "${resource_dir}/assembly_summary_refseq.txt"
         }
-    }
 
-    singularity {
-
-        params{
-            resource_dir = "/resources"
         }
+
+    singularity {
+        includeConfig 'config/containers.config'
 
-        singularity.enabled = 'true'
-        singularity.autoMounts = 'true'
-
-        // path to the singularity containers
-        singularity.cacheDir = "${baseDir}/singularity"
-
-        process {
-            withLabel:low_cpu {cpus = 2}
-            withLabel:normal_cpu { cpus = 8 }
-
-            withLabel:low_memory { memory = '5GB' }
-            withLabel:medium_memory { memory = '10GB' }
-            withLabel:high_memory { memory = '18GB' }
-
-            withLabel:getversion {
-                container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
-            }
-
-            withLabel:preprocessing {
-                container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
-            }
-
-            withName:downloadContamGenomes {
-                shell = ['/bin/bash','-u']
-                errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
-                maxRetries = 5
-            }
-
-            withLabel:retryAfanc {
-                shell = ['/bin/bash','-u']
-                // Afanc sometimes fails curl in slurm, retry if so (error is masked as error status 1)
-                errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' }
-                maxRetries = 5
-            }
-
-            withLabel:clockwork {
-                container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7"
-            }
-
-            withLabel:vcfpredict {
-                container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7"
-            }
+        singularity.enabled = 'true'
+        singularity.autoMounts = 'true'
+        //path to the singularity containers
+        singularity.cacheDir = "${baseDir}/singularity"
         }
-    }
 
     sp3 {
-
+        includeConfig 'config/containers.config'
+
+        //add in singularity configs as the above config file is generic for any containerised run
         singularity.enabled = 'true'
         singularity.autoMounts = 'true'
-        // path to the singularity containers
         singularity.cacheDir = "/data/images"
-
-        params{
-            resource_dir = "/resources"
-        }
 
         process {
-            scratch = true
-            errorStrategy = 'ignore'
-
-            withLabel:low_cpu {cpus = 2}
-            withLabel:normal_cpu { cpus = 8 }
-            withLabel:low_memory { memory = '5GB' }
-            withLabel:medium_memory { memory = '10GB' }
-            withLabel:high_memory { memory = '18GB' }
-
-            withLabel:getversion {
-                container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
-            }
-
-            withLabel:preprocessing {
-                container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
-            }
-
-            withName:downloadContamGenomes {
-                shell = ['/bin/bash','-u']
-                errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
-                maxRetries = 5
-            }
-
-            withLabel:retryAfanc {
-                shell = ['/bin/bash','-u']
-                errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' }
-                maxRetries = 5
-            }
-
-            withLabel:clockwork {
-                container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7"
-            }
-
-            withLabel:vcfpredict {
-                container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7"
-            }
+            scratch = true
         }
-    }
+    }
 
     docker {
-
+        includeConfig 'config/containers.config'
+
+        //add in docker configs as the above config file is generic for any containerised run
         docker.enabled = true
         fixOwnership = true
         runOptions = "-u \$(id -u):\$(id -g)"
-
-        params{
-            resource_dir = "/resources"
-        }
-
-        // define containers for each process
-        process {
-            withLabel:low_cpu {cpus = 2}
-            withLabel:normal_cpu { cpus = 8 }
-            withLabel:low_memory { memory = '5GB' }
-            withLabel:medium_memory { memory = '10GB' }
-            withLabel:high_memory { memory = '18GB' }
-
-            withLabel:getversion {
-                container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
-            }
-
-            withLabel:preprocessing {
-                container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
-            }
-
-            withName:downloadContamGenomes {
-                shell = ['/bin/bash','-u']
-                errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
-                maxRetries = 5
-            }
-
-            withLabel:retryAfanc {
-                shell = ['/bin/bash','-u']
-                errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' }
-                maxRetries = 5
-            }
-
-            withLabel:clockwork {
-                container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7"
-            }
-
-            withLabel:vcfpredict {
-                container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7"
-            }
-        }
     }
 }
diff --git a/singularity/Singularity.clockwork-0.9.7 b/singularity/Singularity.clockwork-0.9.8
similarity index 97%
rename from singularity/Singularity.clockwork-0.9.7
rename to singularity/Singularity.clockwork-0.9.8
index f3f3c24..0e13714 100644
--- a/singularity/Singularity.clockwork-0.9.7
+++ b/singularity/Singularity.clockwork-0.9.8
@@ -2,6 +2,8 @@ Bootstrap: docker
 From: debian:buster
 Stage: spython-base
 
+%files
+bin/ /opt/bin/
 %labels
 maintainer="pricea35@cardiff.ac.uk"
 about.summary="container for the clockwork workflow"
@@ -26,6 +28,9 @@ clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5
 PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all"
 PYTHON="python2.7 python-dev"
 
+PATH=/opt/bin:$PATH
+
+
 apt-get update \
 && apt-get install -y $PACKAGES $PYTHON \
 && curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \
@@ -36,7 +41,7 @@ apt-get update \
 && ln -s /usr/local/bin/python3.6 /usr/local/bin/python3 \
 && ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \
 && pip3 install --upgrade pip \
-&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools \
+&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \
 && wget -qO - https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | apt-key add - \
 && add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/ \
 && apt-get update && apt-get install -y adoptopenjdk-8-hotspot
@@ -136,6 +141,7 @@ export python_version=3.6.5
 export clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5
 export PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all"
 export PYTHON="python2.7 python-dev"
+export PATH=/opt/bin:$PATH
 export CLOCKWORK_CORTEX_DIR=/cortex
 export PATH=${PATH}:/clockwork/python/scripts
 export PICARD_JAR=/usr/local/bin/picard.jar
@@ -145,4 +151,4 @@ export LANGUAGE=en_US.UTF-8
 %runscript
 exec /bin/bash "$@"
 %startscript
-exec /bin/bash "$@"
\ No newline at end of file
+exec /bin/bash "$@"
diff --git a/singularity/Singularity.preprocessing-0.9.7 b/singularity/Singularity.preprocessing-0.9.8
similarity index 95%
rename from singularity/Singularity.preprocessing-0.9.7
rename to singularity/Singularity.preprocessing-0.9.8
index 7ca3b35..a164d85 100644
--- a/singularity/Singularity.preprocessing-0.9.7
+++ b/singularity/Singularity.preprocessing-0.9.8
@@ -2,6 +2,8 @@ Bootstrap: docker
 From: ubuntu:focal
 Stage: spython-base
 
+%files
+bin/ /opt/bin/
 %labels
 maintainer="pricea35@cardiff.ac.uk"
 about.summary="container for the preprocessing workflow"
@@ -25,13 +27,15 @@ fastani_version=1.33
 PACKAGES="procps curl git wget build-essential zlib1g-dev libncurses-dev libz-dev libbz2-dev liblzma-dev libcurl4-openssl-dev libgsl-dev rsync unzip ncbi-blast+ pigz jq libtbb-dev openjdk-11-jre-headless autoconf r-base-core locales locales-all"
 PYTHON="python3 python3-pip python3-dev"
-PYTHON_PACKAGES="biopython"
+PYTHON_PACKAGES="biopython awscli boto3"
 
 PATH=${PATH}:/usr/local/bin/mccortex/bin:/usr/local/bin/bwa-${bwa_version}:/opt/edirect
 LD_LIBRARY_PATH=/usr/local/lib
 
 export DEBIAN_FRONTEND="noninteractive"
 
+PATH=/opt/bin:$PATH
+
 apt-get update \
 && DEBIAN_FRONTEND="noninteractive" apt-get install -y $PACKAGES $PYTHON \
 && pip3 install --upgrade pip \
@@ -82,7 +86,7 @@ curl -fsSL https://github.com/OpenGene/fastp/archive/v${fastp_version}.tar.gz |
 && cd .. \
 && rm -r fastp-${fastp_version}
 
-wget http://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v${fastqc_version}.zip \
+wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v${fastqc_version}.zip \
 && unzip fastqc_v${fastqc_version}.zip \
 && chmod +x FastQC/fastqc \
 && mv FastQC/* /usr/local/bin \
@@ -102,10 +106,9 @@ curl -fsSL https://github.com/ArthurVM/Afanc/archive/refs/tags/v${afanc_version}
 && mv mash-Linux64-v${mash_version}/mash /usr/local/bin \
 && rm -r mash-Linux* \
 && wget https://github.com/ParBLiSS/FastANI/releases/download/v${fastani_version}/fastANI-Linux64-v${fastani_version}.zip \
-&& unzip fastANI-Linux64-v${fastani_version}.zip \
+&& unzip fastANI-Linux64-v${fastani_version}.zip \
 && mv fastANI /usr/local/bin
-
 sh -c "$(curl -fsSL ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh)" \
 && mkdir -p /opt/edirect \
 && mv /root/edirect/* /opt/edirect
@@ -149,9 +152,10 @@ export mash_version=2.3
 export fastani_version=1.33
 export PACKAGES="procps curl git wget build-essential zlib1g-dev libncurses-dev libz-dev libbz2-dev liblzma-dev libcurl4-openssl-dev libgsl-dev rsync unzip ncbi-blast+ pigz jq libtbb-dev openjdk-11-jre-headless autoconf r-base-core locales locales-all"
 export PYTHON="python3 python3-pip python3-dev"
-export PYTHON_PACKAGES="biopython"
+export PYTHON_PACKAGES="biopython awscli boto3"
 export PATH=${PATH}:/usr/local/bin/mccortex/bin:/usr/local/bin/bwa-${bwa_version}:/opt/edirect
 export LD_LIBRARY_PATH=/usr/local/lib
+export PATH=/opt/bin:$PATH
 export LC_ALL=en_US.UTF-8
 export LANG=en_US.UTF-8
 export LANGUAGE=en_US.UTF-8
diff --git a/singularity/Singularity.tbprofiler-0.9.8 b/singularity/Singularity.tbprofiler-0.9.8
new file mode 100644
index 0000000..33be3bd
--- /dev/null
+++ b/singularity/Singularity.tbprofiler-0.9.8
@@ -0,0 +1,70 @@
+Bootstrap: docker
+From: mambaorg/micromamba:1.3.0
+Stage: app
+
+%files
+resources/tuberculosis.fasta /data/tuberculosis.fasta
+%labels
+base.image="micromamba:1.3.0"
+dockerfile.version="1"
+software="tbprofiler"
+software.version="${TBPROFILER_VER}"
+description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database."
+website="https://github.com/jodyphelan/TBProfiler/"
+license="https://github.com/jodyphelan/TBProfiler/blob/master/LICENSE"
+maintainer="John Arnn"
+maintainer.email="jarnn@utah.gov"
+maintainer2="Curtis Kapsak"
+maintainer2.email="kapsakcj@gmail.com"
+%post
+
+#copy the reference genome to pre-compute our index
+
+su - root # USER root
+mkdir -p /
+cd /
+
+TBPROFILER_VER="5.0.1"
+
+# this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/
+# commits are found on https://github.com/jodyphelan/tbdb/commits/master
+# this was the latest commit as of 2023-10-26
+TBDB_VER="e25540b"
+
+# LABEL instructions tag the image with metadata that might be important to the user
+
+# Install dependencies via apt-get; cleanup apt garbage
+apt-get update && apt-get install -y --no-install-recommends \
+wget \
+ca-certificates \
+procps && \
+apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+# install tb-profiler via bioconda; install into 'base' conda env
+micromamba install --yes --name base --channel conda-forge --channel bioconda \
+tb-profiler=${TBPROFILER_VER}
+
+micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
+micromamba install --yes --name base --channel conda-forge --channel bioconda samtools
+micromamba install --yes --name base --channel conda-forge jq
+micromamba clean --all --yes
+
+# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time
+PATH="/opt/conda/bin:${PATH}"
+
+# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json
+# can also run 'tb-profiler list_db' to find the same version info
+# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER}
+tb-profiler update_tbdb --commit ${TBDB_VER}
+
+mkdir -p /data
+cd /data
+tb-profiler update_tbdb --match_ref tuberculosis.fasta
+%environment
+export PATH="/opt/conda/bin:${PATH}"
+%runscript
+cd /data
+exec /bin/bash "$@"
+%startscript
+cd /data
+exec /bin/bash "$@"
diff --git a/singularity/Singularity.vcfpredict-0.9.7 b/singularity/Singularity.vcfpredict-0.9.8
similarity index 51%
rename from singularity/Singularity.vcfpredict-0.9.7
rename to singularity/Singularity.vcfpredict-0.9.8
index ff29506..0146e7d 100644
--- a/singularity/Singularity.vcfpredict-0.9.7
+++ b/singularity/Singularity.vcfpredict-0.9.8
@@ -2,22 +2,22 @@ Bootstrap: docker
 From: ubuntu:20.04
 Stage: spython-base
 
+%files
+bin/ /opt/bin/
 %labels
 maintainer="pricea35@cardiff.ac.uk"
 about.summary="container for the vcf predict workflow"
 %post
 
+#add run-vcf to container
+PATH=/opt/bin:$PATH
+
 PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq"
 PYTHON="python3 python3-pip python3-dev"
 
 vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417
-gumpy_version=1.0.15
-piezo_version=0.3
-gnomonicus_version=1.1.2
-tuberculosis_amr_catalogues=12d38733ad2e238729a3de9f725081e1d4872968
-
+#apt updates
 apt-get update \
 && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \
 && apt-get install -y $PACKAGES $PYTHON \
@@ -26,38 +26,15 @@ apt-get update \
 && cd VCFMIX \
 && git checkout ${vcfmix_version} \
 && pip3 install recursive_diff \
+&& pip3 install awscli \
 && pip3 install . \
 && cp -r data /usr/local/lib/python3.8/dist-packages \
 && cd ..
-
-curl -fsSL https://github.com/oxfordmmm/gumpy/archive/refs/tags/v${gumpy_version}.tar.gz | tar -xz \
-&& cd gumpy-${gumpy_version} \
-&& pip3 install . \
-&& cd ..
-
-curl -fsSL https://github.com/oxfordmmm/piezo/archive/refs/tags/v${piezo_version}.tar.gz | tar -xz \
-&& cd piezo-${piezo_version} \
-&& pip3 install . \
-&& cd ..
-
-curl -fsSL https://github.com/oxfordmmm/gnomonicus/archive/refs/tags/v${gnomonicus_version}.tar.gz | tar -xz \
-&& cd gnomonicus-${gnomonicus_version} \
-&& pip3 install . \
-&& cd ..
-
-git clone https://github.com/oxfordmmm/tuberculosis_amr_catalogues.git \
-&& cd tuberculosis_amr_catalogues \
-&& git checkout ${tuberculosis_amr_catalogues} \
-&& cd ..
-
 %environment
+export PATH=/opt/bin:$PATH
 export PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq"
 export PYTHON="python3 python3-pip python3-dev"
 export vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417
-export gumpy_version=1.0.15
-export piezo_version=0.3
-export gnomonicus_version=1.1.2
-export tuberculosis_amr_catalogues=12d38733ad2e238729a3de9f725081e1d4872968
 %runscript
 exec /bin/bash "$@"
 %startscript
diff --git a/workflows/clockwork.nf b/workflows/clockwork.nf
index 3ffbaa0..148f523 100644
--- a/workflows/clockwork.nf
+++ b/workflows/clockwork.nf
@@ -39,5 +39,7 @@ workflow clockwork {
 
     mpileup_vcf = callVarsMpileup.out.mpileup_vcf.join(minos.out.minos_report, by: 0)
     minos_vcf = minos.out.minos_vcf.join(alignToRef.out.alignToRef_report, by: 0)
+    reference = getRefFromJSON.out
+    bam = alignToRef.out.alignToRef_bam
 
 }
diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf
index 5097dad..fbb19aa 100644
--- a/workflows/preprocessing.nf
+++ b/workflows/preprocessing.nf
@@ -66,7 +66,6 @@ workflow preprocessing {
 
     bowtie2(kraken2.out.kraken2_fqs, bowtie_dir.toList())
     identifyBacterialContaminants(bowtie2.out.bowtie2_fqs.join(speciation_report, by: 0).join(kraken2.out.kraken2_json, by: 0), resource_dir, refseq_path)
-    identifyBacterialContaminants.out.prev_sample_json.view()
 
     downloadContamGenomes(identifyBacterialContaminants.out.contam_list)
diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf
index 9efc651..8fec00f 100644
--- a/workflows/vcfpredict.nf
+++ b/workflows/vcfpredict.nf
@@ -3,36 +3,47 @@ nextflow.enable.dsl = 2
 
 // import modules
 include {vcfmix} from '../modules/vcfpredictModules.nf' params(params)
-include {gnomonicus} from '../modules/vcfpredictModules.nf' params(params)
-include {finalJson} from '../modules/vcfpredictModules.nf' params(params)
+include {tbprofiler} from '../modules/vcfpredictModules.nf' params(params)
+include {tbprofiler_update_db} from '../modules/vcfpredictModules.nf' params(params)
+include {add_allelic_depth} from '../modules/vcfpredictModules.nf' params(params)
+include {finalJson} from '../modules/vcfpredictModules.nf' params(params)
 
 // define workflow component
 workflow vcfpredict {
 
     take:
-
-    clockwork_bcftools
-    clockwork_minos
-    genbank
+    clockwork_bam
+    clockwork_bcftools_tuple
+    minos_vcf_tuple
+    reference_fasta
+
 
     main:
 
     if ( params.vcfmix == "yes" ) {
 
-        vcfmix(clockwork_bcftools)
+        vcfmix(clockwork_bcftools_tuple)
 
     }
 
-    if ( params.gnomonicus == "yes" ) {
-
-        gnomonicus(clockwork_minos, genbank)
-
+    if ( params.resistance_profiler == "tb-profiler"){
+        //get just the vcf
+        sample_name = minos_vcf_tuple.map{it[0]}
+        minos_vcf = minos_vcf_tuple.map{it[1]}
+        do_we_resistance_profile = minos_vcf_tuple.map{it[2]}
+        report_json = minos_vcf_tuple.map{it[3]}
+        bam = clockwork_bam.map{it[2]}
+
+        if (params.update_tbprofiler == "true"){
+            tbprofiler_update_db(reference_fasta)
+        }
+
+        //add allelic depth back in: was calculated in mpileup but lost in minos
+        add_allelic_depth(sample_name, minos_vcf, bam, reference_fasta, do_we_resistance_profile)
+        tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile)
     }
-
-    if ( (params.vcfmix == "yes") && (params.gnomonicus == "yes") ) {
-
-        finalJson(vcfmix.out.vcfmix_json.join(gnomonicus.out.gnomon_json, by: 0))
-
+
+    if (params.vcfmix == "yes" && params.resistance_profiler != "none"){
+        finalJson(vcfmix.out.vcfmix_json.join(tbprofiler.out.tbprofiler_json, by: 0))
     }
-
 }
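With tb-profiler wired in end to end, the cheapest smoke test of the new plumbing is a stub run, since the pipeline already targets Nextflow's stub-run feature (a sketch; database paths are placeholders and only the stub blocks shown above are exercised):

```
NXF_VER=20.11.0-edge nextflow run main.nf -stub-run -profile docker \
--filetype fastq --input_dir fq_dir --pattern "*_R{1,2}.fq.gz" \
--unmix_myco no --output_dir . --kraken_db /path/to/database \
--bowtie2_index /path/to/index --bowtie_index_name hg19_1kgmaj \
--resistance_profiler tb-profiler
```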