diff --git a/.github/workflows/build-push-quay.yml b/.github/workflows/build-push-quay.yml
index 9043ae6..a1c2b72 100644
--- a/.github/workflows/build-push-quay.yml
+++ b/.github/workflows/build-push-quay.yml
@@ -2,12 +2,11 @@ name: build-push-quay
on:
push:
branches:
- - v0.9.6
- - 0.9.7-dev
- - climb
+ - main
paths:
- '**/Dockerfile*'
- "bin/"
+ - "resources/"
workflow_dispatch:
@@ -46,6 +45,7 @@ jobs:
- name: Copy folders to docker
run: |
cp -r bin docker/bin
+ cp -r resources docker/resources
- name: Get image name
id: image_name
diff --git a/README.md b/README.md
index 5c44791..a076e2a 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ Pipeline cleans and QCs reads with fastp and FastQC, classifies with Kraken2 & A
Note that while Mykrobe is included within this pipeline, it runs as an independent process and is not used for any downstream reporting.
-**WARNING**: There are currently known errors with vcfmix and gnomonicus, as such `errorStrategy 'ignore'` has been added to the processes vcfpredict:vcfmix and vcfpredict:gnomonicus to stop the pipeline from crashing. Please check the stdout from nextflow to see whether these processes have ran successfully.
+**WARNING**: There are currently known errors with vcfmix; as such, `errorStrategy 'ignore'` has been added to the process vcfpredict:vcfmix to stop the pipeline from crashing. Please check the stdout from Nextflow to see whether this process has run successfully.
## Quick Start ##
This is a Nextflow DSL2 pipeline, it requires a version of Nextflow that supports DSL2 and the stub-run feature. It is recommended to run the pipeline with `NXF_VER=20.11.0-edge`, as the pipeline has been tested using this version. E.g. to download
@@ -29,6 +29,8 @@ NXF_VER=20.11.0-edge nextflow run main.nf -profile docker --filetype bam --input
--output_dir . --kraken_db /path/to/database --bowtie2_index /path/to/index --bowtie_index_name hg19_1kgmaj
```
+There is also a pre-configured climb profile to run Lodestone on a CLIMB Jupyter Notebook Server. Add ```-profile climb``` to your command invocation. The input directory can point to an S3 bucket natively (e.g. ```--input_dir s3://my-team/bucket```). By default this profile runs the workflow in Docker containers and takes advantage of Kubernetes pods. The Kraken2, Bowtie2 and Afanc databases default to the ```pluspf16```, ```hg19_1kgmaj_bt2``` and ```Mycobacteriaciae_DB_7.0``` directories respectively, which are mounted on a public S3 bucket hosted on CLIMB.
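+
+For example, a typical invocation from a CLIMB notebook server might look like this (illustrative values; substitute your own bucket and read pattern):
+```
+NXF_VER=20.11.0-edge nextflow run main.nf -profile climb --filetype fastq \
+    --input_dir s3://my-team/bucket --pattern "*_R{1,2}.fastq.gz" --output_dir .
+```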
+
### Executors ###
By default, the pipeline will just run on the local machine. To run on a cluster, modifications will have to be made to the `nextflow.config` to add in the executor. E.g. for a SLURM cluster add `process.executor = 'slurm'`. For more information on executor options see the Nextflow docs: https://www.nextflow.io/docs/latest/executor.html
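+
+For example, a minimal sketch of such a change to `nextflow.config` (assuming a SLURM cluster; the queue name is a placeholder):
+```
+process {
+    executor = 'slurm'
+    queue = 'compute'
+}
+```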
@@ -63,10 +65,8 @@ Directory containing Bowtie2 index (obtain from ftp://ftp.ccb.jhu.edu/pub/data/b
Name of the bowtie index, e.g. hg19_1kgmaj
* **vcfmix**
Run [vcfmix](https://github.com/AlexOrlek/VCFMIX), yes or no. Set to no for synthetic samples
-* **gnomonicus**
-Run [gnomonicus](https://github.com/oxfordmmm/gnomonicus), yes or no
-* **amr_cat**
-Path to AMR catalogue for gnomonicus
+* **resistance_profiler**
+Run resistance profiling for Mycobacterium tuberculosis. Either ["tb-profiler"](https://tbdr.lshtm.ac.uk/) or "none".
* **afanc_myco_db**
Path to the [afanc](https://github.com/ArthurVM/Afanc) database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_7.0.tar.gz
@@ -125,12 +125,10 @@ process clockwork:alignToRef\
25. (Fail) If < 50% of the reference genome was covered at 10-fold depth
process clockwork:minos\
-26. (Warn) If sample is not TB, then it is not passed to gnomonicus
-
-## Running on CLIMB Jupyter Hub
-There is a pre-configured climb profile to run Lodestone on a CLIMB Jupyter Notebook Server. Add ```profile climb``` to your command invocation. The input directory can point to an S3 bucket natively (e.g. ```--input_dir s3://my-team/bucket```). By default this will run the workflow in Docker containers and take advantage of kubernetes pods. The Kraken2, Bowtie2 and Afanc databases will by default point to the ```pluspf16```, ```hg19_1kgmaj_bt2``` and ```Mycobacteriaciae_DB_7.0``` respectively. These are mounted on a public shared volume.
+26. (Warn) If sample is not TB, then it is not passed to a resistance profiler
## Acknowledgements ##
For a list of direct authors of this pipeline, please see the contributors list. All of the software dependencies of this pipeline are recorded in the version.json
The preprocessing sub-workflow is based on the preprocessing nextflow DSL1 pipeline written by Stephen Bush, University of Oxford. The clockwork sub-workflow uses aspects of the variant calling workflow from https://github.com/iqbal-lab-org/clockwork, lead author Martin Hunt, Iqbal Lab at EMBL-EBI
+
diff --git a/config/containers.config b/config/containers.config
new file mode 100644
index 0000000..e961b71
--- /dev/null
+++ b/config/containers.config
@@ -0,0 +1,48 @@
+params{
+    container_enabled = "true"
+    update_tbprofiler = "false"
+}
+
+process {
+
+
+ withLabel:low_cpu {cpus = 2}
+ withLabel:normal_cpu { cpus = 8 }
+ withLabel:low_memory { memory = '5GB' }
+ withLabel:medium_memory { memory = '10GB' }
+ withLabel:high_memory { memory = '18GB' }
+
+ withLabel:getversion {
+ container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8"
+ }
+
+ withLabel:preprocessing {
+ container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8"
+ }
+
+ withLabel:tbprofiler {
+ container = "quay.io/pathogen-genomics-cymru/tbprofiler:0.9.8"
+ }
+
+ withName:downloadContamGenomes {
+ shell = ['/bin/bash','-u']
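+        //retry on exit codes 100-113, terminate on anything else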
+ errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
+ maxRetries = 5
+ }
+
+    withLabel:retry_afanc {
+ shell = ['/bin/bash','-u']
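+        //Afanc sometimes fails curl (error masked as exit status 1), so retry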
+ errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' }
+ maxRetries = 5
+ }
+
+ withLabel:clockwork {
+ container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8"
+ }
+
+ withLabel:vcfpredict {
+ container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8"
+ }
+ }
\ No newline at end of file
diff --git a/docker/Dockerfile.tbprofiler-0.9.8 b/docker/Dockerfile.tbprofiler-0.9.8
new file mode 100644
index 0000000..686c9c4
--- /dev/null
+++ b/docker/Dockerfile.tbprofiler-0.9.8
@@ -0,0 +1,54 @@
+FROM mambaorg/micromamba:1.3.0 as app
+
+#copy the reference genome to pre-compute our index
+COPY resources/tuberculosis.fasta /data/tuberculosis.fasta
+
+USER root
+WORKDIR /
+
+ARG TBPROFILER_VER="5.0.1"
+
+# this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/
+# commits are found on https://github.com/jodyphelan/tbdb/commits/master
+# this was the latest commit as of 2023-10-26
+ARG TBDB_VER="e25540b"
+
+# LABEL instructions tag the image with metadata that might be important to the user
+LABEL base.image="micromamba:1.3.0"
+LABEL dockerfile.version="1"
+LABEL software="tbprofiler"
+LABEL software.version="${TBPROFILER_VER}"
+LABEL description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database."
+LABEL website="https://github.com/jodyphelan/TBProfiler/"
+LABEL license="https://github.com/jodyphelan/TBProfiler/blob/master/LICENSE"
+LABEL maintainer="John Arnn"
+LABEL maintainer.email="jarnn@utah.gov"
+LABEL maintainer2="Curtis Kapsak"
+LABEL maintainer2.email="kapsakcj@gmail.com"
+
+# Install dependencies via apt-get; cleanup apt garbage
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ wget \
+ ca-certificates \
+ procps && \
+ apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+# install tb-profiler via bioconda; install into 'base' conda env
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \
+ tb-profiler=${TBPROFILER_VER}
+
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda samtools
+RUN micromamba install --yes --name base --channel conda-forge jq
+RUN micromamba clean --all --yes
+
+# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time
+ENV PATH="/opt/conda/bin:${PATH}"
+
+# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json
+# can also run 'tb-profiler list_db' to find the same version info
+# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER}
+RUN tb-profiler update_tbdb --commit ${TBDB_VER}
+
+WORKDIR /data
+RUN tb-profiler update_tbdb --match_ref tuberculosis.fasta
diff --git a/docker/Dockerfile.vcfpredict-0.9.8 b/docker/Dockerfile.vcfpredict-0.9.8
index 68d928e..3139c59 100644
--- a/docker/Dockerfile.vcfpredict-0.9.8
+++ b/docker/Dockerfile.vcfpredict-0.9.8
@@ -3,19 +3,16 @@ FROM ubuntu:20.04
LABEL maintainer="pricea35@cardiff.ac.uk" \
about.summary="container for the vcf predict workflow"
+#add run-vcf to container
+COPY bin/ /opt/bin/
+ENV PATH=/opt/bin:$PATH
+
ENV PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" \
PYTHON="python3 python3-pip python3-dev"
ENV vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 \
-gumpy_version=1.0.15 \
-piezo_version=0.3 \
-gnomonicus_version=1.1.2 \
-tuberculosis_amr_catalogues=12d38733ad2e238729a3de9f725081e1d4872968
-
-COPY bin/ /opt/bin/
-ENV PATH=/opt/bin:$PATH
-
+#apt updates
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \
&& apt-get install -y $PACKAGES $PYTHON \
@@ -27,25 +24,4 @@ RUN apt-get update \
&& pip3 install awscli \
&& pip3 install . \
&& cp -r data /usr/local/lib/python3.8/dist-packages \
-&& cd ..
-
-RUN curl -fsSL https://github.com/oxfordmmm/gumpy/archive/refs/tags/v${gumpy_version}.tar.gz | tar -xz \
-&& cd gumpy-${gumpy_version} \
-&& pip3 install . \
-&& cd ..
-
-RUN curl -fsSL https://github.com/oxfordmmm/piezo/archive/refs/tags/v${piezo_version}.tar.gz | tar -xz \
-&& cd piezo-${piezo_version} \
-&& pip3 install . \
-&& cd ..
-
-RUN curl -fsSL https://github.com/oxfordmmm/gnomonicus/archive/refs/tags/v${gnomonicus_version}.tar.gz | tar -xz \
-&& cd gnomonicus-${gnomonicus_version} \
-&& pip3 install . \
-&& cd ..
-
-RUN git clone https://github.com/oxfordmmm/tuberculosis_amr_catalogues.git \
-&& cd tuberculosis_amr_catalogues \
-&& git checkout ${tuberculosis_amr_catalogues} \
-&& cd ..
-
+&& cd ..
\ No newline at end of file
diff --git a/main.nf b/main.nf
index 837d3ef..0cd98f2 100644
--- a/main.nf
+++ b/main.nf
@@ -36,24 +36,24 @@ Produces as output one directory per sample, containing the relevant reports & a
Mandatory and conditional parameters:
------------------------------------------------------------------------
--input_dir Directory containing fastq OR bam files. Workflow will process one or the other, so don't mix
---filetype File type in input_dir. One of either "fastq" or "bam". fastq files can be gzipped and do not
+--filetype File type in input_dir. One of either "fastq" or "bam". fastq files can be gzipped and do not
have to literally take the form "*.fastq"; see --pattern
--pattern Regex to match files in input_dir, e.g. "*_R{1,2}.fq.gz". Only mandatory if --filetype is "fastq"
--output_dir Output directory, in which will be created subdirectories matching base name of fastq/bam files
---unmix_myco Do you want to disambiguate mixed-mycobacterial samples by read alignment? One of "yes" or "no"
- If "yes" workflow will remove reads mapping to any minority mycobacterial genomes but in doing so
+--unmix_myco Do you want to disambiguate mixed-mycobacterial samples by read alignment? One of "yes" or "no"
+ If "yes" workflow will remove reads mapping to any minority mycobacterial genomes but in doing so
WILL ALMOST CERTAINLY ALSO reduce coverage of the principal species
- If "no" then mixed-mycobacterial samples will be left alone. Mixtures of mycobacteria + non-mycobacteria
+ If "no" then mixed-mycobacterial samples will be left alone. Mixtures of mycobacteria + non-mycobacteria
will still be disambiguated
--kraken_db Directory containing Kraken2 database files (obtain from https://benlangmead.github.io/aws-indexes/k2)
--bowtie2_index Directory containing Bowtie2 index (obtain from ftp://ftp.ccb.jhu.edu/pub/data/bowtie2_indexes/hg19_1kgmaj_bt2.zip
This is the Langmead lab pre-built major-allele-SNP reference; see https://github.com/BenLangmead/bowtie-majref)
--bowtie_index_name Name of the bowtie index, e.g. hg19_1kgmaj
---vcfmix Run VFCMIX "yes" or "no". Should be set to "no" for synthetic samples
---gnomonicus Run gnomon "yes" or "no"
+--vcfmix                  Run VCFMIX "yes" or "no". Should be set to "no" for synthetic samples
+--resistance_profiler     Tool used for resistance profiling. Current options are "tb-profiler" or "none"
--amr_cat Path to the AMR catalogue (https://github.com/oxfordmmm/tuberculosis_amr_catalogues is at /tuberculosis_amr_catalogues
in the vcfpredict container)
---afanc_myco_db Path to the Afanc database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_3.0.tar.gz
+--afanc_myco_db Path to the Afanc database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_3.0.tar.gz
Optional parameters:
------------------------------------------------------------------------
@@ -63,17 +63,17 @@ Optional parameters:
default: null
using this parameter will apply an additional sanity test to your sample
- if you DO NOT use this parameter (default option), pipeline will determine principal species from
+ if you DO NOT use this parameter (default option), pipeline will determine principal species from
the reads and consider any other species a contaminant
- if you DO use this parameter, pipeline will expect this to be the principal species. It will fail
- the sample if reads from this species are not actually the majority
+ if you DO use this parameter, pipeline will expect this to be the principal species. It will fail
+ the sample if reads from this species are not actually the majority
Profiles:
------------------------------------------------------------------------
singularity to run with singularity
-docker to run with docker
+docker to run with docker
Examples:
@@ -86,6 +86,21 @@ nextflow run main.nf -profile docker --filetype bam --input_dir bam_dir --unmix_
}
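+
+//validate the resistance profiler choice early so invalid values fail fast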
+resistance_profilers = ["tb-profiler", "none"]
+
+if(!resistance_profilers.contains(params.resistance_profiler)){
+    exit 1, 'Invalid resistance profiler. Must be "tb-profiler", or "none" to skip profiling.'
+}
+
+//the tbprofiler container already has the reference genome in its DB, so skip the DB update when running in containers
+if((params.resistance_profiler == "tb-profiler") && (params.container_enabled == "true")) {
+    update_tbprofiler = false
+} else {
+    update_tbprofiler = true
+}
+
+resistance_profiler = params.resistance_profiler
+
// confirm that mandatory parameters have been set and that the conditional parameter, --pattern, has been used appropriately
if ( params.input_dir == "" ) {
exit 1, "error: --input_dir is mandatory (run with --help to see parameters)"
@@ -118,18 +133,17 @@ M Y C O B A C T E R I A L P I P E L I N E
Parameters used:
------------------------------------------------------------------------
---input_dir ${params.input_dir}
---filetype ${params.filetype}
---pattern ${params.pattern}
---output_dir ${params.output_dir}
---unmix_myco ${params.unmix_myco}
---kraken_db ${params.kraken_db}
+--input_dir ${params.input_dir}
+--filetype ${params.filetype}
+--pattern ${params.pattern}
+--output_dir ${params.output_dir}
+--unmix_myco ${params.unmix_myco}
+--kraken_db ${params.kraken_db}
--bowtie2_index ${params.bowtie2_index}
--bowtie_index_name ${params.bowtie_index_name}
---species ${params.species}
---vcfmix ${params.vcfmix}
---gnomonicus ${params.gnomonicus}
---amr_cat ${params.amr_cat}
+--resistance_profiler ${params.resistance_profiler}
+--species ${params.species}
+--vcfmix ${params.vcfmix}
--afanc_myco_db ${params.afanc_myco_db}
Runtime data:
@@ -198,9 +212,10 @@ workflow {
mpileup_vcf = clockwork.out.mpileup_vcf
minos_vcf = clockwork.out.minos_vcf
- genbank = channel.fromPath(params.gnomonicus_genbank)
+ reference = clockwork.out.reference
+ bam = clockwork.out.bam
- vcfpredict(mpileup_vcf, minos_vcf, genbank)
+ vcfpredict(bam, mpileup_vcf, minos_vcf, reference)
}
diff --git a/modules/clockworkModules.nf b/modules/clockworkModules.nf
index 4a2675d..0bea703 100644
--- a/modules/clockworkModules.nf
+++ b/modules/clockworkModules.nf
@@ -47,7 +47,7 @@ process alignToRef {
doWeAlign =~ /NOW\_ALIGN\_TO\_REF\_${sample_name}/
output:
- tuple val(sample_name), path("${sample_name}_report.json"), path("${sample_name}.bam"), path("${sample_name}.fa"), stdout, emit: alignToRef_bam
+ tuple val(sample_name), path("${sample_name}_report.json"), path("${sample_name}.bam"), path(reference_path), stdout, emit: alignToRef_bam
path("${sample_name}.bam.bai", emit: alignToRef_bai)
path("${sample_name}_alignmentStats.json", emit: alignToRef_json)
path "${sample_name}_err.json", emit: alignToRef_log optional true
@@ -63,9 +63,8 @@ process alignToRef {
"""
echo $reference_path
- cp ${reference_path} ${sample_name}.fa
- minimap2 -ax sr ${sample_name}.fa -t ${task.cpus} $fq1 $fq2 | samtools fixmate -m - - | samtools sort -T tmp - | samtools markdup --reference ${sample_name}.fa - minimap.bam
+ minimap2 -ax sr $reference_path -t ${task.cpus} $fq1 $fq2 | samtools fixmate -m - - | samtools sort -T tmp - | samtools markdup --reference $reference_path - minimap.bam
java -jar /usr/local/bin/picard.jar AddOrReplaceReadGroups INPUT=minimap.bam OUTPUT=${bam} RGID=${sample_name} RGLB=lib RGPL=Illumina RGPU=unit RGSM=sample
@@ -206,7 +205,7 @@ process callVarsCortex {
process minos {
/**
- * @QCcheckpoint check if top species is TB, if yes pass vcf to gnomonicus
+ * @QCcheckpoint check if top species is TB, if yes pass vcf to resistance profiling
*/
tag { sample_name }
@@ -241,7 +240,7 @@ process minos {
cp ${sample_name}_report.json ${sample_name}_report_previous.json
- if [[ \$top_hit =~ ^"Mycobacterium tuberculosis" ]]; then printf "CREATE_ANTIBIOGRAM_${sample_name}"; else echo '{"gnomonicus-warning":"sample is not TB so cannot produce antibiogram using gnomonicus"}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi
+ if [[ \$top_hit =~ ^"Mycobacterium tuberculosis" ]]; then printf "CREATE_ANTIBIOGRAM_${sample_name}"; else echo '{"resistance-profiling-warning":"sample is not TB so cannot produce antibiogram using resistance profiling tools"}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi
"""
stub:
@@ -296,7 +295,7 @@ process gvcf {
cp ${sample_name}_report.json ${sample_name}_report_previous.json
- if [ ${params.vcfmix} == "no" ] && [ ${params.gnomonicus} == "no" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi
+ if [ ${params.vcfmix} == "no" ] && [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi
"""
stub:
diff --git a/modules/preprocessingModules.nf b/modules/preprocessingModules.nf
index 9d7177a..b59d0cc 100644
--- a/modules/preprocessingModules.nf
+++ b/modules/preprocessingModules.nf
@@ -337,7 +337,7 @@ process afanc {
tag { sample_name }
label 'preprocessing'
label 'normal_cpu'
- label 'medium_memory'
+ label 'high_memory'
label 'retry_afanc'
publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP", mode: 'copy', pattern: '*_afanc_report.json'
@@ -398,6 +398,7 @@ process mykrobe {
label 'medium_memory'
publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP", mode: 'copy', pattern: '*_mykrobe_report.json'
+ publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP", mode: 'copy', pattern: '*_mykrobe_report.csv'
input:
tuple val(sample_name), path(fq1), path(fq2), val(run_mykrobe), path(software_json)
@@ -410,10 +411,10 @@ process mykrobe {
tuple val(sample_name), path(fq1), path(fq2), stdout, emit: mykrobe_fqs
script:
- mykrobe_report = "${sample_name}_mykrobe_report.json"
+ mykrobe_report = "${sample_name}_mykrobe_report"
"""
- mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json --output ${mykrobe_report} -1 $fq1 $fq2
+ mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json_and_csv --output ${mykrobe_report} -1 $fq1 $fq2
printf ${sample_name}
"""
@@ -421,7 +422,7 @@ process mykrobe {
-    mykrobe_report = "${sample_name}_mykrobe_report.json"
+    mykrobe_report = "${sample_name}_mykrobe_report"
"""
- touch ${mykrobe_report}
+ touch ${mykrobe_report}.json
printf ${sample_name}
"""
}
@@ -434,7 +435,7 @@ process bowtie2 {
tag { sample_name }
label 'preprocessing'
label 'normal_cpu'
- label 'low_memory'
+ label 'medium_memory'
publishDir "${params.output_dir}/$sample_name/output_reads", mode: 'copy', pattern: '*.fq.gz', overwrite: 'true'
@@ -733,6 +734,7 @@ process reMykrobe {
label 'low_memory'
publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP_and_postContamRemoval", mode: 'copy', pattern: '*_mykrobe_report.json'
+ publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP_and_postContamRemoval", mode: 'copy', pattern: '*_mykrobe_report.csv'
input:
tuple val(sample_name), path(fq1), path(fq2), path(software_json)
@@ -741,17 +743,17 @@ process reMykrobe {
tuple val(sample_name), path("${sample_name}_mykrobe_report.json"), emit: reMykrobe_report
script:
- mykrobe_report = "${sample_name}_mykrobe_report.json"
+ mykrobe_report = "${sample_name}_mykrobe_report"
"""
- mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json --output ${mykrobe_report} -1 $fq1 $fq2
+ mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json_and_csv --output ${mykrobe_report} -1 $fq1 $fq2
"""
stub:
-    mykrobe_report = "${sample_name}_mykrobe_report.json"
+    mykrobe_report = "${sample_name}_mykrobe_report"
"""
- touch ${mykrobe_report}
+ touch ${mykrobe_report}.json
"""
}
diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf
index cee38b6..042b403 100644
--- a/modules/vcfpredictModules.nf
+++ b/modules/vcfpredictModules.nf
@@ -33,7 +33,7 @@ process vcfmix {
jq -s ".[0] * .[1]" ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}
- if [ ${params.gnomonicus} == "no" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}; fi
+ if [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}; fi
"""
stub:
@@ -48,6 +48,84 @@ process vcfmix {
"""
}
+process tbprofiler_update_db {
+ label 'low_memory'
+ label 'low_cpu'
+ label 'tbprofiler'
+
+ input:
+ path(reference)
+
+ script:
+ """
+ tb-profiler update_tbdb --match_ref $reference
+ """
+}
+
+process tbprofiler {
+ label 'medium_memory'
+ label 'medium_cpu'
+ label 'tbprofiler'
+
+ publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.tbprofiler-out.json', overwrite: 'true'
+ publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}'
+
+ input:
+ val(sample_name)
+ path(minos_vcf)
+ path(report_json)
+ val(isSampleTB)
+
+ output:
+ tuple val(sample_name), path("${sample_name}.tbprofiler-out.json"), path("${sample_name}_report.json"), emit: tbprofiler_json
+
+ when:
+ isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/
+
+ script:
+ error_log = "${sample_name}_err.json"
+ tbprofiler_json = "${sample_name}.tbprofiler-out.json"
+
+ """
+ bgzip ${minos_vcf}
+ tb-profiler profile --vcf ${minos_vcf}.gz --threads ${task.cpus}
+ mv results/tbprofiler.results.json ${tbprofiler_json}
+
+ cp ${sample_name}_report.json ${sample_name}_report_previous.json
+
+ echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log}
+
+ jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${tbprofiler_json} > ${report_json}
+ """
+}
+
+process add_allelic_depth {
+ label 'low_memory'
+ label 'low_cpu'
+ label 'tbprofiler'
+
+ input:
+ val(sample_name)
+ path(minos_vcf)
+ path(bam)
+ path(reference)
+ val(isSampleTB)
+
+ output:
+ path("${sample_name}_allelic_depth.minos.vcf")
+
+ when:
+ isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/
+
+ script:
+ """
+ samtools faidx $reference
+ samtools dict $reference -o ${reference.baseName}.dict
+ gatk VariantAnnotator -R $reference -I $bam -V $minos_vcf -A DepthPerAlleleBySample -O ${sample_name}_allelic_depth.minos.vcf
+ """
+
+}
+
process gnomonicus {
tag {sample_name}
diff --git a/nextflow.config b/nextflow.config
index 21122da..43a0d71 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,13 +1,3 @@
-// config for lodestone
-
-manifest {
- name = "pathogen-genomics-cymru/lodestone"
-}
-
-
-trace.overwrite = true
-report.overwrite = true
-
params {
// help message
@@ -43,13 +33,9 @@ params {
// run VCFMIX 'yes' or 'no' (set to no for synthetic samples)
vcfmix = 'yes'
-
- // run gnomonicus 'yes' or 'no'
- gnomonicus = 'yes'
-
- // path to AMR catalogue for gnomon
- // https://github.com/oxfordmmm/tuberculosis_amr_catalogues available at path /tuberculosis_amr_catalogues in container
- amr_cat = "/tuberculosis_amr_catalogues/catalogues/NC_000962.3/NC_000962.3_WHO-UCN-GTB-PCI-2021.7_v1.0_GARC1_RUS.csv"
+
+ resistance_profiler = "tb-profiler"
+ update_tbprofiler = "true"
// path to singularity recipes directory (needed to strip software versions in getversion)
sing_dir = "${baseDir}/singularity"
@@ -63,231 +49,76 @@ params {
//path to resources directory
resource_dir = "${baseDir}/resources"
refseq = "${resource_dir}/assembly_summary_refseq.txt"
- gnomonicus_genbank = "${resource_dir}/H37rV_v3.gbk"
-
+ container_enabled = "false"
}
profiles {
- climb {
-
- //this is pre-defined in the CLIMB nextflow.config; however it has been added to allow
- //-profile climb to still work outside of CLIMB system (e.g. to access S3 buckets)
- aws {
- profile = "climb"
- client {
- endpoint = 'https://s3.climb.ac.uk'
- s3PathStyleAccess = true
- }
- }
-
- docker.enabled = true
- fixOwnership = true
- runOptions = "-u \$(id -u):\$(id -g)"
-
- // define containers for each process
- process {
- k8s {
- pullPolicy = "always"
- }
- withLabel:low_cpu {cpus = 2}
- withLabel:normal_cpu { cpus = 8 }
- withLabel:low_memory { memory = '5GB' }
- withLabel:medium_memory { memory = '10GB' }
- withLabel:high_memory { memory = '18GB' }
-
- withLabel:preprocessing {
- container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7r9"
- }
+ climb {
+ includeConfig 'config/containers.config'
+
+ //add in docker configs as the above config file is generic for any containerised run
+ docker.enabled = true
+ fixOwnership = true
+ runOptions = "-u \$(id -u):\$(id -g)"
+
+        process {
+            withLabel:getversion{
+                executor = "local"
+                container = null
+            }
- withLabel:getversion{
- executor = "local"
- }
-
- withLabel:afanc_parse{
- executor = "local"
- }
- withName:downloadContamGenomes {
- shell = ['/bin/bash','-u']
- errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
- maxRetries = 5
- }
-
- withLabel:retry_afanc {
- shell = ['/bin/bash','-u']
- errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' }
- maxRetries = 5
- }
-
- withLabel:clockwork {
- container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7r3"
- }
-
- withLabel:vcfpredict {
- container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7r3"
- }
- }
- params{
- bowtie2_index = "s3://microbial-bioin-sp3/bowtie_hg19"
- bowtie_index_name = "hg19_1kgmaj"
- kraken_db = "s3://microbial-bioin-sp3/kraken_pluspf_16gb/"
- afanc_myco_db = "s3://microbial-bioin-sp3/Mycobacteriaciae_DB_7.0/"
-
- resource_dir = "s3://microbial-bioin-sp3/lodestone_resources"
- refseq = "${resource_dir}/assembly_summary_refseq.txt"
- gnomonicus_genbank = "${resource_dir}/H37rV_v3.gbk"
+            withLabel:afanc_parse{
+                executor = "local"
+                container = null
+            }
+        }
+
+ k8s {
+ computeResourceType = 'Job'
+ }
+
+ //params specific to paths on the climb system
+ params{
+ bowtie2_index = "s3://microbial-bioin-sp3/bowtie_hg19"
+ bowtie_index_name = "hg19_1kgmaj"
+ kraken_db = "s3://microbial-bioin-sp3/kraken_pluspf_16gb/"
+ afanc_myco_db = "s3://microbial-bioin-sp3/Mycobacteriaciae_DB_7.0/"
+ resource_dir = "s3://microbial-bioin-sp3/lodestone_resources"
+ refseq = "${resource_dir}/assembly_summary_refseq.txt"
}
- }
- singularity {
-
- params{
- resource_dir = "/resources"
}
+
+ singularity {
+ includeConfig 'config/containers.config'
- singularity.enabled = 'true'
- singularity.autoMounts = 'true'
-
- // path to the singularity containers
- singularity.cacheDir = "${baseDir}/singularity"
-
- process {
- withLabel:low_cpu {cpus = 2}
- withLabel:normal_cpu { cpus = 8 }
-
- withLabel:low_memory { memory = '5GB' }
- withLabel:medium_memory { memory = '10GB' }
- withLabel:high_memory { memory = '18GB' }
-
- withLabel:getversion {
- container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
- }
-
- withLabel:preprocessing {
- container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
- }
-
- withName:downloadContamGenomes {
- shell = ['/bin/bash','-u']
- errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
- maxRetries = 5
- }
-
- withLabel:retryAfanc {
- shell = ['/bin/bash','-u']
- // Afanc sometimes fails curl in slurm, retry if so (error is masked as error status 1)
- errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' }
- maxRetries = 5
- }
-
- withLabel:clockwork {
- container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7"
- }
-
- withLabel:vcfpredict {
- container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7"
- }
+ singularity.enabled = 'true'
+ singularity.autoMounts = 'true'
+ //path to the singularity containers
+ singularity.cacheDir = "${baseDir}/singularity"
}
- }
sp3 {
-
+
+ includeConfig 'config/containers.config'
+
+ //add in singularity configs as the above config file is generic for any containerised run
singularity.enabled = 'true'
singularity.autoMounts = 'true'
-
// path to the singularity containers
singularity.cacheDir = "/data/images"
-
- params{
- resource_dir = "/resources"
- }
process {
- scratch = true
- errorStrategy = 'ignore'
-
- withLabel:low_cpu {cpus = 2}
- withLabel:normal_cpu { cpus = 8 }
- withLabel:low_memory { memory = '5GB' }
- withLabel:medium_memory { memory = '10GB' }
- withLabel:high_memory { memory = '18GB' }
-
- withLabel:getversion {
- container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
- }
-
- withLabel:preprocessing {
- container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
- }
-
- withName:downloadContamGenomes {
- shell = ['/bin/bash','-u']
- errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
- maxRetries = 5
- }
-
- withLabel:retryAfanc {
- shell = ['/bin/bash','-u']
- errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' }
- maxRetries = 5
- }
-
- withLabel:clockwork {
- container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7"
- }
-
- withLabel:vcfpredict {
- container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7"
- }
+ scratch = true
}
- }
+ }
docker {
-
+ includeConfig 'config/containers.config'
+
+ //add in docker configs as the above config file is generic for any containerised run
docker.enabled = true
fixOwnership = true
runOptions = "-u \$(id -u):\$(id -g)"
-
- params{
- resource_dir = "/resources"
- }
-
- // define containers for each process
- process {
- withLabel:low_cpu {cpus = 2}
- withLabel:normal_cpu { cpus = 8 }
- withLabel:low_memory { memory = '5GB' }
- withLabel:medium_memory { memory = '10GB' }
- withLabel:high_memory { memory = '18GB' }
-
- withLabel:getversion {
- container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
- }
-
- withLabel:preprocessing {
- container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7"
- }
-
- withName:downloadContamGenomes {
- shell = ['/bin/bash','-u']
- errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
- maxRetries = 5
- }
-
- withLabel:retryAfanc {
- shell = ['/bin/bash','-u']
- errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' }
- maxRetries = 5
- }
-
- withLabel:clockwork {
- container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7"
- }
-
- withLabel:vcfpredict {
- container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7"
- }
- }
}
}
diff --git a/singularity/Singularity.clockwork-0.9.7 b/singularity/Singularity.clockwork-0.9.8
similarity index 97%
rename from singularity/Singularity.clockwork-0.9.7
rename to singularity/Singularity.clockwork-0.9.8
index f3f3c24..0e13714 100644
--- a/singularity/Singularity.clockwork-0.9.7
+++ b/singularity/Singularity.clockwork-0.9.8
@@ -2,6 +2,8 @@ Bootstrap: docker
From: debian:buster
Stage: spython-base
+%files
+bin/ /opt/bin/
%labels
maintainer="pricea35@cardiff.ac.uk"
about.summary="container for the clockwork workflow"
@@ -26,6 +28,9 @@ clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5
PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all"
PYTHON="python2.7 python-dev"
+PATH=/opt/bin:$PATH
+
+
apt-get update \
&& apt-get install -y $PACKAGES $PYTHON \
&& curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \
@@ -36,7 +41,7 @@ apt-get update \
&& ln -s /usr/local/bin/python3.6 /usr/local/bin/python3 \
&& ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \
&& pip3 install --upgrade pip \
-&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools \
+&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \
&& wget -qO - https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | apt-key add - \
&& add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/ \
&& apt-get update && apt-get install -y adoptopenjdk-8-hotspot
@@ -136,6 +141,7 @@ export python_version=3.6.5
export clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5
export PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all"
export PYTHON="python2.7 python-dev"
+export PATH=/opt/bin:$PATH
export CLOCKWORK_CORTEX_DIR=/cortex
export PATH=${PATH}:/clockwork/python/scripts
export PICARD_JAR=/usr/local/bin/picard.jar
@@ -145,4 +151,4 @@ export LANGUAGE=en_US.UTF-8
%runscript
exec /bin/bash "$@"
%startscript
-exec /bin/bash "$@"
\ No newline at end of file
+exec /bin/bash "$@"
diff --git a/singularity/Singularity.preprocessing-0.9.7 b/singularity/Singularity.preprocessing-0.9.8
similarity index 95%
rename from singularity/Singularity.preprocessing-0.9.7
rename to singularity/Singularity.preprocessing-0.9.8
index 7ca3b35..a164d85 100644
--- a/singularity/Singularity.preprocessing-0.9.7
+++ b/singularity/Singularity.preprocessing-0.9.8
@@ -2,6 +2,8 @@ Bootstrap: docker
From: ubuntu:focal
Stage: spython-base
+%files
+bin/ /opt/bin/
%labels
maintainer="pricea35@cardiff.ac.uk"
about.summary="container for the preprocessing workflow"
@@ -25,13 +27,15 @@ fastani_version=1.33
PACKAGES="procps curl git wget build-essential zlib1g-dev libncurses-dev libz-dev libbz2-dev liblzma-dev libcurl4-openssl-dev libgsl-dev rsync unzip ncbi-blast+ pigz jq libtbb-dev openjdk-11-jre-headless autoconf r-base-core locales locales-all"
PYTHON="python3 python3-pip python3-dev"
-PYTHON_PACKAGES="biopython"
+PYTHON_PACKAGES="biopython awscli boto3"
PATH=${PATH}:/usr/local/bin/mccortex/bin:/usr/local/bin/bwa-${bwa_version}:/opt/edirect
LD_LIBRARY_PATH=/usr/local/lib
export DEBIAN_FRONTEND="noninteractive"
+PATH=/opt/bin:$PATH
+
apt-get update \
&& DEBIAN_FRONTEND="noninteractive" apt-get install -y $PACKAGES $PYTHON \
&& pip3 install --upgrade pip \
@@ -82,7 +86,7 @@ curl -fsSL https://github.com/OpenGene/fastp/archive/v${fastp_version}.tar.gz |
&& cd .. \
&& rm -r fastp-${fastp_version}
-wget http://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v${fastqc_version}.zip \
+wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v${fastqc_version}.zip \
&& unzip fastqc_v${fastqc_version}.zip \
&& chmod +x FastQC/fastqc \
&& mv FastQC/* /usr/local/bin \
@@ -102,10 +106,9 @@ curl -fsSL https://github.com/ArthurVM/Afanc/archive/refs/tags/v${afanc_version}
&& mv mash-Linux64-v${mash_version}/mash /usr/local/bin \
&& rm -r mash-Linux* \
&& wget https://github.com/ParBLiSS/FastANI/releases/download/v${fastani_version}/fastANI-Linux64-v${fastani_version}.zip \
-&& unzip fastANI-Linux64-v${fastani_version}.zip \
+&& unzip fastANI-Linux64-v${fastani_version}.zip \
&& mv fastANI /usr/local/bin
-
sh -c "$(curl -fsSL ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh)" \
&& mkdir -p /opt/edirect \
&& mv /root/edirect/* /opt/edirect
@@ -149,9 +152,10 @@ export mash_version=2.3
export fastani_version=1.33
export PACKAGES="procps curl git wget build-essential zlib1g-dev libncurses-dev libz-dev libbz2-dev liblzma-dev libcurl4-openssl-dev libgsl-dev rsync unzip ncbi-blast+ pigz jq libtbb-dev openjdk-11-jre-headless autoconf r-base-core locales locales-all"
export PYTHON="python3 python3-pip python3-dev"
-export PYTHON_PACKAGES="biopython"
+export PYTHON_PACKAGES="biopython awscli boto3"
export PATH=${PATH}:/usr/local/bin/mccortex/bin:/usr/local/bin/bwa-${bwa_version}:/opt/edirect
export LD_LIBRARY_PATH=/usr/local/lib
+export PATH=/opt/bin:$PATH
export LC_ALL=en_US.UTF-8
export LANG=en_US.UTF-8
export LANGUAGE=en_US.UTF-8
diff --git a/singularity/Singularity.tbprofiler-0.9.8 b/singularity/Singularity.tbprofiler-0.9.8
new file mode 100644
index 0000000..33be3bd
--- /dev/null
+++ b/singularity/Singularity.tbprofiler-0.9.8
@@ -0,0 +1,70 @@
+Bootstrap: docker
+From: mambaorg/micromamba:1.3.0
+Stage: app
+
+%files
+resources/tuberculosis.fasta /data/tuberculosis.fasta
+%labels
+base.image="micromamba:1.3.0"
+dockerfile.version="1"
+software="tbprofiler"
+software.version="${TBPROFILER_VER}"
+description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database."
+website="https://github.com/jodyphelan/TBProfiler/"
+license="https://github.com/jodyphelan/TBProfiler/blob/master/LICENSE"
+maintainer="John Arnn"
+maintainer.email="jarnn@utah.gov"
+maintainer2="Curtis Kapsak"
+maintainer2.email="kapsakcj@gmail.com"
+%post
+
+#copy the reference genome to pre-compute our index
+
+su - root # USER root
+mkdir -p /
+cd /
+
+TBPROFILER_VER="5.0.1"
+
+# this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/
+# commits are found on https://github.com/jodyphelan/tbdb/commits/master
+# this was the latest commit as of 2023-10-26
+TBDB_VER="e25540b"
+
+# LABEL instructions tag the image with metadata that might be important to the user
+
+# Install dependencies via apt-get; cleanup apt garbage
+apt-get update && apt-get install -y --no-install-recommends \
+wget \
+ca-certificates \
+procps && \
+apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+# install tb-profiler via bioconda; install into 'base' conda env
+micromamba install --yes --name base --channel conda-forge --channel bioconda \
+tb-profiler=${TBPROFILER_VER}
+
+micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
+micromamba install --yes --name base --channel conda-forge --channel bioconda samtools
+micromamba install --yes --name base --channel conda-forge jq
+micromamba clean --all --yes
+
+# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time
+PATH="/opt/conda/bin:${PATH}"
+
+# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json
+# can also run 'tb-profiler list_db' to find the same version info
+# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER}
+tb-profiler update_tbdb --commit ${TBDB_VER}
+
+mkdir -p /data
+cd /data
+tb-profiler update_tbdb --match_ref tuberculosis.fasta
+%environment
+export PATH="/opt/conda/bin:${PATH}"
+%runscript
+cd /data
+exec /bin/bash "$@"
+%startscript
+cd /data
+exec /bin/bash "$@"
diff --git a/singularity/Singularity.vcfpredict-0.9.7 b/singularity/Singularity.vcfpredict-0.9.8
similarity index 51%
rename from singularity/Singularity.vcfpredict-0.9.7
rename to singularity/Singularity.vcfpredict-0.9.8
index ff29506..0146e7d 100644
--- a/singularity/Singularity.vcfpredict-0.9.7
+++ b/singularity/Singularity.vcfpredict-0.9.8
@@ -2,22 +2,22 @@ Bootstrap: docker
From: ubuntu:20.04
Stage: spython-base
+%files
+bin/ /opt/bin/
%labels
maintainer="pricea35@cardiff.ac.uk"
about.summary="container for the vcf predict workflow"
%post
+#add run-vcf to container
+PATH=/opt/bin:$PATH
PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq"
PYTHON="python3 python3-pip python3-dev"
vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417
-gumpy_version=1.0.15
-piezo_version=0.3
-gnomonicus_version=1.1.2
-tuberculosis_amr_catalogues=12d38733ad2e238729a3de9f725081e1d4872968
-
+#apt updates
apt-get update \
&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \
&& apt-get install -y $PACKAGES $PYTHON \
@@ -26,38 +26,15 @@ apt-get update \
&& cd VCFMIX \
&& git checkout ${vcfmix_version} \
&& pip3 install recursive_diff \
+&& pip3 install awscli \
&& pip3 install . \
&& cp -r data /usr/local/lib/python3.8/dist-packages \
&& cd ..
-
-curl -fsSL https://github.com/oxfordmmm/gumpy/archive/refs/tags/v${gumpy_version}.tar.gz | tar -xz \
-&& cd gumpy-${gumpy_version} \
-&& pip3 install . \
-&& cd ..
-
-curl -fsSL https://github.com/oxfordmmm/piezo/archive/refs/tags/v${piezo_version}.tar.gz | tar -xz \
-&& cd piezo-${piezo_version} \
-&& pip3 install . \
-&& cd ..
-
-curl -fsSL https://github.com/oxfordmmm/gnomonicus/archive/refs/tags/v${gnomonicus_version}.tar.gz | tar -xz \
-&& cd gnomonicus-${gnomonicus_version} \
-&& pip3 install . \
-&& cd ..
-
-git clone https://github.com/oxfordmmm/tuberculosis_amr_catalogues.git \
-&& cd tuberculosis_amr_catalogues \
-&& git checkout ${tuberculosis_amr_catalogues} \
-&& cd ..
-
%environment
+export PATH=/opt/bin:$PATH
export PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq"
export PYTHON="python3 python3-pip python3-dev"
export vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417
-export gumpy_version=1.0.15
-export piezo_version=0.3
-export gnomonicus_version=1.1.2
-export tuberculosis_amr_catalogues=12d38733ad2e238729a3de9f725081e1d4872968
%runscript
exec /bin/bash "$@"
%startscript
diff --git a/workflows/clockwork.nf b/workflows/clockwork.nf
index 3ffbaa0..148f523 100644
--- a/workflows/clockwork.nf
+++ b/workflows/clockwork.nf
@@ -39,5 +39,7 @@ workflow clockwork {
mpileup_vcf = callVarsMpileup.out.mpileup_vcf.join(minos.out.minos_report, by: 0)
minos_vcf = minos.out.minos_vcf.join(alignToRef.out.alignToRef_report, by: 0)
+ reference = getRefFromJSON.out
+ bam = alignToRef.out.alignToRef_bam
}
diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf
index 5097dad..fbb19aa 100644
--- a/workflows/preprocessing.nf
+++ b/workflows/preprocessing.nf
@@ -66,7 +66,6 @@ workflow preprocessing {
bowtie2(kraken2.out.kraken2_fqs, bowtie_dir.toList())
identifyBacterialContaminants(bowtie2.out.bowtie2_fqs.join(speciation_report, by: 0).join(kraken2.out.kraken2_json, by: 0), resource_dir, refseq_path)
- identifyBacterialContaminants.out.prev_sample_json.view()
downloadContamGenomes(identifyBacterialContaminants.out.contam_list)
diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf
index 9efc651..8fec00f 100644
--- a/workflows/vcfpredict.nf
+++ b/workflows/vcfpredict.nf
@@ -3,36 +3,47 @@ nextflow.enable.dsl = 2
// import modules
include {vcfmix} from '../modules/vcfpredictModules.nf' params(params)
-include {gnomonicus} from '../modules/vcfpredictModules.nf' params(params)
-include {finalJson} from '../modules/vcfpredictModules.nf' params(params)
+include {tbprofiler} from '../modules/vcfpredictModules.nf' params(params)
+include {tbprofiler_update_db} from '../modules/vcfpredictModules.nf' params(params)
+include {add_allelic_depth} from '../modules/vcfpredictModules.nf' params(params)
+include {finalJson} from '../modules/vcfpredictModules.nf' params(params)
// define workflow component
workflow vcfpredict {
take:
-
- clockwork_bcftools
- clockwork_minos
- genbank
+ clockwork_bam
+ clockwork_bcftools_tuple
+ minos_vcf_tuple
+ reference_fasta
+
main:
if ( params.vcfmix == "yes" ) {
- vcfmix(clockwork_bcftools)
+ vcfmix(clockwork_bcftools_tuple)
}
- if ( params.gnomonicus == "yes" ) {
-
- gnomonicus(clockwork_minos, genbank)
-
+ if ( params.resistance_profiler == "tb-profiler"){
+ //get just the vcf
+ sample_name = minos_vcf_tuple.map{it[0]}
+ minos_vcf = minos_vcf_tuple.map{it[1]}
+ do_we_resistance_profile = minos_vcf_tuple.map{it[2]}
+ report_json = minos_vcf_tuple.map{it[3]}
+ bam = clockwork_bam.map{it[2]}
+
+ if (params.update_tbprofiler == "yes"){
+ tbprofiler_update_db(reference_fasta)
+ }
+
+ //add allelic depth back in: was calculated in mpileup but lost in minos
+ add_allelic_depth(sample_name, minos_vcf, bam, reference_fasta, do_we_resistance_profile)
+ tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile)
}
-
- if ( (params.vcfmix == "yes") && (params.gnomonicus == "yes") ) {
-
- finalJson(vcfmix.out.vcfmix_json.join(gnomonicus.out.gnomon_json, by: 0))
-
+
+ if (params.vcfmix == "yes" && params.resistance_profiler != "none"){
+ finalJson(vcfmix.out.vcfmix_json.join(tbprofiler.out.tbprofiler_json, by: 0))
}
-
}