diff --git a/.github/workflows/build-push-quay.yml b/.github/workflows/build-push-quay.yml index af2c572..7abc3b2 100644 --- a/.github/workflows/build-push-quay.yml +++ b/.github/workflows/build-push-quay.yml @@ -3,7 +3,7 @@ on: push: branches: - main - - ntmprofiler + - bcg paths: - '**/Dockerfile*' - "bin/" diff --git a/README.md b/README.md index 55ec2b4..c321523 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,20 @@ ![Build Status](https://github.com/Pathogen-Genomics-Cymru/lodestone/workflows/build-push-quay/badge.svg) ![Build Status](https://github.com/Pathogen-Genomics-Cymru/lodestone/workflows/pytest/badge.svg) ![Build Status](https://github.com/Pathogen-Genomics-Cymru/lodestone/workflows/stub-run/badge.svg) - + +## Table of Contents +- [What is Lodestone?](#what-is-lodestone) +- [Quick Start](#quick-start) +- [Executors](#executors) +- [System Requirements](#system-requirements) +- [Parameters](#parameters) +- [Stub Runs](#stub-runs) +- [Checkpoints](#checkpoints) +- [Acknowledgements](#acknowledgements) +- [License](#license) + +## What is Lodestone? + This pipeline takes as input reads presumed to be from one of 10 mycobacterial genomes: abscessus, africanum, avium, bovis, chelonae, chimaera, fortuitum, intracellulare, kansasii, tuberculosis. Input should be in the form of one directory containing pairs of fastq(.gz) or bam files. Pipeline cleans and QCs reads with fastp and FastQC, classifies with Kraken2 & Afanc, removes non-bacterial content, and - by alignment to any minority genomes - disambiguates mixtures of bacterial reads. Cleaned reads are aligned to either of the 10 supported genomes and variants called. Produces as output one directory per sample, containing cleaned fastqs, sorted, indexed BAM, VCF, F2 and F47 statistics, an antibiogram and summary reports. @@ -40,7 +53,7 @@ By default, the pipeline will just run on the local machine. To run on a cluster ### System Requirements ### Minimum recommended requirements: 32GB RAM, 8CPU -## Params ## +## Parameters ## The following parameters should be set in `nextflow.config` or specified on the command line: * **input_dir**
@@ -84,7 +97,7 @@ For more information on the parameters run `nextflow run main.nf --help` The path to the singularity images can also be changed in the singularity profile in `nextflow.config`. Default value is `${baseDir}/singularity` -## Stub-run ## +## Stub runs ## To test the stub run: ``` NXF_VER=20.11.0-edge nextflow run main.nf -stub -config testing.config @@ -150,3 +163,5 @@ For a list of direct authors of this pipeline, please see the contributors list. The preprocessing sub-workflow is based on the preprocessing nextflow DSL1 pipeline written by Stephen Bush, University of Oxford. The clockwork sub-workflow uses aspects of the variant calling workflow from https://github.com/iqbal-lab-org/clockwork, lead author Martin Hunt, Iqbal Lab at EMBL-EBI +## License +The tool is licensed under the GNU Affero General Public License v3 (AGPL-3.0). Please see the [LICENSE](LICENSE) file for more details. diff --git a/bin/identify_tophit_and_contaminants2.py b/bin/identify_tophit_and_contaminants2.py index 93f8547..2c0f743 100755 --- a/bin/identify_tophit_and_contaminants2.py +++ b/bin/identify_tophit_and_contaminants2.py @@ -359,8 +359,21 @@ def process_reports(afanc_json_path, kraken_json_path, supposed_species, unmix_m # IS THE TOP SPECIES HIT ONE OF THE 10 ACCEPTABLE POSSIBILITIES? IF SO, PROVIDE A LINK TO THE REFERENCE GENOME re_top_species = re.findall(r"^(Mycobact|Mycolicibac)\w+ (abscessus|africanum|avium|bovis|chelonae|chimaera|fortuitum|intracellulare|kansasii|tuberculosis).*?$", top_species) + re_top_variant = re.findall(r"^(Mycobact|Mycolicibac)\w+ (abscessus|africanum|avium|bovis|chelonae|chimaera|fortuitum|intracellulare|kansasii|tuberculosis) ()\w+ (bovis|orygis|caprae).*?$", top_species) + if len(re_top_variant) != 0: + re_top_species = re_top_variant if len(re_top_species) > 0: - identified_species = re_top_species[0][1] + if len(re_top_species[0]) == 2: + identified_species = re_top_species[0][1] + #deal with lineages + lineage_dict = {"La1.": "bovis", + "La2.": "caprae", + "La3.": "orygis"} + for lineage in lineage_dict: + if lineage in top_species: + identified_species = lineage_dict[lineage] + else: + identified_species = re_top_species[0][3] #we have bovis (or orygis/caprae) with variant in the name if supposed_species == 'null': out['summary_questions']['is_the_top_species_appropriate'] = 'yes' elif ((supposed_species != 'null') & (supposed_species == identified_species)): diff --git a/bin/run-vcfmix.py b/bin/run-vcfmix.py index e75da76..ecfe9c5 100755 --- a/bin/run-vcfmix.py +++ b/bin/run-vcfmix.py @@ -9,10 +9,10 @@ def go(vcf_file): # create a lineagescan object - v = lineageScan() + v = lineageScan(minos=True) - # assuming postfix of ".bcftools.vcf" - sampleid = vcf_file[:-13] + # assuming postfix of "_allelic_depth.minos.vcf" + sampleid = vcf_file.replace("_allelic_depth.minos.vcf", "") print(sampleid) res = v.parse(vcffile=vcf_file, sample_id=sampleid) diff --git a/config/containers.config b/config/containers.config index 7e44b62..383c4ed 100644 --- a/config/containers.config +++ b/config/containers.config @@ -33,10 +33,10 @@ process { } withLabel:clockwork { - container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.9" + container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.9r1" } withLabel:vcfpredict { - container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.9" + container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.9r1" } } diff --git a/docker/Dockerfile.clockwork-0.9.9 b/docker/Dockerfile.clockwork-0.9.9 index 57c30f8..af6fe38 100644 --- a/docker/Dockerfile.clockwork-0.9.9 +++ 
b/docker/Dockerfile.clockwork-0.9.9 @@ -1,4 +1,5 @@ -FROM debian:buster +FROM ubuntu:focal + LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the clockwork workflow" @@ -16,7 +17,8 @@ vcftools_version=0.1.15 \ mccortex_version=97aba198d632ee98ac1aa496db33d1a7a8cb7e51 \ stampy_version=1.0.32r3761 \ python_version=3.6.5 \ -clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 +clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 \ +gatk_version=4.6.0.0 ENV PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" \ PYTHON="python2.7 python-dev" @@ -24,9 +26,8 @@ PYTHON="python2.7 python-dev" COPY bin/ /opt/bin/ ENV PATH=/opt/bin:$PATH - RUN apt-get update \ -&& apt-get install -y $PACKAGES $PYTHON \ +&& DEBIAN_FRONTEND=noninteractive apt-get install -y $PACKAGES $PYTHON \ && curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \ && cd Python-${python_version} \ && ./configure --enable-optimizations \ @@ -36,7 +37,15 @@ RUN apt-get update \ && ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \ && pip3 install --upgrade pip \ && pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \ -&& apt-get update && apt-get install -y openjdk-11-jdk +&& apt-get update + +#update jdk +RUN wget https://download.java.net/java/GA/jdk18/43f95e8614114aeaa8e8a5fcf20a682d/36/GPL/openjdk-18_linux-x64_bin.tar.gz +RUN tar -xvf openjdk-18_linux-x64_bin.tar.gz +RUN mv jdk-18* /opt/ +ENV JAVA_HOME=/opt/jdk-18 +ENV PATH=$PATH:$JAVA_HOME/bin + RUN curl -fsSL https://github.com/samtools/samtools/archive/${samtools_version}.tar.gz | tar -xz \ && curl -fsSL https://github.com/samtools/htslib/releases/download/${htslib_version}/htslib-${htslib_version}.tar.bz2 | tar -xj \ @@ -107,8 +116,12 @@ RUN git clone --recursive https://github.com/iqbal-lab/cortex.git \ && pip3 install . 
\ && chmod +x scripts/clockwork +RUN wget https://github.com/broadinstitute/gatk/releases/download/${gatk_version}/gatk-${gatk_version}.zip -O /tmp/gatk-${gatk_version}.zip\ + && unzip /tmp/gatk-${gatk_version}.zip -d /opt/ \ + && rm /tmp/gatk-${gatk_version}.zip -f + ENV CLOCKWORK_CORTEX_DIR=/cortex \ -PATH=${PATH}:/clockwork/python/scripts \ +PATH=${PATH}:/clockwork/python/scripts:/opt/gatk-${gatk_version} \ PICARD_JAR=/usr/local/bin/picard.jar ENV LC_ALL en_US.UTF-8 \ diff --git a/docker/Dockerfile.clockwork-0.9.9r1 b/docker/Dockerfile.clockwork-0.9.9r1 new file mode 100644 index 0000000..af6fe38 --- /dev/null +++ b/docker/Dockerfile.clockwork-0.9.9r1 @@ -0,0 +1,131 @@ +FROM ubuntu:focal + + +LABEL maintainer="pricea35@cardiff.ac.uk" \ +about.summary="container for the clockwork workflow" + +ENV samtools_version=1.12 \ +htslib_version=1.12 \ +bcftools_version=1.12 \ +minimap2_version=2.17 \ +picard_version=2.18.16 \ +gramtools_version=8af53f6c8c0d72ef95223e89ab82119b717044f2 \ +vt_version=2187ff6347086e38f71bd9f8ca622cd7dcfbb40c \ +minos_version=0.11.0 \ +cortex_version=3a235272e4e0121be64527f01e73f9e066d378d3 \ +vcftools_version=0.1.15 \ +mccortex_version=97aba198d632ee98ac1aa496db33d1a7a8cb7e51 \ +stampy_version=1.0.32r3761 \ +python_version=3.6.5 \ +clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 \ +gatk_version=4.6.0.0 + +ENV PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" \ +PYTHON="python2.7 python-dev" + +COPY bin/ /opt/bin/ +ENV PATH=/opt/bin:$PATH + +RUN apt-get update \ +&& DEBIAN_FRONTEND=noninteractive apt-get install -y $PACKAGES $PYTHON \ +&& curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \ +&& cd Python-${python_version} \ +&& ./configure --enable-optimizations \ +&& make altinstall \ +&& cd .. 
\ +&& ln -s /usr/local/bin/python3.6 /usr/local/bin/python3 \ +&& ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \ +&& pip3 install --upgrade pip \ +&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \ +&& apt-get update + +#update jdk +RUN wget https://download.java.net/java/GA/jdk18/43f95e8614114aeaa8e8a5fcf20a682d/36/GPL/openjdk-18_linux-x64_bin.tar.gz +RUN tar -xvf openjdk-18_linux-x64_bin.tar.gz +RUN mv jdk-18* /opt/ +ENV JAVA_HOME=/opt/jdk-18 +ENV PATH=$PATH:$JAVA_HOME/bin + + +RUN curl -fsSL https://github.com/samtools/samtools/archive/${samtools_version}.tar.gz | tar -xz \ +&& curl -fsSL https://github.com/samtools/htslib/releases/download/${htslib_version}/htslib-${htslib_version}.tar.bz2 | tar -xj \ +&& make -C samtools-${samtools_version} -j HTSDIR=../htslib-${htslib_version} \ +&& make -C samtools-${samtools_version} -j HTSDIR=../htslib-${htslib_version} prefix=/usr/local install \ +&& rm -r samtools-${samtools_version} \ +&& curl -fsSL https://github.com/samtools/bcftools/archive/refs/tags/${bcftools_version}.tar.gz | tar -xz \ +&& make -C bcftools-${bcftools_version} -j HTSDIR=../htslib-${htslib_version} \ +&& make -C bcftools-${bcftools_version} -j HTSDIR=../htslib-${htslib_version} prefix=/usr/local install \ +&& rm -r bcftools-${bcftools_version} + + +RUN curl -fsSL minimap2-${minimap2_version}.tar.gz https://github.com/lh3/minimap2/archive/v${minimap2_version}.tar.gz | tar -xz \ +&& cd minimap2-${minimap2_version} \ +&& make \ +&& chmod +x minimap2 \ +&& mv minimap2 /usr/local/bin \ +&& cd .. \ +&& rm -r minimap2-${minimap2_version} \ +&& wget https://github.com/broadinstitute/picard/releases/download/${picard_version}/picard.jar -O /usr/local/bin/picard.jar + + +RUN git clone https://github.com/atks/vt.git vt-git \ +&& cd vt-git \ +&& git checkout ${vt_version} \ +&& make \ +&& cd .. \ +&& mv vt-git/vt /usr/local/bin \ +&& pip3 install tox "six>=1.14.0" \ +&& git clone https://github.com/iqbal-lab-org/gramtools \ +&& cd gramtools \ +&& git checkout ${gramtools_version} \ +&& pip3 install . \ +&& cd .. \ +&& pip3 install cython \ +&& pip3 install git+https://github.com/iqbal-lab-org/minos@v${minos_version} + + +RUN git clone --recursive https://github.com/iqbal-lab/cortex.git \ +&& cd cortex \ +&& git checkout ${cortex_version} \ +&& bash install.sh \ +&& make NUM_COLS=1 cortex_var \ +&& make NUM_COLS=2 cortex_var \ +&& cd .. \ +&& mkdir bioinf-tools \ +&& cd bioinf-tools \ +&& curl -fsSL http://www.well.ox.ac.uk/~gerton/software/Stampy/stampy-${stampy_version}.tgz | tar -xz \ +&& make -C stampy-* \ +&& cp -s stampy-*/stampy.py . \ +&& curl -fsSL https://github.com/vcftools/vcftools/releases/download/v${vcftools_version}/vcftools-${vcftools_version}.tar.gz | tar -xz \ +&& cd vcftools-${vcftools_version} \ +&& ./configure --prefix $PWD/install \ +&& make && make install \ +&& ln -s src/perl/ . \ +&& cd .. \ +&& git clone --recursive https://github.com/mcveanlab/mccortex \ +&& cd mccortex \ +&& git checkout ${mccortex_version} \ +&& make all \ +&& cd .. \ +&& cp -s mccortex/bin/mccortex31 . \ +&& cd .. \ +&& git clone https://github.com/iqbal-lab-org/clockwork \ +&& cd clockwork \ +&& git checkout ${clockwork_version} \ +&& cd python \ +&& pip3 install . 
\ +&& chmod +x scripts/clockwork + +RUN wget https://github.com/broadinstitute/gatk/releases/download/${gatk_version}/gatk-${gatk_version}.zip -O /tmp/gatk-${gatk_version}.zip\ + && unzip /tmp/gatk-${gatk_version}.zip -d /opt/ \ + && rm /tmp/gatk-${gatk_version}.zip -f + +ENV CLOCKWORK_CORTEX_DIR=/cortex \ +PATH=${PATH}:/clockwork/python/scripts:/opt/gatk-${gatk_version} \ +PICARD_JAR=/usr/local/bin/picard.jar + +ENV LC_ALL en_US.UTF-8 \ +LANG en_US.UTF-8 \ +LANGUAGE en_US.UTF-8 + + diff --git a/docker/Dockerfile.tbprofiler-0.9.9 b/docker/Dockerfile.tbprofiler-0.9.9 index 42c3832..d9d5210 100644 --- a/docker/Dockerfile.tbprofiler-0.9.9 +++ b/docker/Dockerfile.tbprofiler-0.9.9 @@ -42,8 +42,7 @@ RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest| tar -xvj bin # install tb-profiler via bioconda; install into 'base' conda env RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \ tb-profiler=${TBPROFILER_VER} - -RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4 +RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4 RUN micromamba install --yes --name base --channel conda-forge --channel bioconda samtools RUN micromamba install --yes --name base --channel conda-forge jq RUN micromamba clean --all --yes diff --git a/docker/Dockerfile.tbtamr-0.9.9 b/docker/Dockerfile.tbtamr-0.9.9 index 79c960f..3043027 100644 --- a/docker/Dockerfile.tbtamr-0.9.9 +++ b/docker/Dockerfile.tbtamr-0.9.9 @@ -2,6 +2,9 @@ FROM ubuntu:jammy WORKDIR / +ENV freebayes_version=1.3.6 \ + tbtamr_version=0.0.4 + # LABEL instructions tag the image with metadata that might be important to the user LABEL base.image="ubuntu:jammy" LABEL dockerfile.version="0.9.9" diff --git a/docker/Dockerfile.vcfpredict-0.9.9r1 b/docker/Dockerfile.vcfpredict-0.9.9r1 new file mode 100644 index 0000000..39e867d --- /dev/null +++ b/docker/Dockerfile.vcfpredict-0.9.9r1 @@ -0,0 +1,26 @@ +FROM ubuntu:20.04 + +LABEL maintainer="pricea35@cardiff.ac.uk" \ +about.summary="container for the vcf predict workflow" + +#add run-vcf to container +COPY bin/ /opt/bin/ +ENV PATH=/opt/bin:$PATH + +ENV PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" \ +PYTHON="python3 python3-pip python3-dev" + +ENV vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 + +#apt updates +RUN apt-get update \ +&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \ +&& apt-get install -y $PACKAGES $PYTHON \ +&& apt-get install -y python3-packaging \ +&& git clone https://github.com/whalleyt/VCFMIX.git \ +&& cd VCFMIX \ +&& pip3 install recursive_diff \ +&& pip3 install awscli \ +&& pip3 install . \ +&& cp -r data /usr/local/lib/python3.8/dist-packages \ +&& cd .. diff --git a/main.nf b/main.nf index 9fdbe51..0f08724 100644 --- a/main.nf +++ b/main.nf @@ -85,11 +85,10 @@ nextflow run main.nf -profile docker --filetype bam --input_dir bam_dir --unmix_ } -resistance_profilers = ["tb-profiler", "tbtamr", "none"] +resistance_profilers = ["tb-profiler", "tbtamr"] if(!resistance_profilers.contains(params.resistance_profiler)){ - exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler", "tbtamr" \ - or "none" to skip.' + exit 1, 'Invalid resistance profiler. 
Must be one of "tb-profiler" or "tbtamr"' } @@ -199,13 +198,10 @@ workflow { clockwork(preprocessing_output) // VCFPREDICT SUB-WORKFLOW - sample_and_fastqs = clockwork.out.sample_and_fastqs - mpileup_vcf = clockwork.out.mpileup_vcf - minos_vcf = clockwork.out.minos_vcf - reference = clockwork.out.reference - bam = clockwork.out.bam + profiler_input_vcf = clockwork.out.profiler_input_vcf + profiler_input_fq = clockwork.out.profiler_input_fq - vcfpredict(sample_and_fastqs, bam, mpileup_vcf, minos_vcf, reference) + vcfpredict(profiler_input_fq, profiler_input_vcf) } diff --git a/modules/clockworkModules.nf b/modules/clockworkModules.nf index 5ad1772..2c90f15 100644 --- a/modules/clockworkModules.nf +++ b/modules/clockworkModules.nf @@ -233,7 +233,7 @@ process minos { output: tuple val(sample_name), path(report_json), path(bam), path(ref), emit: minos_bam - tuple val(sample_name), path("${sample_name}.minos.vcf"), stdout, emit: minos_vcf + tuple val(sample_name), path("${sample_name}_allelic_depth.minos.vcf"), stdout, emit: minos_vcf tuple val(sample_name), path("${sample_name}_report.json"), emit: minos_report path "${sample_name}_err.json", emit: minos_log optional true @@ -259,11 +259,23 @@ process minos { cp minos/final.vcf ${minos_vcf} rm -rf minos - top_hit=\$(jq -r '.top_hit.name' ${report_json}) + samtools faidx $ref + samtools dict $ref -o ${ref.baseName}.dict + mkdir tmp + + gatk VariantAnnotator -R $ref -I $bam -V $minos_vcf -A DepthPerAlleleBySample -O ${sample_name}_allelic_depth.minos.vcf --tmp-dir tmp + + top_hit=\$(jq -r '.top_hit.file_paths.ref_fa' ${report_json}) cp ${sample_name}_report.json ${sample_name}_report_previous.json - if [[ \$top_hit =~ ^"Mycobacterium tuberculosis" ]]; then printf "CREATE_ANTIBIOGRAM_${sample_name}"; else echo '{"resistance-profiling-warning":"sample is not TB so cannot produce antibiogram using resistance profiling tools"}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi + if [[ \$top_hit =~ "/tuberculosis.fasta" ]]; then + printf "CREATE_ANTIBIOGRAM_${sample_name}" + else + printf "CREATE_NTM_ANTIBIOGRAM_${sample_name}" + echo '{"resistance-profiling-warning":"sample is not TB so cannot produce antibiogram using resistance profiling tools"}' \ + | jq '.' 
> ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json} + fi """ stub: @@ -299,6 +311,9 @@ process gvcf { path("${sample_name}.fa", emit: gvcf_fa) path "${sample_name}_err.json", emit: gvcf_log optional true path "${sample_name}_report.json", emit: gvcf_report optional true + tuple val(sample_name), path(minos_vcf), path(report_json), emit: vcfmix_input + tuple val(sample_name), path(minos_vcf), path(report_json), path(bam), path(ref), val(isSampleTB), emit: tbprofiler + tuple val(sample_name), path(report_json), path(minos_vcf), val(isSampleTB), emit: gvcf_report_resistance script: gvcf = "${sample_name}.gvcf.vcf" diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf index a6df54f..cb86b5c 100644 --- a/modules/vcfpredictModules.nf +++ b/modules/vcfpredictModules.nf @@ -27,13 +27,11 @@ process vcfmix { error_log = "${sample_name}_err.json" """ - run-vcfmix.py ${bcftools_vcf} + run-vcfmix.py ${vcf} cp ${sample_name}_report.json ${sample_name}_report_previous.json jq -s ".[0] * .[1]" ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json} - - if [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}; fi """ stub: @@ -74,14 +72,12 @@ process tbprofiler { publishDir "${params.output_dir}${sample_name}", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' input: - val(sample_name) - path(minos_vcf) - path(report_json) - val(isSampleTB) + tuple val(sample_name), path(minos_vcf), path(report_json), val(isSampleTB) output: tuple val(sample_name), path("${sample_name}.tbprofiler-out.json"), path("${sample_name}_report.json"), emit: tbprofiler_json path("${sample_name}/${sample_name}.results.json"), emit: collate_json + tuple val(sample_name), path(minos_vcf), path(report_json), emit: vcfmix_in when: isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ @@ -91,7 +87,10 @@ process tbprofiler { tbprofiler_json = "${sample_name}.tbprofiler-out.json" """ + #keep the original vcf so we can collate the output and pass it down + cp ${minos_vcf} tmp.vcf bgzip ${minos_vcf} + mv tmp.vcf ${minos_vcf} mkdir tmp tb-profiler profile --vcf ${minos_vcf}.gz --threads ${task.cpus} --temp tmp --prefix ${sample_name} @@ -108,8 +107,10 @@ process tbprofiler { stub: """ + mkdir ${sample_name} touch ${sample_name}.tbprofiler-out.json touch ${sample_name}_report.json + touch ${sample_name}/${sample_name}.results.json """ } @@ -120,14 +121,15 @@ process ntmprofiler { label 'ntmprofiler' input: - tuple val(sample_name), path(fq1), path(fq2), path(report_json), val(isSampleTB) + tuple val(sample_name), path(fq1), path(fq2), path(report_json), path(vcf), val(isSampleNTM) output: tuple val(sample_name), path("${sample_name}.ntmprofiler-out.json"), path("${sample_name}_report.json"), emit: ntmprofiler_json path("${sample_name}.results.json"), emit: collate_json + tuple val(sample_name), path(vcf), path(report_json), emit: vcfmix_in when: - isSampleTB != /CREATE\_ANTIBIOGRAM\_${sample_name}/ + isSampleNTM =~ /CREATE\_NTM\_ANTIBIOGRAM\_${sample_name}/ script: error_log = "${sample_name}_err.json" @@ -145,6 +147,13 @@ process ntmprofiler { jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${ntmprofiler_json} > ${report_json} """ + + stub: + """ + touch ${sample_name}.ntmprofiler-out.json + touch 
${sample_name}_report.json + touch ${sample_name}.results.json + """ } process tbtamr { @@ -157,11 +166,12 @@ process tbtamr { publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' input: - tuple val(sample_name), path(fq1), path(fq2), path(report_json), val(isSampleTB) + tuple val(sample_name), path(fq1), path(fq2), path(report_json), path(vcf), val(isSampleTB) output: tuple val(sample_name), path("${sample_name}.tbtamr-out.json"), path("${sample_name}_report.json"), emit: tbtamr_json path(sample_name), emit: collate_json + tuple val(sample_name), path(vcf), path(report_json), emit: vcfmix_in when: isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ @@ -258,15 +268,11 @@ process add_allelic_depth { label 'tbprofiler' input: - val(sample_name) - path(minos_vcf) - path(bam) - path(reference) - val(isSampleTB) + tuple val(sample_name), path(minos_vcf), path(report_json), path(bam), path(reference), val(isSampleTB) output: - path("${sample_name}_allelic_depth.minos.vcf") - + tuple val(sample_name), path("${sample_name}_allelic_depth.minos.vcf"), path(report_json), val(isSampleTB) + when: isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ diff --git a/nextflow.config b/nextflow.config index f26774c..95a3eca 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,9 +31,6 @@ params { // name of the bowtie index, e.g. hg19_1kgmaj bowtie_index_name = "hg19_1kgmaj" - // run VCFMIX 'yes' or 'no' (set to no for synthetic samples) - vcfmix = 'yes' - // resistance params resistance_profiler = "tb-profiler" update_tbprofiler = "no" diff --git a/singularity/Singularity.clockwork-0.9.9r1 b/singularity/Singularity.clockwork-0.9.9r1 new file mode 100644 index 0000000..b4ab6b9 --- /dev/null +++ b/singularity/Singularity.clockwork-0.9.9r1 @@ -0,0 +1,168 @@ +Bootstrap: docker +From: ubuntu:focal +Stage: spython-base + +%files +bin/ /opt/bin/ +%labels +maintainer="pricea35@cardiff.ac.uk" +about.summary="container for the clockwork workflow" +%post + + + +samtools_version=1.12 +htslib_version=1.12 +bcftools_version=1.12 +minimap2_version=2.17 +picard_version=2.18.16 +gramtools_version=8af53f6c8c0d72ef95223e89ab82119b717044f2 +vt_version=2187ff6347086e38f71bd9f8ca622cd7dcfbb40c +minos_version=0.11.0 +cortex_version=3a235272e4e0121be64527f01e73f9e066d378d3 +vcftools_version=0.1.15 +mccortex_version=97aba198d632ee98ac1aa496db33d1a7a8cb7e51 +stampy_version=1.0.32r3761 +python_version=3.6.5 +clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 +gatk_version=4.6.0.0 + +PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" +PYTHON="python2.7 python-dev" + +PATH=/opt/bin:$PATH + +apt-get update \ +&& DEBIAN_FRONTEND=noninteractive apt-get install -y $PACKAGES $PYTHON \ +&& curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \ +&& cd Python-${python_version} \ +&& ./configure --enable-optimizations \ +&& make altinstall \ +&& cd .. 
\ +&& ln -s /usr/local/bin/python3.6 /usr/local/bin/python3 \ +&& ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \ +&& pip3 install --upgrade pip \ +&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \ +&& apt-get update + +#update jdk +wget https://download.java.net/java/GA/jdk18/43f95e8614114aeaa8e8a5fcf20a682d/36/GPL/openjdk-18_linux-x64_bin.tar.gz +tar -xvf openjdk-18_linux-x64_bin.tar.gz +mv jdk-18* /opt/ +JAVA_HOME=/opt/jdk-18 +PATH=$PATH:$JAVA_HOME/bin + + +curl -fsSL https://github.com/samtools/samtools/archive/${samtools_version}.tar.gz | tar -xz \ +&& curl -fsSL https://github.com/samtools/htslib/releases/download/${htslib_version}/htslib-${htslib_version}.tar.bz2 | tar -xj \ +&& make -C samtools-${samtools_version} -j HTSDIR=../htslib-${htslib_version} \ +&& make -C samtools-${samtools_version} -j HTSDIR=../htslib-${htslib_version} prefix=/usr/local install \ +&& rm -r samtools-${samtools_version} \ +&& curl -fsSL https://github.com/samtools/bcftools/archive/refs/tags/${bcftools_version}.tar.gz | tar -xz \ +&& make -C bcftools-${bcftools_version} -j HTSDIR=../htslib-${htslib_version} \ +&& make -C bcftools-${bcftools_version} -j HTSDIR=../htslib-${htslib_version} prefix=/usr/local install \ +&& rm -r bcftools-${bcftools_version} + + +curl -fsSL minimap2-${minimap2_version}.tar.gz https://github.com/lh3/minimap2/archive/v${minimap2_version}.tar.gz | tar -xz \ +&& cd minimap2-${minimap2_version} \ +&& make \ +&& chmod +x minimap2 \ +&& mv minimap2 /usr/local/bin \ +&& cd .. \ +&& rm -r minimap2-${minimap2_version} \ +&& wget https://github.com/broadinstitute/picard/releases/download/${picard_version}/picard.jar -O /usr/local/bin/picard.jar + + +git clone https://github.com/atks/vt.git vt-git \ +&& cd vt-git \ +&& git checkout ${vt_version} \ +&& make \ +&& cd .. \ +&& mv vt-git/vt /usr/local/bin \ +&& pip3 install tox "six>=1.14.0" \ +&& git clone https://github.com/iqbal-lab-org/gramtools \ +&& cd gramtools \ +&& git checkout ${gramtools_version} \ +&& pip3 install . \ +&& cd .. \ +&& pip3 install cython \ +&& pip3 install git+https://github.com/iqbal-lab-org/minos@v${minos_version} + + +git clone --recursive https://github.com/iqbal-lab/cortex.git \ +&& cd cortex \ +&& git checkout ${cortex_version} \ +&& bash install.sh \ +&& make NUM_COLS=1 cortex_var \ +&& make NUM_COLS=2 cortex_var \ +&& cd .. \ +&& mkdir bioinf-tools \ +&& cd bioinf-tools \ +&& curl -fsSL http://www.well.ox.ac.uk/~gerton/software/Stampy/stampy-${stampy_version}.tgz | tar -xz \ +&& make -C stampy-* \ +&& cp -s stampy-*/stampy.py . \ +&& curl -fsSL https://github.com/vcftools/vcftools/releases/download/v${vcftools_version}/vcftools-${vcftools_version}.tar.gz | tar -xz \ +&& cd vcftools-${vcftools_version} \ +&& ./configure --prefix $PWD/install \ +&& make && make install \ +&& ln -s src/perl/ . \ +&& cd .. \ +&& git clone --recursive https://github.com/mcveanlab/mccortex \ +&& cd mccortex \ +&& git checkout ${mccortex_version} \ +&& make all \ +&& cd .. \ +&& cp -s mccortex/bin/mccortex31 . \ +&& cd .. \ +&& git clone https://github.com/iqbal-lab-org/clockwork \ +&& cd clockwork \ +&& git checkout ${clockwork_version} \ +&& cd python \ +&& pip3 install . 
\ +&& chmod +x scripts/clockwork + +wget https://github.com/broadinstitute/gatk/releases/download/${gatk_version}/gatk-${gatk_version}.zip -O /tmp/gatk-${gatk_version}.zip\ +&& unzip /tmp/gatk-${gatk_version}.zip -d /opt/ \ +&& rm /tmp/gatk-${gatk_version}.zip -f + +CLOCKWORK_CORTEX_DIR=/cortex +PATH=${PATH}:/clockwork/python/scripts:/opt/gatk-${gatk_version} +PICARD_JAR=/usr/local/bin/picard.jar + +LC_ALL=en_US.UTF-8 +LANG=en_US.UTF-8 +LANGUAGE=en_US.UTF-8 + + +%environment +export samtools_version=1.12 +export htslib_version=1.12 +export bcftools_version=1.12 +export minimap2_version=2.17 +export picard_version=2.18.16 +export gramtools_version=8af53f6c8c0d72ef95223e89ab82119b717044f2 +export vt_version=2187ff6347086e38f71bd9f8ca622cd7dcfbb40c +export minos_version=0.11.0 +export cortex_version=3a235272e4e0121be64527f01e73f9e066d378d3 +export vcftools_version=0.1.15 +export mccortex_version=97aba198d632ee98ac1aa496db33d1a7a8cb7e51 +export stampy_version=1.0.32r3761 +export python_version=3.6.5 +export clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 +export gatk_version=4.6.0.0 +export PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" +export PYTHON="python2.7 python-dev" +export PATH=/opt/bin:$PATH +export JAVA_HOME=/opt/jdk-18 +export PATH=$PATH:$JAVA_HOME/bin +export CLOCKWORK_CORTEX_DIR=/cortex +export PATH=${PATH}:/clockwork/python/scripts:/opt/gatk-${gatk_version} +export PICARD_JAR=/usr/local/bin/picard.jar +export LC_ALL=en_US.UTF-8 +export LANG=en_US.UTF-8 +export LANGUAGE=en_US.UTF-8 +%runscript +exec /bin/bash "$@" +%startscript +exec /bin/bash "$@" diff --git a/singularity/Singularity.tbtamr-0.9.9 b/singularity/Singularity.tbtamr-0.9.9 index 8908ce6..7be1dc1 100644 --- a/singularity/Singularity.tbtamr-0.9.9 +++ b/singularity/Singularity.tbtamr-0.9.9 @@ -14,6 +14,9 @@ maintainer3.email="twhalley93@gmail.com" mkdir -p / cd / +freebayes_version=1.3.6 +tbtamr_version=0.0.4 + # LABEL instructions tag the image with metadata that might be important to the user #set env for root prefix @@ -33,7 +36,7 @@ curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest| tar -xvj bin/mic micromamba install --yes --name base --channel conda-forge --channel bioconda jq requests xlsxwriter tbtamr micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4 micromamba install --yes --name base --channel conda-forge --channel bioconda samtools -micromamba install --yes bioconda freebayes==1.3.6 #STDERR in current version of freebayes +micromamba install --yes --name base --channel conda-forge --channel bioconda freebayes==1.3.6 #STDERR in current version of freebayes micromamba clean --all --yes # hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time @@ -45,6 +48,8 @@ cd /data #wants full path to reference tbtamr setup %environment +export freebayes_version=1.3.6 +export tbtamr_version=0.0.4 export MAMBA_ROOT_PREFIX="/opt/conda" export PATH="/opt/conda/bin:${PATH}" %runscript diff --git a/singularity/Singularity.vcfpredict-0.9.9r1 b/singularity/Singularity.vcfpredict-0.9.9r1 new file mode 100644 index 0000000..b7860ff --- /dev/null +++ b/singularity/Singularity.vcfpredict-0.9.9r1 @@ -0,0 +1,42 @@ +Bootstrap: docker +From: ubuntu:20.04 +Stage: spython-base + +%files +bin/ /opt/bin/ +%labels +maintainer="pricea35@cardiff.ac.uk" 
+about.summary="container for the vcf predict workflow" +%post + + +#add run-vcf to container +PATH=/opt/bin:$PATH + +PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" +PYTHON="python3 python3-pip python3-dev" + +vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 + +#apt updates +apt-get update \ +&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \ +&& apt-get install -y $PACKAGES $PYTHON \ +&& apt-get install -y python3-packaging \ +&& git clone https://github.com/whalleyt/VCFMIX.git \ +&& cd VCFMIX \ +&& git checkout ${vcfmix_version} \ +&& pip3 install recursive_diff \ +&& pip3 install awscli \ +&& pip3 install . \ +&& cp -r data /usr/local/lib/python3.8/dist-packages \ +&& cd .. +%environment +export PATH=/opt/bin:$PATH +export PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" +export PYTHON="python3 python3-pip python3-dev" +export vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 +%runscript +exec /bin/bash "$@" +%startscript +exec /bin/bash "$@" diff --git a/testing.config b/testing.config index e8edf8b..9400bb5 100644 --- a/testing.config +++ b/testing.config @@ -1,5 +1,6 @@ // E.g. to run: NXF_VER=20.11.0-edge nextflow run main.nf -stub -config testing.config + // dry-run parameters // OK or null diff --git a/workflows/clockwork.nf b/workflows/clockwork.nf index af04335..940546e 100644 --- a/workflows/clockwork.nf +++ b/workflows/clockwork.nf @@ -38,11 +38,11 @@ workflow clockwork { gvcf(alignToRef.out.alignToRef_bam.join(minos.out.minos_vcf, by: 0)) - emit: - sample_and_fastqs = input_seqs_json.map{it[0,1,2]} - mpileup_vcf = callVarsMpileup.out.mpileup_vcf.join(minos.out.minos_report, by: 0) - minos_vcf = minos.out.minos_vcf.join(alignToRef.out.alignToRef_report, by: 0) - reference = getRefFromJSON.out - bam = alignToRef.out.alignToRef_bam + report_for_ntm = gvcf.out.gvcf_report_resistance + sample_and_fqs = input_seqs_json.map{it[0,1,2]} + profiler_input_fq = sample_and_fqs.join(report_for_ntm, by:0) + emit: + profiler_input_vcf = gvcf.out.tbprofiler + profiler_input_fq = profiler_input_fq } diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf index 773e114..1c07f49 100644 --- a/workflows/vcfpredict.nf +++ b/workflows/vcfpredict.nf @@ -16,35 +16,16 @@ include {ntmprofiler_collate} from '../modules/vcfpredictModules.nf' params(para workflow vcfpredict { take: - sample_and_fastqs - clockwork_bam - clockwork_bcftools_tuple - minos_vcf_tuple - reference_fasta - + profiler_input_fq + profiler_input_vcf main: - - if ( params.vcfmix == "yes" ) { - - vcfmix(clockwork_bcftools_tuple) - - } - - //get just the vcf - sample_name = minos_vcf_tuple.map{it[0]} - minos_vcf = minos_vcf_tuple.map{it[1]} - do_we_resistance_profile = minos_vcf_tuple.map{it[2]} - report_json = minos_vcf_tuple.map{it[3]} - bam = clockwork_bam.map{it[2]} - fastq_and_report = sample_and_fastqs.combine(report_json).combine(do_we_resistance_profile) - //ntm-profiling: e.g. 
everything down being passed into tbtamr/tb-profiler //at the moment it is only ran on fastqs; need to find a sensible way //of linking up the references - ntmprofiler(fastq_and_report) + ntmprofiler(profiler_input_fq) - ntm_profiling_json = ntmprofiler.out.ntmprofiler_json + ntm_profiling_out = ntmprofiler.out.vcfmix_in if(params.collate == "yes"){ collated_ntm_jsons = ntmprofiler.out.collate_json.collect() @@ -58,26 +39,26 @@ workflow vcfpredict { tbprofiler_update_db(reference_fasta) } - //add allelic depth back in: was calculated in mpileup but lost in minos - add_allelic_depth(sample_name, minos_vcf, bam, reference_fasta, do_we_resistance_profile) //run tb-profiler - tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile) - profiling_json = tbprofiler.out.tbprofiler_json + tbprofiler(profiler_input_vcf) + + tb_profiling_out = tbprofiler.out.vcfmix_in + if(params.collate == "yes"){ collated_jsons = tbprofiler.out.collate_json.collect() tbprofiler_collate(collated_jsons) } } else if (params.resistance_profiler == "tbtamr"){ - tbtamr(fastq_and_report) - profiling_json = tbtamr.out.tbtamr_json + tbtamr(profiler_input_fq) + + tb_profiling_out = tbtamr.out.vcfmix_in + if(params.collate == "yes"){ collated_jsons = tbtamr.out.collate_json.collect() tbtamr_collate(collated_jsons) } } - if (params.vcfmix == "yes" && params.resistance_profiler != "none"){ - profiling_jsons = profiling_json.combine(ntm_profiling_json) - finalJson(vcfmix.out.vcfmix_json.join(profiling_json, by: 0)) - } + profiling_jsons = ntm_profiling_out.mix(tb_profiling_out) + vcfmix(profiling_jsons) }
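For context on the species-detection change in `bin/identify_tophit_and_contaminants2.py`: the sketch below is a minimal, self-contained re-expression of how the new top-hit resolution behaves, covering both "Mycobacterium tuberculosis variant bovis/caprae/orygis" style hits and plain tuberculosis hits carrying La1/La2/La3 animal-lineage tags. The function name `resolve_species`, the module-level regexes and the example hit strings are illustrative only and are not part of the pipeline.

```python
import re

# Illustrative sketch: mirrors the top-hit resolution added to
# bin/identify_tophit_and_contaminants2.py (names and test strings are made up).
LINEAGE_TO_SPECIES = {"La1.": "bovis", "La2.": "caprae", "La3.": "orygis"}

SPECIES_RE = re.compile(
    r"^(Mycobact|Mycolicibac)\w+ "
    r"(abscessus|africanum|avium|bovis|chelonae|chimaera|fortuitum|"
    r"intracellulare|kansasii|tuberculosis)"
)
# "Mycobacterium tuberculosis variant bovis/caprae/orygis ..." style hits
VARIANT_RE = re.compile(r"^(Mycobact|Mycolicibac)\w+ tuberculosis \w+ (bovis|caprae|orygis)")

def resolve_species(top_species):
    """Return the short species name used to pick a reference genome, or None."""
    variant = VARIANT_RE.match(top_species)
    if variant:
        # the variant name, not "tuberculosis", selects the reference
        return variant.group(2)
    species = SPECIES_RE.match(top_species)
    if not species:
        return None
    identified = species.group(2)
    # a plain "tuberculosis" hit may still carry an animal-lineage tag (La1-La3)
    for lineage, name in LINEAGE_TO_SPECIES.items():
        if lineage in top_species:
            identified = name
    return identified

if __name__ == "__main__":
    for hit in ("Mycobacterium tuberculosis variant bovis AF2122/97",
                "Mycobacterium tuberculosis La2.2",
                "Mycolicibacterium fortuitum"):
        print(hit, "->", resolve_species(hit))
```

The point of the mapping is that bovis, caprae and orygis hits can be reported either with the variant spelled out in the species name or only via the La1/La2/La3 lineage label, and either form should resolve to the matching reference genome.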
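Similarly, the allelic-depth annotation that the `minos` process now runs before resistance profiling can be reproduced outside Nextflow. The following is a rough Python wrapper around the same samtools/GATK commands shown in the diff, assuming `samtools` and `gatk` (GATK 4) are on PATH; the function name, argument names and example paths are illustrative, not pipeline code.

```python
import subprocess
from pathlib import Path

def add_allelic_depth(ref, bam, minos_vcf, sample_name, tmp_dir="tmp"):
    """Re-annotate a minos VCF with per-allele depths (AD), mirroring the
    samtools/GATK commands now embedded in the minos process."""
    Path(tmp_dir).mkdir(exist_ok=True)
    out_vcf = f"{sample_name}_allelic_depth.minos.vcf"
    ref_dict = str(Path(ref).with_suffix(".dict"))
    # index the reference and build the sequence dictionary GATK requires
    subprocess.run(["samtools", "faidx", ref], check=True)
    subprocess.run(["samtools", "dict", ref, "-o", ref_dict], check=True)
    # add DepthPerAlleleBySample annotations that are lost during minos adjudication
    subprocess.run(["gatk", "VariantAnnotator", "-R", ref, "-I", bam,
                    "-V", minos_vcf, "-A", "DepthPerAlleleBySample",
                    "-O", out_vcf, "--tmp-dir", tmp_dir], check=True)
    return out_vcf

if __name__ == "__main__":
    # illustrative paths only
    print(add_allelic_depth("tuberculosis.fasta", "sample1.bam",
                            "sample1.minos.vcf", "sample1"))
```

Downstream, tb-profiler consumes a bgzipped copy of the resulting `*_allelic_depth.minos.vcf`, and `run-vcfmix.py` strips the `_allelic_depth.minos.vcf` suffix to recover the sample id, which is why the suffix convention matters.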