From 402a4ccd54b4174454599bd77ae1b07cb8f11c15 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Tue, 10 Sep 2024 13:52:54 +0000 Subject: [PATCH 01/32] change regex --- bin/identify_tophit_and_contaminants2.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bin/identify_tophit_and_contaminants2.py b/bin/identify_tophit_and_contaminants2.py index 93f8547..989f77b 100755 --- a/bin/identify_tophit_and_contaminants2.py +++ b/bin/identify_tophit_and_contaminants2.py @@ -358,9 +358,12 @@ def process_reports(afanc_json_path, kraken_json_path, supposed_species, unmix_m out['summary_questions']['were_contaminants_removed'] = 'no' # IS THE TOP SPECIES HIT ONE OF THE 10 ACCEPTABLE POSSIBILITIES? IF SO, PROVIDE A LINK TO THE REFERENCE GENOME - re_top_species = re.findall(r"^(Mycobact|Mycolicibac)\w+ (abscessus|africanum|avium|bovis|chelonae|chimaera|fortuitum|intracellulare|kansasii|tuberculosis).*?$", top_species) + re_top_species = re.findall(r"^(Mycobact|Mycolicibac)\w+ (abscessus|africanum|avium|bovis|chelonae|chimaera|fortuitum|intracellulare|kansasii|tuberculosis) ()\w+ (bovis|orgis|caprae).*?$", top_species) if len(re_top_species) > 0: - identified_species = re_top_species[0][1] + if len(re_top_species[0]) == 2: + identified_species = re_top_species[0][1] + else: + identified_species = re_top_species[0][3] #we have bovis (or orgis/caprae) with variant in the name if supposed_species == 'null': out['summary_questions']['is_the_top_species_appropriate'] = 'yes' elif ((supposed_species != 'null') & (supposed_species == identified_species)): From 85e729a8a2e410711926d0789bc1cadc8ac29866 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Tue, 10 Sep 2024 13:55:20 +0000 Subject: [PATCH 02/32] push build --- .github/workflows/build-push-quay.yml | 2 +- docker/Dockerfile.preprocessing-0.9.9 | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-push-quay.yml b/.github/workflows/build-push-quay.yml index af2c572..7abc3b2 
100644 --- a/.github/workflows/build-push-quay.yml +++ b/.github/workflows/build-push-quay.yml @@ -3,7 +3,7 @@ on: push: branches: - main - - ntmprofiler + - bcg paths: - '**/Dockerfile*' - "bin/" diff --git a/docker/Dockerfile.preprocessing-0.9.9 b/docker/Dockerfile.preprocessing-0.9.9 index 0c4da95..ab92979 100644 --- a/docker/Dockerfile.preprocessing-0.9.9 +++ b/docker/Dockerfile.preprocessing-0.9.9 @@ -1,5 +1,6 @@ FROM ubuntu:focal + LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the preprocessing workflow" From 9e2c8ab8415b6b99375622012158870ab2b2a140 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Tue, 10 Sep 2024 14:47:51 +0000 Subject: [PATCH 03/32] bump --- docker/Dockerfile.preprocessing-0.9.9 | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/Dockerfile.preprocessing-0.9.9 b/docker/Dockerfile.preprocessing-0.9.9 index ab92979..0c4da95 100644 --- a/docker/Dockerfile.preprocessing-0.9.9 +++ b/docker/Dockerfile.preprocessing-0.9.9 @@ -1,6 +1,5 @@ FROM ubuntu:focal - LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the preprocessing workflow" From 599b88a2d978969bdb704ecdd8125f9725ea0035 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Tue, 10 Sep 2024 15:49:50 +0000 Subject: [PATCH 04/32] bcg --- bin/identify_tophit_and_contaminants2.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/identify_tophit_and_contaminants2.py b/bin/identify_tophit_and_contaminants2.py index 989f77b..c396d3b 100755 --- a/bin/identify_tophit_and_contaminants2.py +++ b/bin/identify_tophit_and_contaminants2.py @@ -358,7 +358,10 @@ def process_reports(afanc_json_path, kraken_json_path, supposed_species, unmix_m out['summary_questions']['were_contaminants_removed'] = 'no' # IS THE TOP SPECIES HIT ONE OF THE 10 ACCEPTABLE POSSIBILITIES? 
IF SO, PROVIDE A LINK TO THE REFERENCE GENOME - re_top_species = re.findall(r"^(Mycobact|Mycolicibac)\w+ (abscessus|africanum|avium|bovis|chelonae|chimaera|fortuitum|intracellulare|kansasii|tuberculosis) ()\w+ (bovis|orgis|caprae).*?$", top_species) + re_top_species = re.findall(r"^(Mycobact|Mycolicibac)\w+ (abscessus|africanum|avium|bovis|chelonae|chimaera|fortuitum|intracellulare|kansasii|tuberculosis).*?$", top_species) + re_top_variant = re.findall(r"^(Mycobact|Mycolicibac)\w+ (abscessus|africanum|avium|bovis|chelonae|chimaera|fortuitum|intracellulare|kansasii|tuberculosis) ()\w+ (bovis|orgis|caprae).*?$", top_species) + if len(re_top_variant) != 0: + re_top_species = re_top_variant if len(re_top_species) > 0: if len(re_top_species[0]) == 2: identified_species = re_top_species[0][1] From 36a244bb26dc13f005da6b3907234173b8ced65d Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Tue, 10 Sep 2024 15:50:16 +0000 Subject: [PATCH 05/32] bump --- docker/Dockerfile.preprocessing-0.9.9 | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile.preprocessing-0.9.9 b/docker/Dockerfile.preprocessing-0.9.9 index 0c4da95..ab92979 100644 --- a/docker/Dockerfile.preprocessing-0.9.9 +++ b/docker/Dockerfile.preprocessing-0.9.9 @@ -1,5 +1,6 @@ FROM ubuntu:focal + LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the preprocessing workflow" From 2bf69fc52ed823b0c135724a313a792e124c09c1 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 11 Sep 2024 08:29:34 +0000 Subject: [PATCH 06/32] update recipes --- docker/Dockerfile.tbtamr-0.9.9 | 3 +++ singularity/Singularity.tbtamr-0.9.9 | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile.tbtamr-0.9.9 b/docker/Dockerfile.tbtamr-0.9.9 index 79c960f..3043027 100644 --- a/docker/Dockerfile.tbtamr-0.9.9 +++ b/docker/Dockerfile.tbtamr-0.9.9 @@ -2,6 +2,9 @@ FROM ubuntu:jammy WORKDIR / +ENV freebayes_version=1.3.6 \ + tbtamr_version=0.0.4 + # LABEL instructions tag the image 
with metadata that might be important to the user LABEL base.image="ubuntu:jammy" LABEL dockerfile.version="0.9.9" diff --git a/singularity/Singularity.tbtamr-0.9.9 b/singularity/Singularity.tbtamr-0.9.9 index 8908ce6..7be1dc1 100644 --- a/singularity/Singularity.tbtamr-0.9.9 +++ b/singularity/Singularity.tbtamr-0.9.9 @@ -14,6 +14,9 @@ maintainer3.email="twhalley93@gmail.com" mkdir -p / cd / +freebayes_version=1.3.6 +tbtamr_version=0.0.4 + # LABEL instructions tag the image with metadata that might be important to the user #set env for root prefix @@ -33,7 +36,7 @@ curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest| tar -xvj bin/mic micromamba install --yes --name base --channel conda-forge --channel bioconda jq requests xlsxwriter tbtamr micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4 micromamba install --yes --name base --channel conda-forge --channel bioconda samtools -micromamba install --yes bioconda freebayes==1.3.6 #STDERR in current version of freebayes +micromamba install --yes --name base --channel conda-forge --channel bioconda freebayes==1.3.6 #STDERR in current version of freebayes micromamba clean --all --yes # hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time @@ -45,6 +48,8 @@ cd /data #wants full path to reference tbtamr setup %environment +export freebayes_version=1.3.6 +export tbtamr_version=0.0.4 export MAMBA_ROOT_PREFIX="/opt/conda" export PATH="/opt/conda/bin:${PATH}" %runscript From 520d7574a1226e59cf20b5cc6fa5cb9b146cba87 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 11 Sep 2024 09:45:31 +0000 Subject: [PATCH 07/32] lineage dictionary --- bin/identify_tophit_and_contaminants2.py | 7 +++++++ docker/Dockerfile.preprocessing-0.9.9 | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/bin/identify_tophit_and_contaminants2.py b/bin/identify_tophit_and_contaminants2.py index c396d3b..2c0f743 100755 --- 
a/bin/identify_tophit_and_contaminants2.py +++ b/bin/identify_tophit_and_contaminants2.py @@ -365,6 +365,13 @@ def process_reports(afanc_json_path, kraken_json_path, supposed_species, unmix_m if len(re_top_species) > 0: if len(re_top_species[0]) == 2: identified_species = re_top_species[0][1] + #deal with lineages + lineage_dict = {"La1.": "bovis", + "La2.": "caprae", + "La3.": "orygis"} + for lineage in lineage_dict: + if lineage in top_species: + identified_species = lineage_dict[lineage] else: identified_species = re_top_species[0][3] #we have bovis (or orgis/caprae) with variant in the name if supposed_species == 'null': diff --git a/docker/Dockerfile.preprocessing-0.9.9 b/docker/Dockerfile.preprocessing-0.9.9 index ab92979..0c4da95 100644 --- a/docker/Dockerfile.preprocessing-0.9.9 +++ b/docker/Dockerfile.preprocessing-0.9.9 @@ -1,6 +1,5 @@ FROM ubuntu:focal - LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the preprocessing workflow" From 445977243e5746dbfc62c4ba829524a454ab02c1 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 11 Sep 2024 10:01:32 +0000 Subject: [PATCH 08/32] update readme --- README.md | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 55ec2b4..83e315f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,20 @@ ![Build Status](https://github.com/Pathogen-Genomics-Cymru/lodestone/workflows/build-push-quay/badge.svg) ![Build Status](https://github.com/Pathogen-Genomics-Cymru/lodestone/workflows/pytest/badge.svg) ![Build Status](https://github.com/Pathogen-Genomics-Cymru/lodestone/workflows/stub-run/badge.svg) - + +## Table of Contents +- [What is Lodestone](#-what-is-lodestone) +- [Quick Start](#-quick-start) +- [Executors](#-executors) +- [System Requirements](#-system-requirements) +- [Parameters](#-parameters) +- [Stub Runs](#-stub-runs) +- [Checkpoints](#-checkpoints) +- [Acknowledgments](#-acknowledgements) +- [License](#-license) + +## What is 
Lodestone? + This pipeline takes as input reads presumed to be from one of 10 mycobacterial genomes: abscessus, africanum, avium, bovis, chelonae, chimaera, fortuitum, intracellulare, kansasii, tuberculosis. Input should be in the form of one directory containing pairs of fastq(.gz) or bam files. Pipeline cleans and QCs reads with fastp and FastQC, classifies with Kraken2 & Afanc, removes non-bacterial content, and - by alignment to any minority genomes - disambiguates mixtures of bacterial reads. Cleaned reads are aligned to either of the 10 supported genomes and variants called. Produces as output one directory per sample, containing cleaned fastqs, sorted, indexed BAM, VCF, F2 and F47 statistics, an antibiogram and summary reports. @@ -40,7 +53,7 @@ By default, the pipeline will just run on the local machine. To run on a cluster ### System Requirements ### Minimum recommended requirements: 32GB RAM, 8CPU -## Params ## +## Parameters ## The following parameters should be set in `nextflow.config` or specified on the command line: * **input_dir**
@@ -84,7 +97,7 @@ For more information on the parameters run `nextflow run main.nf --help` The path to the singularity images can also be changed in the singularity profile in `nextflow.config`. Default value is `${baseDir}/singularity` -## Stub-run ## +## Stub runs ## To test the stub run: ``` NXF_VER=20.11.0-edge nextflow run main.nf -stub -config testing.config @@ -150,3 +163,5 @@ For a list of direct authors of this pipeline, please see the contributors list. The preprocessing sub-workflow is based on the preprocessing nextflow DSL1 pipeline written by Stephen Bush, University of Oxford. The clockwork sub-workflow uses aspects of the variant calling workflow from https://github.com/iqbal-lab-org/clockwork, lead author Martin Hunt, Iqbal Lab at EMBL-EBI +## License +The tool is licensed under the V3 GNU Affero GPL license. Please see the [LICENSE](LICENSE) file for more details. From 803358b2122ee43b6cf8f2f03c433fbb558ff883 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 11 Sep 2024 10:03:35 +0000 Subject: [PATCH 09/32] update readme --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 83e315f..c321523 100644 --- a/README.md +++ b/README.md @@ -4,14 +4,14 @@ ![Build Status](https://github.com/Pathogen-Genomics-Cymru/lodestone/workflows/stub-run/badge.svg) ## Table of Contents -- [What is Lodestone](#-what-is-lodestone) -- [Quick Start](#-quick-start) -- [Executors](#-executors) -- [System Requirements](#-system-requirements) -- [Parameters](#-parameters) -- [Stub Runs](#-stub-runs) -- [Checkpoints](#-checkpoints) -- [Acknowledgments](#-acknowledgements) +- [What is Lodestone](#what-is-lodestone) +- [Quick Start](#quick-start) +- [Executors](#executors) +- [System Requirements](#system-requirements) +- [Parameters](#parameters) +- [Stub Runs](#stub-runs) +- [Checkpoints](#checkpoints) +- [Acknowledgments](#acknowledgements) - [License](#-license) ## What is Lodestone? 
From 7d44e1ace9c4b495193628d460d36801d612f759 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 11 Sep 2024 11:39:11 +0000 Subject: [PATCH 10/32] change logic for how we proceed tb or no tb --- modules/clockworkModules.nf | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modules/clockworkModules.nf b/modules/clockworkModules.nf index 5ad1772..1e269ab 100644 --- a/modules/clockworkModules.nf +++ b/modules/clockworkModules.nf @@ -259,11 +259,17 @@ process minos { cp minos/final.vcf ${minos_vcf} rm -rf minos - top_hit=\$(jq -r '.top_hit.name' ${report_json}) + top_hit=\$(jq -r '.top_hit.file_paths.ref_fa' ${report_json}) cp ${sample_name}_report.json ${sample_name}_report_previous.json - if [[ \$top_hit =~ ^"Mycobacterium tuberculosis" ]]; then printf "CREATE_ANTIBIOGRAM_${sample_name}"; else echo '{"resistance-profiling-warning":"sample is not TB so cannot produce antibiogram using resistance profiling tools"}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi + if [[ \$top_hit =~ "tuberculosis" ]]; then + printf "CREATE_ANTIBIOGRAM_${sample_name}" + else + printf "CREATE_NTM_ANTIBIOGRAM_${sample_name}" + echo '{"resistance-profiling-warning":"sample is not TB so cannot produce antibiogram using resistance profiling tools"}' \ + | jq '.' 
> ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json} + fi """ stub: From 01e6c4b43928584886f9fa8d6edd8374e546dfa6 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 11 Sep 2024 11:40:01 +0000 Subject: [PATCH 11/32] add a specific when condition for ntmprofiler --- modules/vcfpredictModules.nf | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf index a6df54f..dbafaf3 100644 --- a/modules/vcfpredictModules.nf +++ b/modules/vcfpredictModules.nf @@ -108,8 +108,10 @@ process tbprofiler { stub: """ + mkdir ${sample_name} touch ${sample_name}.tbprofiler-out.json touch ${sample_name}_report.json + touch ${sample_name}/${sample_name}.results.json """ } @@ -127,7 +129,7 @@ process ntmprofiler { path("${sample_name}.results.json"), emit: collate_json when: - isSampleTB != /CREATE\_ANTIBIOGRAM\_${sample_name}/ + isSampleTB =~ /CREATE\_NTM_\ANTIBIOGRAM\_${sample_name}/ script: error_log = "${sample_name}_err.json" @@ -145,6 +147,13 @@ process ntmprofiler { jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${ntmprofiler_json} > ${report_json} """ + + stub: + """ + touch ${sample_name}.ntmprofiler-out.json + touch ${sample_name}_report.json + touch ${sample_name}.results.json + """ } process tbtamr { From 2d071d09c13ef499e6a323fd57249d159c83cfb7 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 11 Sep 2024 11:44:31 +0000 Subject: [PATCH 12/32] more specific regex --- modules/clockworkModules.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/clockworkModules.nf b/modules/clockworkModules.nf index 1e269ab..11f5e69 100644 --- a/modules/clockworkModules.nf +++ b/modules/clockworkModules.nf @@ -263,7 +263,7 @@ process minos { cp ${sample_name}_report.json ${sample_name}_report_previous.json - if [[ \$top_hit =~ "tuberculosis" ]]; then + if [[ \$top_hit =~ "/tuberculosis.fasta" 
]]; then printf "CREATE_ANTIBIOGRAM_${sample_name}" else printf "CREATE_NTM_ANTIBIOGRAM_${sample_name}" From 2acbce604332b2c2da9cfa76fb6ddda18ec192b3 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 11 Sep 2024 13:50:49 +0000 Subject: [PATCH 13/32] update logic in workflow --- modules/clockworkModules.nf | 2 +- modules/vcfpredictModules.nf | 6 ++++-- workflows/vcfpredict.nf | 8 +++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/modules/clockworkModules.nf b/modules/clockworkModules.nf index 11f5e69..a7c9ba6 100644 --- a/modules/clockworkModules.nf +++ b/modules/clockworkModules.nf @@ -268,7 +268,7 @@ process minos { else printf "CREATE_NTM_ANTIBIOGRAM_${sample_name}" echo '{"resistance-profiling-warning":"sample is not TB so cannot produce antibiogram using resistance profiling tools"}' \ - | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json} + | jq '.' > ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json} fi """ diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf index dbafaf3..33c72d8 100644 --- a/modules/vcfpredictModules.nf +++ b/modules/vcfpredictModules.nf @@ -122,14 +122,16 @@ process ntmprofiler { label 'ntmprofiler' input: - tuple val(sample_name), path(fq1), path(fq2), path(report_json), val(isSampleTB) + tuple val(sample_name), path(fq1), path(fq2) + path(report_json) + val(isSampleNTM) output: tuple val(sample_name), path("${sample_name}.ntmprofiler-out.json"), path("${sample_name}_report.json"), emit: ntmprofiler_json path("${sample_name}.results.json"), emit: collate_json when: - isSampleTB =~ /CREATE\_NTM_\ANTIBIOGRAM\_${sample_name}/ + isSampleNTM =~ /CREATE\_NTM\_ANTIBIOGRAM\_${sample_name}/ script: error_log = "${sample_name}_err.json" diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf index 773e114..124901a 100644 --- a/workflows/vcfpredict.nf +++ 
b/workflows/vcfpredict.nf @@ -35,14 +35,15 @@ workflow vcfpredict { sample_name = minos_vcf_tuple.map{it[0]} minos_vcf = minos_vcf_tuple.map{it[1]} do_we_resistance_profile = minos_vcf_tuple.map{it[2]} - report_json = minos_vcf_tuple.map{it[3]} + report_json = minos_vcf_tuple.map{it[3]} bam = clockwork_bam.map{it[2]} - fastq_and_report = sample_and_fastqs.combine(report_json).combine(do_we_resistance_profile) + + sample_and_fastqs.view() //ntm-profiling: e.g. everything down being passed into tbtamr/tb-profiler //at the moment it is only ran on fastqs; need to find a sensible way //of linking up the references - ntmprofiler(fastq_and_report) + ntmprofiler(sample_and_fastqs, report_json, do_we_resistance_profile) ntm_profiling_json = ntmprofiler.out.ntmprofiler_json @@ -63,6 +64,7 @@ workflow vcfpredict { //run tb-profiler tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile) profiling_json = tbprofiler.out.tbprofiler_json + if(params.collate == "yes"){ collated_jsons = tbprofiler.out.collate_json.collect() tbprofiler_collate(collated_jsons) From 791f9cca29390b29857a3cdd87b6e0ab155f849e Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 11 Sep 2024 14:13:19 +0000 Subject: [PATCH 14/32] remove view() --- workflows/vcfpredict.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf index 124901a..bf14b67 100644 --- a/workflows/vcfpredict.nf +++ b/workflows/vcfpredict.nf @@ -38,8 +38,6 @@ workflow vcfpredict { report_json = minos_vcf_tuple.map{it[3]} bam = clockwork_bam.map{it[2]} - sample_and_fastqs.view() - //ntm-profiling: e.g. 
everything down being passed into tbtamr/tb-profiler //at the moment it is only ran on fastqs; need to find a sensible way //of linking up the references From cd912e755a0e288232a62840049a9e5ee8ad4d78 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Thu, 12 Sep 2024 14:40:13 +0000 Subject: [PATCH 15/32] push bin to docker clockwork --- docker/Dockerfile.clockwork-0.9.9 | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile.clockwork-0.9.9 b/docker/Dockerfile.clockwork-0.9.9 index 57c30f8..a31d050 100644 --- a/docker/Dockerfile.clockwork-0.9.9 +++ b/docker/Dockerfile.clockwork-0.9.9 @@ -1,5 +1,6 @@ FROM debian:buster + LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the clockwork workflow" From da64d10e6f6e02638088214a2f62b114ec1d0d58 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Fri, 13 Sep 2024 12:27:54 +0000 Subject: [PATCH 16/32] vcfmix --- docker/Dockerfile.vcfpredict-0.9.9 | 1 + main.nf | 14 ++++----- modules/clockworkModules.nf | 3 ++ modules/vcfpredictModules.nf | 28 +++++++----------- nextflow.config | 3 -- workflows/clockwork.nf | 12 ++++---- workflows/vcfpredict.nf | 46 ++++++++++-------------------- 7 files changed, 40 insertions(+), 67 deletions(-) diff --git a/docker/Dockerfile.vcfpredict-0.9.9 b/docker/Dockerfile.vcfpredict-0.9.9 index 9de1bb7..2cd093a 100644 --- a/docker/Dockerfile.vcfpredict-0.9.9 +++ b/docker/Dockerfile.vcfpredict-0.9.9 @@ -1,5 +1,6 @@ FROM ubuntu:20.04 + LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the vcf predict workflow" diff --git a/main.nf b/main.nf index 9fdbe51..0f08724 100644 --- a/main.nf +++ b/main.nf @@ -85,11 +85,10 @@ nextflow run main.nf -profile docker --filetype bam --input_dir bam_dir --unmix_ } -resistance_profilers = ["tb-profiler", "tbtamr", "none"] +resistance_profilers = ["tb-profiler", "tbtamr"] if(!resistance_profilers.contains(params.resistance_profiler)){ - exit 1, 'Invalid resistance profiler. 
Must be one of "tb-profiler", "tbtamr" \ - or "none" to skip.' + exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler" or "tbtamr"' } @@ -199,13 +198,10 @@ workflow { clockwork(preprocessing_output) // VCFPREDICT SUB-WORKFLOW - sample_and_fastqs = clockwork.out.sample_and_fastqs - mpileup_vcf = clockwork.out.mpileup_vcf - minos_vcf = clockwork.out.minos_vcf - reference = clockwork.out.reference - bam = clockwork.out.bam + profiler_input_vcf = clockwork.out.profiler_input_vcf + profiler_input_fq = clockwork.out.profiler_input_fq - vcfpredict(sample_and_fastqs, bam, mpileup_vcf, minos_vcf, reference) + vcfpredict(profiler_input_fq, profiler_input_vcf) } diff --git a/modules/clockworkModules.nf b/modules/clockworkModules.nf index a7c9ba6..a1ea0e6 100644 --- a/modules/clockworkModules.nf +++ b/modules/clockworkModules.nf @@ -305,6 +305,9 @@ process gvcf { path("${sample_name}.fa", emit: gvcf_fa) path "${sample_name}_err.json", emit: gvcf_log optional true path "${sample_name}_report.json", emit: gvcf_report optional true + tuple val(sample_name), path(minos_vcf), path(report_json), emit: vcfmix_input + tuple val(sample_name), path(minos_vcf), path(report_json), path(bam), path(ref), val(isSampleTB), emit: tbprofiler + tuple val(sample_name), path(report_json), path(minos_vcf), val(isSampleTB), emit: gvcf_report_resistance script: gvcf = "${sample_name}.gvcf.vcf" diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf index 33c72d8..c0e85f5 100644 --- a/modules/vcfpredictModules.nf +++ b/modules/vcfpredictModules.nf @@ -27,13 +27,11 @@ process vcfmix { error_log = "${sample_name}_err.json" """ - run-vcfmix.py ${bcftools_vcf} + run-vcfmix.py $vcf cp ${sample_name}_report.json ${sample_name}_report_previous.json jq -s ".[0] * .[1]" ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json} - - if [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' 
> ${error_log} && jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}; fi """ stub: @@ -74,14 +72,12 @@ process tbprofiler { publishDir "${params.output_dir}${sample_name}", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' input: - val(sample_name) - path(minos_vcf) - path(report_json) - val(isSampleTB) + tuple val(sample_name), path(minos_vcf), path(report_json), val(isSampleTB) output: tuple val(sample_name), path("${sample_name}.tbprofiler-out.json"), path("${sample_name}_report.json"), emit: tbprofiler_json path("${sample_name}/${sample_name}.results.json"), emit: collate_json + tuple val(sample_name), path(vcf), path(report_json), emit: vcfmix_in when: isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ @@ -122,13 +118,12 @@ process ntmprofiler { label 'ntmprofiler' input: - tuple val(sample_name), path(fq1), path(fq2) - path(report_json) - val(isSampleNTM) + tuple val(sample_name), path(fq1), path(fq2), path(report_json), path(vcf), val(isSampleNTM) output: tuple val(sample_name), path("${sample_name}.ntmprofiler-out.json"), path("${sample_name}_report.json"), emit: ntmprofiler_json path("${sample_name}.results.json"), emit: collate_json + tuple val(sample_name), path(vcf), path(report_json), emit: vcfmix_in when: isSampleNTM =~ /CREATE\_NTM\_ANTIBIOGRAM\_${sample_name}/ @@ -168,11 +163,12 @@ process tbtamr { publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' input: - tuple val(sample_name), path(fq1), path(fq2), path(report_json), val(isSampleTB) + tuple val(sample_name), path(fq1), path(fq2), path(report_json), path(vcf), val(isSampleTB) output: tuple val(sample_name), path("${sample_name}.tbtamr-out.json"), path("${sample_name}_report.json"), emit: tbtamr_json path(sample_name), emit: collate_json + tuple val(sample_name), path(vcf), path(report_json), emit: vcfmix_in when: isSampleTB =~ 
/CREATE\_ANTIBIOGRAM\_${sample_name}/ @@ -269,15 +265,11 @@ process add_allelic_depth { label 'tbprofiler' input: - val(sample_name) - path(minos_vcf) - path(bam) - path(reference) - val(isSampleTB) + tuple val(sample_name), path(minos_vcf), path(report_json), path(bam), path(reference), val(isSampleTB) output: - path("${sample_name}_allelic_depth.minos.vcf") - + tuple val(sample_name), path("${sample_name}_allelic_depth.minos.vcf"), path(report_json), val(isSampleTB) + when: isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ diff --git a/nextflow.config b/nextflow.config index f26774c..95a3eca 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,9 +31,6 @@ params { // name of the bowtie index, e.g. hg19_1kgmaj bowtie_index_name = "hg19_1kgmaj" - // run VCFMIX 'yes' or 'no' (set to no for synthetic samples) - vcfmix = 'yes' - // resistance params resistance_profiler = "tb-profiler" update_tbprofiler = "no" diff --git a/workflows/clockwork.nf b/workflows/clockwork.nf index af04335..940546e 100644 --- a/workflows/clockwork.nf +++ b/workflows/clockwork.nf @@ -38,11 +38,11 @@ workflow clockwork { gvcf(alignToRef.out.alignToRef_bam.join(minos.out.minos_vcf, by: 0)) - emit: - sample_and_fastqs = input_seqs_json.map{it[0,1,2]} - mpileup_vcf = callVarsMpileup.out.mpileup_vcf.join(minos.out.minos_report, by: 0) - minos_vcf = minos.out.minos_vcf.join(alignToRef.out.alignToRef_report, by: 0) - reference = getRefFromJSON.out - bam = alignToRef.out.alignToRef_bam + report_for_ntm = gvcf.out.gvcf_report_resistance + sample_and_fqs = input_seqs_json.map{it[0,1,2]} + profiler_input_fq = sample_and_fqs.join(report_for_ntm, by:0) + emit: + profiler_input_vcf = gvcf.out.tbprofiler + profiler_input_fq = profiler_input_fq } diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf index bf14b67..858e67c 100644 --- a/workflows/vcfpredict.nf +++ b/workflows/vcfpredict.nf @@ -16,34 +16,16 @@ include {ntmprofiler_collate} from '../modules/vcfpredictModules.nf' params(para 
workflow vcfpredict { take: - sample_and_fastqs - clockwork_bam - clockwork_bcftools_tuple - minos_vcf_tuple - reference_fasta - + profiler_input_fq + profiler_input_vcf main: - - if ( params.vcfmix == "yes" ) { - - vcfmix(clockwork_bcftools_tuple) - - } - - //get just the vcf - sample_name = minos_vcf_tuple.map{it[0]} - minos_vcf = minos_vcf_tuple.map{it[1]} - do_we_resistance_profile = minos_vcf_tuple.map{it[2]} - report_json = minos_vcf_tuple.map{it[3]} - bam = clockwork_bam.map{it[2]} - //ntm-profiling: e.g. everything down being passed into tbtamr/tb-profiler //at the moment it is only ran on fastqs; need to find a sensible way //of linking up the references - ntmprofiler(sample_and_fastqs, report_json, do_we_resistance_profile) + ntmprofiler(profiler_input_fq) - ntm_profiling_json = ntmprofiler.out.ntmprofiler_json + ntm_profiling_out = ntmprofiler.out.vcfmix_in if(params.collate == "yes"){ collated_ntm_jsons = ntmprofiler.out.collate_json.collect() @@ -58,26 +40,28 @@ workflow vcfpredict { } //add allelic depth back in: was calculated in mpileup but lost in minos - add_allelic_depth(sample_name, minos_vcf, bam, reference_fasta, do_we_resistance_profile) + add_allelic_depth(profiler_input_vcf) //run tb-profiler - tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile) - profiling_json = tbprofiler.out.tbprofiler_json + tbprofiler(add_allelic_depth.out) + + tb_profiling_out = tbprofiler.out.vcfmix_in if(params.collate == "yes"){ collated_jsons = tbprofiler.out.collate_json.collect() tbprofiler_collate(collated_jsons) } } else if (params.resistance_profiler == "tbtamr"){ - tbtamr(fastq_and_report) - profiling_json = tbtamr.out.tbtamr_json + tbtamr(profiler_input_fq) + + tb_profiling_out = tbtamr.out.vcfmix_in + if(params.collate == "yes"){ collated_jsons = tbtamr.out.collate_json.collect() tbtamr_collate(collated_jsons) } } - if (params.vcfmix == "yes" && params.resistance_profiler != "none"){ - profiling_jsons = 
profiling_json.combine(ntm_profiling_json) - finalJson(vcfmix.out.vcfmix_json.join(profiling_json, by: 0)) - } + profiling_jsons = ntm_profiling_out.mix(tb_profiling_out) + profiling_jsons.view() + vcfmix(profiling_jsons) } From 34dd7a7a70ab0594165a63c56e4a88310a1d1a3b Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Mon, 16 Sep 2024 14:44:34 +0000 Subject: [PATCH 17/32] change parsing of string for vcmix --- bin/run-vcfmix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/run-vcfmix.py b/bin/run-vcfmix.py index e75da76..13cb9d5 100755 --- a/bin/run-vcfmix.py +++ b/bin/run-vcfmix.py @@ -11,8 +11,8 @@ def go(vcf_file): # create a lineagescan object v = lineageScan() - # assuming postfix of ".bcftools.vcf" - sampleid = vcf_file[:-13] + # assuming postfix of ".minos.vcf" + sampleid = vcf_file.replace(".minos.vcf", "") print(sampleid) res = v.parse(vcffile=vcf_file, sample_id=sampleid) From 35ceada35dd5e8e47c4d8243914172473c4bd7ae Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Mon, 16 Sep 2024 14:50:21 +0000 Subject: [PATCH 18/32] update dockerfile --- docker/Dockerfile.vcfpredict-0.9.9 | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/Dockerfile.vcfpredict-0.9.9 b/docker/Dockerfile.vcfpredict-0.9.9 index 2cd093a..9de1bb7 100644 --- a/docker/Dockerfile.vcfpredict-0.9.9 +++ b/docker/Dockerfile.vcfpredict-0.9.9 @@ -1,6 +1,5 @@ FROM ubuntu:20.04 - LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the vcf predict workflow" From 31c85299ce88c3b7e55218383de2497aaf3cccc0 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Tue, 17 Sep 2024 14:09:28 +0000 Subject: [PATCH 19/32] add gatk to clockwork --- docker/Dockerfile.clockwork-0.9.9 | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile.clockwork-0.9.9 b/docker/Dockerfile.clockwork-0.9.9 index a31d050..d99ec59 100644 --- a/docker/Dockerfile.clockwork-0.9.9 +++ b/docker/Dockerfile.clockwork-0.9.9 @@ -17,7 +17,8 @@ 
vcftools_version=0.1.15 \ mccortex_version=97aba198d632ee98ac1aa496db33d1a7a8cb7e51 \ stampy_version=1.0.32r3761 \ python_version=3.6.5 \ -clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 +clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 \ +gatk_version=4.6.0.0 ENV PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" \ PYTHON="python2.7 python-dev" @@ -108,8 +109,12 @@ RUN git clone --recursive https://github.com/iqbal-lab/cortex.git \ && pip3 install . \ && chmod +x scripts/clockwork +RUN wget https://github.com/broadinstitute/gatk/releases/download/${gatk_version}/gatk-${gatk_version}.zip -O /tmp/gatk-${gatk_version}.zip\ + && unzip /tmp/gatk-${gatk_version}.zip -d /opt/ \ + && rm /tmp/gatk-${gatk_version}.zip -f + ENV CLOCKWORK_CORTEX_DIR=/cortex \ -PATH=${PATH}:/clockwork/python/scripts \ +PATH=${PATH}:/clockwork/python/scripts:/opt/gatk-${gatk_version} \ PICARD_JAR=/usr/local/bin/picard.jar ENV LC_ALL en_US.UTF-8 \ From c25a3c48cd9cefd9a4874118f9f3fa131e66d6d7 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Tue, 17 Sep 2024 14:10:23 +0000 Subject: [PATCH 20/32] gatk to minos so all vcfs are the same --- docker/Dockerfile.tbprofiler-0.9.9 | 3 +-- modules/clockworkModules.nf | 8 +++++++- modules/vcfpredictModules.nf | 7 +++++-- testing.config | 1 + workflows/clockwork.nf | 2 ++ workflows/vcfpredict.nf | 5 +---- 6 files changed, 17 insertions(+), 9 deletions(-) diff --git a/docker/Dockerfile.tbprofiler-0.9.9 b/docker/Dockerfile.tbprofiler-0.9.9 index 42c3832..d9d5210 100644 --- a/docker/Dockerfile.tbprofiler-0.9.9 +++ b/docker/Dockerfile.tbprofiler-0.9.9 @@ -42,8 +42,7 @@ RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest| tar -xvj bin # install tb-profiler via bioconda; install into 'base' conda env RUN micromamba install --yes 
--name base --channel conda-forge --channel bioconda \ tb-profiler=${TBPROFILER_VER} - -RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4 +RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4 RUN micromamba install --yes --name base --channel conda-forge --channel bioconda samtools RUN micromamba install --yes --name base --channel conda-forge jq RUN micromamba clean --all --yes diff --git a/modules/clockworkModules.nf b/modules/clockworkModules.nf index a1ea0e6..2c90f15 100644 --- a/modules/clockworkModules.nf +++ b/modules/clockworkModules.nf @@ -233,7 +233,7 @@ process minos { output: tuple val(sample_name), path(report_json), path(bam), path(ref), emit: minos_bam - tuple val(sample_name), path("${sample_name}.minos.vcf"), stdout, emit: minos_vcf + tuple val(sample_name), path("${sample_name}_allelic_depth.minos.vcf"), stdout, emit: minos_vcf tuple val(sample_name), path("${sample_name}_report.json"), emit: minos_report path "${sample_name}_err.json", emit: minos_log optional true @@ -259,6 +259,12 @@ process minos { cp minos/final.vcf ${minos_vcf} rm -rf minos + samtools faidx $ref + samtools dict $ref -o ${ref.baseName}.dict + mkdir tmp + + gatk VariantAnnotator -R $ref -I $bam -V $minos_vcf -A DepthPerAlleleBySample -O ${sample_name}_allelic_depth.minos.vcf --tmp-dir tmp + top_hit=\$(jq -r '.top_hit.file_paths.ref_fa' ${report_json}) cp ${sample_name}_report.json ${sample_name}_report_previous.json diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf index c0e85f5..cb86b5c 100644 --- a/modules/vcfpredictModules.nf +++ b/modules/vcfpredictModules.nf @@ -27,7 +27,7 @@ process vcfmix { error_log = "${sample_name}_err.json" """ - run-vcfmix.py $vcf + run-vcfmix.py ${vcf} cp ${sample_name}_report.json ${sample_name}_report_previous.json @@ -77,7 +77,7 @@ process tbprofiler { output: tuple val(sample_name), path("${sample_name}.tbprofiler-out.json"), 
path("${sample_name}_report.json"), emit: tbprofiler_json path("${sample_name}/${sample_name}.results.json"), emit: collate_json - tuple val(sample_name), path(vcf), path(report_json), emit: vcfmix_in + tuple val(sample_name), path(minos_vcf), path(report_json), emit: vcfmix_in when: isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ @@ -87,7 +87,10 @@ process tbprofiler { tbprofiler_json = "${sample_name}.tbprofiler-out.json" """ + #keep the original vcf so we can collate the output and pass it down + cp ${minos_vcf} tmp.vcf bgzip ${minos_vcf} + mv tmp.vcf ${minos_vcf} mkdir tmp tb-profiler profile --vcf ${minos_vcf}.gz --threads ${task.cpus} --temp tmp --prefix ${sample_name} diff --git a/testing.config b/testing.config index e8edf8b..9400bb5 100644 --- a/testing.config +++ b/testing.config @@ -1,5 +1,6 @@ // E.g. to run: NXF_VER=20.11.0-edge nextflow run main.nf -stub -config testing.config + // dry-run parameters // OK or null diff --git a/workflows/clockwork.nf b/workflows/clockwork.nf index 940546e..fff09cd 100644 --- a/workflows/clockwork.nf +++ b/workflows/clockwork.nf @@ -24,6 +24,8 @@ workflow clockwork { sample_name = input_seqs_json.map{it[0]} getRefFromJSON(json, do_we_align, sample_name) + input_seqs_json.view() + getRefFromJSON.out.view() alignToRef(input_seqs_json, getRefFromJSON.out) diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf index 858e67c..1c07f49 100644 --- a/workflows/vcfpredict.nf +++ b/workflows/vcfpredict.nf @@ -39,10 +39,8 @@ workflow vcfpredict { tbprofiler_update_db(reference_fasta) } - //add allelic depth back in: was calculated in mpileup but lost in minos - add_allelic_depth(profiler_input_vcf) //run tb-profiler - tbprofiler(add_allelic_depth.out) + tbprofiler(profiler_input_vcf) tb_profiling_out = tbprofiler.out.vcfmix_in @@ -62,6 +60,5 @@ workflow vcfpredict { } profiling_jsons = ntm_profiling_out.mix(tb_profiling_out) - profiling_jsons.view() vcfmix(profiling_jsons) } From 7fc28993632ae6cac9311f810f4826c8fdbc5bdf 
Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Tue, 17 Sep 2024 15:17:49 +0000 Subject: [PATCH 21/32] bump jdk --- docker/Dockerfile.clockwork-0.9.9 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.clockwork-0.9.9 b/docker/Dockerfile.clockwork-0.9.9 index d99ec59..2ceaaab 100644 --- a/docker/Dockerfile.clockwork-0.9.9 +++ b/docker/Dockerfile.clockwork-0.9.9 @@ -20,7 +20,7 @@ python_version=3.6.5 \ clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 \ gatk_version=4.6.0.0 -ENV PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" \ +ENV PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all openjdk-18-jdk" \ PYTHON="python2.7 python-dev" COPY bin/ /opt/bin/ From b4e62cafb5124f9d603c64ba0093ab2efc2ad75f Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Tue, 17 Sep 2024 15:24:46 +0000 Subject: [PATCH 22/32] bump jdk --- docker/Dockerfile.clockwork-0.9.9 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile.clockwork-0.9.9 b/docker/Dockerfile.clockwork-0.9.9 index 2ceaaab..9049a5d 100644 --- a/docker/Dockerfile.clockwork-0.9.9 +++ b/docker/Dockerfile.clockwork-0.9.9 @@ -20,7 +20,7 @@ python_version=3.6.5 \ clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 \ gatk_version=4.6.0.0 -ENV PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all openjdk-18-jdk" \ +ENV PACKAGES="procps curl git build-essential wget 
zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" \ PYTHON="python2.7 python-dev" COPY bin/ /opt/bin/ @@ -38,7 +38,7 @@ RUN apt-get update \ && ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \ && pip3 install --upgrade pip \ && pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \ -&& apt-get update && apt-get install -y openjdk-11-jdk +&& apt-get update && apt-get install -y openjdk-18-jdk RUN curl -fsSL https://github.com/samtools/samtools/archive/${samtools_version}.tar.gz | tar -xz \ && curl -fsSL https://github.com/samtools/htslib/releases/download/${htslib_version}/htslib-${htslib_version}.tar.bz2 | tar -xj \ From b3fc305fc7c0510608226b5adff795f3db520d5d Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Tue, 17 Sep 2024 15:39:36 +0000 Subject: [PATCH 23/32] bump jdk --- docker/Dockerfile.clockwork-0.9.9 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile.clockwork-0.9.9 b/docker/Dockerfile.clockwork-0.9.9 index 9049a5d..cd34f87 100644 --- a/docker/Dockerfile.clockwork-0.9.9 +++ b/docker/Dockerfile.clockwork-0.9.9 @@ -1,4 +1,4 @@ -FROM debian:buster +FROM ubuntu:focal LABEL maintainer="pricea35@cardiff.ac.uk" \ @@ -26,7 +26,7 @@ PYTHON="python2.7 python-dev" COPY bin/ /opt/bin/ ENV PATH=/opt/bin:$PATH - +RUN snap install openjdk --channel=22.0.2/stable RUN apt-get update \ && apt-get install -y $PACKAGES $PYTHON \ && curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \ From 97e526d011952184ac8310c96c1e76dd79ae9528 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 18 Sep 2024 09:36:59 +0000 Subject: [PATCH 24/32] dockerfile for cwork installs snapd --- docker/Dockerfile.clockwork-0.9.9 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile.clockwork-0.9.9 
b/docker/Dockerfile.clockwork-0.9.9 index cd34f87..0fcc30c 100644 --- a/docker/Dockerfile.clockwork-0.9.9 +++ b/docker/Dockerfile.clockwork-0.9.9 @@ -26,7 +26,6 @@ PYTHON="python2.7 python-dev" COPY bin/ /opt/bin/ ENV PATH=/opt/bin:$PATH -RUN snap install openjdk --channel=22.0.2/stable RUN apt-get update \ && apt-get install -y $PACKAGES $PYTHON \ && curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \ @@ -38,7 +37,9 @@ RUN apt-get update \ && ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \ && pip3 install --upgrade pip \ && pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \ -&& apt-get update && apt-get install -y openjdk-18-jdk +&& apt-get update && apt-get install -y snapd + +RUN snap install openjdk --channel=22.0.2/stable RUN curl -fsSL https://github.com/samtools/samtools/archive/${samtools_version}.tar.gz | tar -xz \ && curl -fsSL https://github.com/samtools/htslib/releases/download/${htslib_version}/htslib-${htslib_version}.tar.bz2 | tar -xj \ From 690a443ecbe9ef19dee2cb7132ddbe73e224d29b Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 18 Sep 2024 11:33:39 +0000 Subject: [PATCH 25/32] jdk --- docker/Dockerfile.clockwork-0.9.9 | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.clockwork-0.9.9 b/docker/Dockerfile.clockwork-0.9.9 index 0fcc30c..af6fe38 100644 --- a/docker/Dockerfile.clockwork-0.9.9 +++ b/docker/Dockerfile.clockwork-0.9.9 @@ -27,7 +27,7 @@ COPY bin/ /opt/bin/ ENV PATH=/opt/bin:$PATH RUN apt-get update \ -&& apt-get install -y $PACKAGES $PYTHON \ +&& DEBIAN_FRONTEND=noninteractive apt-get install -y $PACKAGES $PYTHON \ && curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \ && cd Python-${python_version} \ && ./configure --enable-optimizations \ @@ -37,9 +37,15 @@ RUN apt-get update \ && ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \ && pip3 install --upgrade pip \ && 
pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \ -&& apt-get update && apt-get install -y snapd +&& apt-get update + +#update jdk +RUN wget https://download.java.net/java/GA/jdk18/43f95e8614114aeaa8e8a5fcf20a682d/36/GPL/openjdk-18_linux-x64_bin.tar.gz +RUN tar -xvf openjdk-18_linux-x64_bin.tar.gz +RUN mv jdk-18* /opt/ +ENV JAVA_HOME=/opt/jdk-18 +ENV PATH=$PATH:$JAVA_HOME/bin -RUN snap install openjdk --channel=22.0.2/stable RUN curl -fsSL https://github.com/samtools/samtools/archive/${samtools_version}.tar.gz | tar -xz \ && curl -fsSL https://github.com/samtools/htslib/releases/download/${htslib_version}/htslib-${htslib_version}.tar.bz2 | tar -xj \ From f2b61d6de5d0ecb8e0d11cf3a933fee0346e4b53 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Wed, 18 Sep 2024 15:21:36 +0000 Subject: [PATCH 26/32] new vcf mix to deal with minos style --- bin/run-vcfmix.py | 2 +- config/containers.config | 4 +- docker/Dockerfile.clockwork-0.9.9r1 | 131 +++++++++++++++++++++++++++ docker/Dockerfile.vcfpredict-0.9.9r1 | 27 ++++++ 4 files changed, 161 insertions(+), 3 deletions(-) create mode 100644 docker/Dockerfile.clockwork-0.9.9r1 create mode 100644 docker/Dockerfile.vcfpredict-0.9.9r1 diff --git a/bin/run-vcfmix.py b/bin/run-vcfmix.py index 13cb9d5..4368a9c 100755 --- a/bin/run-vcfmix.py +++ b/bin/run-vcfmix.py @@ -12,7 +12,7 @@ def go(vcf_file): v = lineageScan() # assuming postfix of ".minos.vcf" - sampleid = vcf_file.replace(".minos.vcf", "") + sampleid = vcf_file.replace("_allelic_depth.minos.vcf", "") print(sampleid) res = v.parse(vcffile=vcf_file, sample_id=sampleid) diff --git a/config/containers.config b/config/containers.config index 7e44b62..383c4ed 100644 --- a/config/containers.config +++ b/config/containers.config @@ -33,10 +33,10 @@ process { } withLabel:clockwork { - container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.9" + container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.9r1" } withLabel:vcfpredict { - container = 
"quay.io/pathogen-genomics-cymru/vcfpredict:0.9.9" + container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.9r1" } } diff --git a/docker/Dockerfile.clockwork-0.9.9r1 b/docker/Dockerfile.clockwork-0.9.9r1 new file mode 100644 index 0000000..af6fe38 --- /dev/null +++ b/docker/Dockerfile.clockwork-0.9.9r1 @@ -0,0 +1,131 @@ +FROM ubuntu:focal + + +LABEL maintainer="pricea35@cardiff.ac.uk" \ +about.summary="container for the clockwork workflow" + +ENV samtools_version=1.12 \ +htslib_version=1.12 \ +bcftools_version=1.12 \ +minimap2_version=2.17 \ +picard_version=2.18.16 \ +gramtools_version=8af53f6c8c0d72ef95223e89ab82119b717044f2 \ +vt_version=2187ff6347086e38f71bd9f8ca622cd7dcfbb40c \ +minos_version=0.11.0 \ +cortex_version=3a235272e4e0121be64527f01e73f9e066d378d3 \ +vcftools_version=0.1.15 \ +mccortex_version=97aba198d632ee98ac1aa496db33d1a7a8cb7e51 \ +stampy_version=1.0.32r3761 \ +python_version=3.6.5 \ +clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 \ +gatk_version=4.6.0.0 + +ENV PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" \ +PYTHON="python2.7 python-dev" + +COPY bin/ /opt/bin/ +ENV PATH=/opt/bin:$PATH + +RUN apt-get update \ +&& DEBIAN_FRONTEND=noninteractive apt-get install -y $PACKAGES $PYTHON \ +&& curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \ +&& cd Python-${python_version} \ +&& ./configure --enable-optimizations \ +&& make altinstall \ +&& cd .. 
\ +&& ln -s /usr/local/bin/python3.6 /usr/local/bin/python3 \ +&& ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \ +&& pip3 install --upgrade pip \ +&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \ +&& apt-get update + +#update jdk +RUN wget https://download.java.net/java/GA/jdk18/43f95e8614114aeaa8e8a5fcf20a682d/36/GPL/openjdk-18_linux-x64_bin.tar.gz +RUN tar -xvf openjdk-18_linux-x64_bin.tar.gz +RUN mv jdk-18* /opt/ +ENV JAVA_HOME=/opt/jdk-18 +ENV PATH=$PATH:$JAVA_HOME/bin + + +RUN curl -fsSL https://github.com/samtools/samtools/archive/${samtools_version}.tar.gz | tar -xz \ +&& curl -fsSL https://github.com/samtools/htslib/releases/download/${htslib_version}/htslib-${htslib_version}.tar.bz2 | tar -xj \ +&& make -C samtools-${samtools_version} -j HTSDIR=../htslib-${htslib_version} \ +&& make -C samtools-${samtools_version} -j HTSDIR=../htslib-${htslib_version} prefix=/usr/local install \ +&& rm -r samtools-${samtools_version} \ +&& curl -fsSL https://github.com/samtools/bcftools/archive/refs/tags/${bcftools_version}.tar.gz | tar -xz \ +&& make -C bcftools-${bcftools_version} -j HTSDIR=../htslib-${htslib_version} \ +&& make -C bcftools-${bcftools_version} -j HTSDIR=../htslib-${htslib_version} prefix=/usr/local install \ +&& rm -r bcftools-${bcftools_version} + + +RUN curl -fsSL minimap2-${minimap2_version}.tar.gz https://github.com/lh3/minimap2/archive/v${minimap2_version}.tar.gz | tar -xz \ +&& cd minimap2-${minimap2_version} \ +&& make \ +&& chmod +x minimap2 \ +&& mv minimap2 /usr/local/bin \ +&& cd .. \ +&& rm -r minimap2-${minimap2_version} \ +&& wget https://github.com/broadinstitute/picard/releases/download/${picard_version}/picard.jar -O /usr/local/bin/picard.jar + + +RUN git clone https://github.com/atks/vt.git vt-git \ +&& cd vt-git \ +&& git checkout ${vt_version} \ +&& make \ +&& cd .. 
\ +&& mv vt-git/vt /usr/local/bin \ +&& pip3 install tox "six>=1.14.0" \ +&& git clone https://github.com/iqbal-lab-org/gramtools \ +&& cd gramtools \ +&& git checkout ${gramtools_version} \ +&& pip3 install . \ +&& cd .. \ +&& pip3 install cython \ +&& pip3 install git+https://github.com/iqbal-lab-org/minos@v${minos_version} + + +RUN git clone --recursive https://github.com/iqbal-lab/cortex.git \ +&& cd cortex \ +&& git checkout ${cortex_version} \ +&& bash install.sh \ +&& make NUM_COLS=1 cortex_var \ +&& make NUM_COLS=2 cortex_var \ +&& cd .. \ +&& mkdir bioinf-tools \ +&& cd bioinf-tools \ +&& curl -fsSL http://www.well.ox.ac.uk/~gerton/software/Stampy/stampy-${stampy_version}.tgz | tar -xz \ +&& make -C stampy-* \ +&& cp -s stampy-*/stampy.py . \ +&& curl -fsSL https://github.com/vcftools/vcftools/releases/download/v${vcftools_version}/vcftools-${vcftools_version}.tar.gz | tar -xz \ +&& cd vcftools-${vcftools_version} \ +&& ./configure --prefix $PWD/install \ +&& make && make install \ +&& ln -s src/perl/ . \ +&& cd .. \ +&& git clone --recursive https://github.com/mcveanlab/mccortex \ +&& cd mccortex \ +&& git checkout ${mccortex_version} \ +&& make all \ +&& cd .. \ +&& cp -s mccortex/bin/mccortex31 . \ +&& cd .. \ +&& git clone https://github.com/iqbal-lab-org/clockwork \ +&& cd clockwork \ +&& git checkout ${clockwork_version} \ +&& cd python \ +&& pip3 install . 
\ +&& chmod +x scripts/clockwork + +RUN wget https://github.com/broadinstitute/gatk/releases/download/${gatk_version}/gatk-${gatk_version}.zip -O /tmp/gatk-${gatk_version}.zip\ + && unzip /tmp/gatk-${gatk_version}.zip -d /opt/ \ + && rm /tmp/gatk-${gatk_version}.zip -f + +ENV CLOCKWORK_CORTEX_DIR=/cortex \ +PATH=${PATH}:/clockwork/python/scripts:/opt/gatk-${gatk_version} \ +PICARD_JAR=/usr/local/bin/picard.jar + +ENV LC_ALL en_US.UTF-8 \ +LANG en_US.UTF-8 \ +LANGUAGE en_US.UTF-8 + + diff --git a/docker/Dockerfile.vcfpredict-0.9.9r1 b/docker/Dockerfile.vcfpredict-0.9.9r1 new file mode 100644 index 0000000..4d34444 --- /dev/null +++ b/docker/Dockerfile.vcfpredict-0.9.9r1 @@ -0,0 +1,27 @@ +FROM ubuntu:20.04 + +LABEL maintainer="pricea35@cardiff.ac.uk" \ +about.summary="container for the vcf predict workflow" + +#add run-vcf to container +COPY bin/ /opt/bin/ +ENV PATH=/opt/bin:$PATH + +ENV PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" \ +PYTHON="python3 python3-pip python3-dev" + +ENV vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 + +#apt updates +RUN apt-get update \ +&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \ +&& apt-get install -y $PACKAGES $PYTHON \ +&& apt-get install -y python3-packaging \ +&& git clone https://github.com/whalleyt/VCFMIX.git \ +&& cd VCFMIX \ +&& git checkout ${vcfmix_version} \ +&& pip3 install recursive_diff \ +&& pip3 install awscli \ +&& pip3 install . \ +&& cp -r data /usr/local/lib/python3.8/dist-packages \ +&& cd .. 
From e565b977fde6c7e9db80124506ac2f0983937301 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Thu, 19 Sep 2024 08:32:08 +0000 Subject: [PATCH 27/32] update run vcf for minos flag --- bin/run-vcfmix.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/run-vcfmix.py b/bin/run-vcfmix.py index 4368a9c..eaeed3a 100755 --- a/bin/run-vcfmix.py +++ b/bin/run-vcfmix.py @@ -5,11 +5,12 @@ import urllib.request import json from pathlib import Path -from vcfmix import lineageScan +#from vcfmix import lineageScan +from vcfScan import lineageScan def go(vcf_file): # create a lineagescan object - v = lineageScan() + v = lineageScan(minos=True) # assuming postfix of ".minos.vcf" sampleid = vcf_file.replace("_allelic_depth.minos.vcf", "") From 20d1510b9d797d5a99987eb56588b20cb2f32a9a Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Thu, 19 Sep 2024 09:35:58 +0000 Subject: [PATCH 28/32] bump docker vcfpred --- docker/Dockerfile.vcfpredict-0.9.9r1 | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile.vcfpredict-0.9.9r1 b/docker/Dockerfile.vcfpredict-0.9.9r1 index 4d34444..f806965 100644 --- a/docker/Dockerfile.vcfpredict-0.9.9r1 +++ b/docker/Dockerfile.vcfpredict-0.9.9r1 @@ -3,6 +3,7 @@ FROM ubuntu:20.04 LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the vcf predict workflow" + #add run-vcf to container COPY bin/ /opt/bin/ ENV PATH=/opt/bin:$PATH From d003e8bb4776fdd76aa4aaedc3d6d44bda2be246 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Thu, 19 Sep 2024 11:50:59 +0000 Subject: [PATCH 29/32] update run vcf for minos flag --- bin/run-vcfmix.py | 3 +-- docker/Dockerfile.vcfpredict-0.9.9r1 | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/bin/run-vcfmix.py b/bin/run-vcfmix.py index eaeed3a..ecfe9c5 100755 --- a/bin/run-vcfmix.py +++ b/bin/run-vcfmix.py @@ -5,8 +5,7 @@ import urllib.request import json from pathlib import Path -#from vcfmix import lineageScan -from vcfScan import lineageScan +from vcfmix 
import lineageScan def go(vcf_file): # create a lineagescan object diff --git a/docker/Dockerfile.vcfpredict-0.9.9r1 b/docker/Dockerfile.vcfpredict-0.9.9r1 index f806965..4d34444 100644 --- a/docker/Dockerfile.vcfpredict-0.9.9r1 +++ b/docker/Dockerfile.vcfpredict-0.9.9r1 @@ -3,7 +3,6 @@ FROM ubuntu:20.04 LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the vcf predict workflow" - #add run-vcf to container COPY bin/ /opt/bin/ ENV PATH=/opt/bin:$PATH From e173cc6eaf3f6a8f0aa124b2a46f51cdf0999ef3 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Thu, 19 Sep 2024 11:54:23 +0000 Subject: [PATCH 30/32] sing --- singularity/Singularity.clockwork-0.9.9r1 | 168 +++++++++++++++++++++ singularity/Singularity.vcfpredict-0.9.9r1 | 42 ++++++ 2 files changed, 210 insertions(+) create mode 100644 singularity/Singularity.clockwork-0.9.9r1 create mode 100644 singularity/Singularity.vcfpredict-0.9.9r1 diff --git a/singularity/Singularity.clockwork-0.9.9r1 b/singularity/Singularity.clockwork-0.9.9r1 new file mode 100644 index 0000000..b4ab6b9 --- /dev/null +++ b/singularity/Singularity.clockwork-0.9.9r1 @@ -0,0 +1,168 @@ +Bootstrap: docker +From: ubuntu:focal +Stage: spython-base + +%files +bin/ /opt/bin/ +%labels +maintainer="pricea35@cardiff.ac.uk" +about.summary="container for the clockwork workflow" +%post + + + +samtools_version=1.12 +htslib_version=1.12 +bcftools_version=1.12 +minimap2_version=2.17 +picard_version=2.18.16 +gramtools_version=8af53f6c8c0d72ef95223e89ab82119b717044f2 +vt_version=2187ff6347086e38f71bd9f8ca622cd7dcfbb40c +minos_version=0.11.0 +cortex_version=3a235272e4e0121be64527f01e73f9e066d378d3 +vcftools_version=0.1.15 +mccortex_version=97aba198d632ee98ac1aa496db33d1a7a8cb7e51 +stampy_version=1.0.32r3761 +python_version=3.6.5 +clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 +gatk_version=4.6.0.0 + +PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev 
liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" +PYTHON="python2.7 python-dev" + +PATH=/opt/bin:$PATH + +apt-get update \ +&& DEBIAN_FRONTEND=noninteractive apt-get install -y $PACKAGES $PYTHON \ +&& curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \ +&& cd Python-${python_version} \ +&& ./configure --enable-optimizations \ +&& make altinstall \ +&& cd .. \ +&& ln -s /usr/local/bin/python3.6 /usr/local/bin/python3 \ +&& ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \ +&& pip3 install --upgrade pip \ +&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \ +&& apt-get update + +#update jdk +wget https://download.java.net/java/GA/jdk18/43f95e8614114aeaa8e8a5fcf20a682d/36/GPL/openjdk-18_linux-x64_bin.tar.gz +tar -xvf openjdk-18_linux-x64_bin.tar.gz +mv jdk-18* /opt/ +JAVA_HOME=/opt/jdk-18 +PATH=$PATH:$JAVA_HOME/bin + + +curl -fsSL https://github.com/samtools/samtools/archive/${samtools_version}.tar.gz | tar -xz \ +&& curl -fsSL https://github.com/samtools/htslib/releases/download/${htslib_version}/htslib-${htslib_version}.tar.bz2 | tar -xj \ +&& make -C samtools-${samtools_version} -j HTSDIR=../htslib-${htslib_version} \ +&& make -C samtools-${samtools_version} -j HTSDIR=../htslib-${htslib_version} prefix=/usr/local install \ +&& rm -r samtools-${samtools_version} \ +&& curl -fsSL https://github.com/samtools/bcftools/archive/refs/tags/${bcftools_version}.tar.gz | tar -xz \ +&& make -C bcftools-${bcftools_version} -j HTSDIR=../htslib-${htslib_version} \ +&& make -C bcftools-${bcftools_version} -j HTSDIR=../htslib-${htslib_version} prefix=/usr/local install \ +&& rm -r bcftools-${bcftools_version} + + +curl -fsSL minimap2-${minimap2_version}.tar.gz https://github.com/lh3/minimap2/archive/v${minimap2_version}.tar.gz | tar -xz \ +&& cd minimap2-${minimap2_version} \ +&& make \ +&& chmod +x minimap2 \ +&& mv minimap2 
/usr/local/bin \ +&& cd .. \ +&& rm -r minimap2-${minimap2_version} \ +&& wget https://github.com/broadinstitute/picard/releases/download/${picard_version}/picard.jar -O /usr/local/bin/picard.jar + + +git clone https://github.com/atks/vt.git vt-git \ +&& cd vt-git \ +&& git checkout ${vt_version} \ +&& make \ +&& cd .. \ +&& mv vt-git/vt /usr/local/bin \ +&& pip3 install tox "six>=1.14.0" \ +&& git clone https://github.com/iqbal-lab-org/gramtools \ +&& cd gramtools \ +&& git checkout ${gramtools_version} \ +&& pip3 install . \ +&& cd .. \ +&& pip3 install cython \ +&& pip3 install git+https://github.com/iqbal-lab-org/minos@v${minos_version} + + +git clone --recursive https://github.com/iqbal-lab/cortex.git \ +&& cd cortex \ +&& git checkout ${cortex_version} \ +&& bash install.sh \ +&& make NUM_COLS=1 cortex_var \ +&& make NUM_COLS=2 cortex_var \ +&& cd .. \ +&& mkdir bioinf-tools \ +&& cd bioinf-tools \ +&& curl -fsSL http://www.well.ox.ac.uk/~gerton/software/Stampy/stampy-${stampy_version}.tgz | tar -xz \ +&& make -C stampy-* \ +&& cp -s stampy-*/stampy.py . \ +&& curl -fsSL https://github.com/vcftools/vcftools/releases/download/v${vcftools_version}/vcftools-${vcftools_version}.tar.gz | tar -xz \ +&& cd vcftools-${vcftools_version} \ +&& ./configure --prefix $PWD/install \ +&& make && make install \ +&& ln -s src/perl/ . \ +&& cd .. \ +&& git clone --recursive https://github.com/mcveanlab/mccortex \ +&& cd mccortex \ +&& git checkout ${mccortex_version} \ +&& make all \ +&& cd .. \ +&& cp -s mccortex/bin/mccortex31 . \ +&& cd .. \ +&& git clone https://github.com/iqbal-lab-org/clockwork \ +&& cd clockwork \ +&& git checkout ${clockwork_version} \ +&& cd python \ +&& pip3 install . 
\ +&& chmod +x scripts/clockwork + +wget https://github.com/broadinstitute/gatk/releases/download/${gatk_version}/gatk-${gatk_version}.zip -O /tmp/gatk-${gatk_version}.zip\ +&& unzip /tmp/gatk-${gatk_version}.zip -d /opt/ \ +&& rm /tmp/gatk-${gatk_version}.zip -f + +CLOCKWORK_CORTEX_DIR=/cortex +PATH=${PATH}:/clockwork/python/scripts:/opt/gatk-${gatk_version} +PICARD_JAR=/usr/local/bin/picard.jar + +LC_ALL=en_US.UTF-8 +LANG=en_US.UTF-8 +LANGUAGE=en_US.UTF-8 + + +%environment +export samtools_version=1.12 +export htslib_version=1.12 +export bcftools_version=1.12 +export minimap2_version=2.17 +export picard_version=2.18.16 +export gramtools_version=8af53f6c8c0d72ef95223e89ab82119b717044f2 +export vt_version=2187ff6347086e38f71bd9f8ca622cd7dcfbb40c +export minos_version=0.11.0 +export cortex_version=3a235272e4e0121be64527f01e73f9e066d378d3 +export vcftools_version=0.1.15 +export mccortex_version=97aba198d632ee98ac1aa496db33d1a7a8cb7e51 +export stampy_version=1.0.32r3761 +export python_version=3.6.5 +export clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 +export gatk_version=4.6.0.0 +export PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" +export PYTHON="python2.7 python-dev" +export PATH=/opt/bin:$PATH +export JAVA_HOME=/opt/jdk-18 +export PATH=$PATH:$JAVA_HOME/bin +export CLOCKWORK_CORTEX_DIR=/cortex +export PATH=${PATH}:/clockwork/python/scripts:/opt/gatk-${gatk_version} +export PICARD_JAR=/usr/local/bin/picard.jar +export LC_ALL=en_US.UTF-8 +export LANG=en_US.UTF-8 +export LANGUAGE=en_US.UTF-8 +%runscript +exec /bin/bash "$@" +%startscript +exec /bin/bash "$@" diff --git a/singularity/Singularity.vcfpredict-0.9.9r1 b/singularity/Singularity.vcfpredict-0.9.9r1 new file mode 100644 index 0000000..b7860ff --- /dev/null +++ 
b/singularity/Singularity.vcfpredict-0.9.9r1 @@ -0,0 +1,42 @@ +Bootstrap: docker +From: ubuntu:20.04 +Stage: spython-base + +%files +bin/ /opt/bin/ +%labels +maintainer="pricea35@cardiff.ac.uk" +about.summary="container for the vcf predict workflow" +%post + + +#add run-vcf to container +PATH=/opt/bin:$PATH + +PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" +PYTHON="python3 python3-pip python3-dev" + +vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 + +#apt updates +apt-get update \ +&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \ +&& apt-get install -y $PACKAGES $PYTHON \ +&& apt-get install -y python3-packaging \ +&& git clone https://github.com/whalleyt/VCFMIX.git \ +&& cd VCFMIX \ +&& git checkout ${vcfmix_version} \ +&& pip3 install recursive_diff \ +&& pip3 install awscli \ +&& pip3 install . \ +&& cp -r data /usr/local/lib/python3.8/dist-packages \ +&& cd .. +%environment +export PATH=/opt/bin:$PATH +export PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" +export PYTHON="python3 python3-pip python3-dev" +export vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 +%runscript +exec /bin/bash "$@" +%startscript +exec /bin/bash "$@" From 735f97ac8e59d4275a91dcbcf73bce2926bf816b Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Thu, 19 Sep 2024 13:00:17 +0000 Subject: [PATCH 31/32] remove checkout --- docker/Dockerfile.vcfpredict-0.9.9r1 | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/Dockerfile.vcfpredict-0.9.9r1 b/docker/Dockerfile.vcfpredict-0.9.9r1 index 4d34444..39e867d 100644 --- a/docker/Dockerfile.vcfpredict-0.9.9r1 +++ b/docker/Dockerfile.vcfpredict-0.9.9r1 @@ -19,7 +19,6 @@ RUN apt-get update \ && apt-get install -y python3-packaging \ && git clone https://github.com/whalleyt/VCFMIX.git \ && cd VCFMIX \ -&& git checkout ${vcfmix_version} \ && pip3 install recursive_diff \ && pip3 install awscli \ && pip3 install . 
\ From 2edb307f9cb83ee1ceaccc5cd613a3c2ae169269 Mon Sep 17 00:00:00 2001 From: Tom Whalley Date: Fri, 20 Sep 2024 08:52:41 +0000 Subject: [PATCH 32/32] remove debugging views --- workflows/clockwork.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/workflows/clockwork.nf b/workflows/clockwork.nf index fff09cd..940546e 100644 --- a/workflows/clockwork.nf +++ b/workflows/clockwork.nf @@ -24,8 +24,6 @@ workflow clockwork { sample_name = input_seqs_json.map{it[0]} getRefFromJSON(json, do_we_align, sample_name) - input_seqs_json.view() - getRefFromJSON.out.view() alignToRef(input_seqs_json, getRefFromJSON.out)