From 9e6226381ea075d2b4065d7f8600721de91bd11b Mon Sep 17 00:00:00 2001 From: whalleyt Date: Wed, 6 Dec 2023 17:47:34 +0000 Subject: [PATCH 01/44] parse profiler params --- main.nf | 11 +++++++++-- nextflow.config | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 837d3ef..140be73 100644 --- a/main.nf +++ b/main.nf @@ -49,8 +49,8 @@ Mandatory and conditional parameters: --bowtie2_index Directory containing Bowtie2 index (obtain from ftp://ftp.ccb.jhu.edu/pub/data/bowtie2_indexes/hg19_1kgmaj_bt2.zip This is the Langmead lab pre-built major-allele-SNP reference; see https://github.com/BenLangmead/bowtie-majref) --bowtie_index_name Name of the bowtie index, e.g. hg19_1kgmaj ---vcfmix Run VFCMIX "yes" or "no". Should be set to "no" for synthetic samples ---gnomonicus Run gnomon "yes" or "no" +--vcfmix Run VFCMIX "yes" or "no". Should be set to "no" for synthetic samples +--resistance_profiler Tool to profile resistance with. At the moment options are "tb-profiler" or "none" --amr_cat Path to the AMR catalogue (https://github.com/oxfordmmm/tuberculosis_amr_catalogues is at /tuberculosis_amr_catalogues in the vcfpredict container) --afanc_myco_db Path to the Afanc database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_3.0.tar.gz @@ -86,6 +86,13 @@ nextflow run main.nf -profile docker --filetype bam --input_dir bam_dir --unmix_ } +resistance_profilers = ["tb-profiler", "none"] + + if(!resistance_profilers.contains(params.resistance_profiler)){ + exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler" or "none" to skip.' + } + + // confirm that mandatory parameters have been set and that the conditional parameter, --pattern, has been used appropriately if ( params.input_dir == "" ) { exit 1, "error: --input_dir is mandatory (run with --help to see parameters)" diff --git a/nextflow.config b/nextflow.config index 21122da..27a0fcd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -46,6 +46,8 @@ params { // run gnomonicus 'yes' or 'no' gnomonicus = 'yes' + + resistance_profiler = "tb-profiler" // path to AMR catalogue for gnomon // https://github.com/oxfordmmm/tuberculosis_amr_catalogues available at path /tuberculosis_amr_catalogues in container From 3948b1270f889746a869479218e7172b48aa00fb Mon Sep 17 00:00:00 2001 From: whalleyt Date: Thu, 7 Dec 2023 14:03:31 +0000 Subject: [PATCH 02/44] update tb-profiler in docker --- .github/workflows/build-push-quay.yml | 4 +- docker/Dockerfile.vcfpredict-0.9.8r1 | 78 +++++++++++++++++++++++++++ nextflow.config | 30 +++++------ 3 files changed, 96 insertions(+), 16 deletions(-) create mode 100644 docker/Dockerfile.vcfpredict-0.9.8r1 diff --git a/.github/workflows/build-push-quay.yml b/.github/workflows/build-push-quay.yml index 9043ae6..cef945d 100644 --- a/.github/workflows/build-push-quay.yml +++ b/.github/workflows/build-push-quay.yml @@ -4,10 +4,11 @@ on: branches: - v0.9.6 - 0.9.7-dev - - climb + - tbprofiler paths: - '**/Dockerfile*' - "bin/" + - "resources/" workflow_dispatch: @@ -46,6 +47,7 @@ jobs: - name: Copy folders to docker run: | cp -r bin docker/bin + cp -r resources docker/resources - name: Get image name id: image_name diff --git a/docker/Dockerfile.vcfpredict-0.9.8r1 b/docker/Dockerfile.vcfpredict-0.9.8r1 new file mode 100644 index 0000000..72d8ad4 --- /dev/null +++ b/docker/Dockerfile.vcfpredict-0.9.8r1 @@ -0,0 +1,78 @@ +FROM mambaorg/micromamba:jammy + +LABEL maintainer="pricea35@cardiff.ac.uk" \ +about.summary="container for the vcf 
predict workflow" + +COPY bin/ /opt/bin/ +COPY resources/tuberculosis.fa ~/tuberculosis.fa + +ENV PATH=/opt/bin:$PATH + +ARG TBPROFILER_VER="5.0.1" + +# this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/ +# commits are found on https://github.com/jodyphelan/tbdb/commits/master +# this was the latest commit as of 2023-10-26 +ARG TBDB_VER="e25540b" + +# install tb-profiler via bioconda; install into 'base' conda env +RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \ + tb-profiler=${TBPROFILER_VER} && \ + micromamba clean --all --yes + +# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time +ENV PATH="/opt/conda/bin:${PATH}" + +# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json +# can also run 'tb-profiler list_db' to find the same version info +# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER} +RUN tb-profiler update_tbdb --commit ${TBDB_VER} + +ENV PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" \ +PYTHON="python3 python3-pip python3-dev" + +ENV vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 + + +RUN apt-get update \ +&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \ +&& apt-get install -y $PACKAGES $PYTHON \ +&& apt-get install -y python3-packaging \ +&& git clone https://github.com/JeremyWesthead/VCFMIX.git \ +&& cd VCFMIX \ +&& git checkout ${vcfmix_version} \ +&& pip3 install recursive_diff \ +&& pip3 install . \ +&& cp -r data /usr/local/lib/python3.8/dist-packages \ +&& cd .. + +#taken and adapted from staphb/tbprofiler +ARG TBPROFILER_VER="5.0.1" + +# this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/ +# commits are found on https://github.com/jodyphelan/tbdb/commits/master +# this was the latest commit as of 2023-10-26 +ARG TBDB_VER="e25540b" + +# Install dependencies via apt-get; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install tb-profiler via bioconda; install into 'base' conda env +RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \ + tb-profiler=${TBPROFILER_VER} && \ + micromamba clean --all --yes + +# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time +ENV PATH="/opt/conda/bin:${PATH}" + +# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json +# can also run 'tb-profiler list_db' to find the same version info +# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER} +RUN tb-profiler update_tbdb --commit ${TBDB_VER} + +#pre-add our TB reference +RUN tb-profiler update_tbdb --match_ref ~/tuberculosis.fa \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 27a0fcd..9ed781c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -98,7 +98,7 @@ profiles { withLabel:high_memory { memory = '18GB' } withLabel:preprocessing { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7r9" + container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" } withLabel:getversion{ @@ -121,11 +121,11 @@ profiles { } withLabel:clockwork { - container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7r3" + container = 
"quay.io/pathogen-genomics-cymru/clockwork:0.9.8" } withLabel:vcfpredict { - container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7r3" + container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8r1" } } params{ @@ -161,11 +161,11 @@ profiles { withLabel:high_memory { memory = '18GB' } withLabel:getversion { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7" + container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" } withLabel:preprocessing { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7" + container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" } withName:downloadContamGenomes { @@ -182,11 +182,11 @@ profiles { } withLabel:clockwork { - container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7" + container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8r1" } withLabel:vcfpredict { - container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7" + container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8r1" } } @@ -215,11 +215,11 @@ profiles { withLabel:high_memory { memory = '18GB' } withLabel:getversion { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7" + container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" } withLabel:preprocessing { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7" + container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" } withName:downloadContamGenomes { @@ -235,11 +235,11 @@ profiles { } withLabel:clockwork { - container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7" + container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8" } withLabel:vcfpredict { - container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7" + container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8r1" } } } @@ -264,11 +264,11 @@ profiles { withLabel:high_memory { memory = '18GB' } withLabel:getversion { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7" + container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" } withLabel:preprocessing { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.7" + container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" } withName:downloadContamGenomes { @@ -284,11 +284,11 @@ profiles { } withLabel:clockwork { - container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.7" + container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8" } withLabel:vcfpredict { - container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.7" + container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8" } } } From bef7e8c61fb83d7d3a2ce9af2cfc0c5b60568e16 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Thu, 7 Dec 2023 14:03:56 +0000 Subject: [PATCH 03/44] rm Dockerfile --- docker/Dockerfile.vcfpredict-0.9.8 | 51 ------------------------------ 1 file changed, 51 deletions(-) delete mode 100644 docker/Dockerfile.vcfpredict-0.9.8 diff --git a/docker/Dockerfile.vcfpredict-0.9.8 b/docker/Dockerfile.vcfpredict-0.9.8 deleted file mode 100644 index 68d928e..0000000 --- a/docker/Dockerfile.vcfpredict-0.9.8 +++ /dev/null @@ -1,51 +0,0 @@ -FROM ubuntu:20.04 - -LABEL maintainer="pricea35@cardiff.ac.uk" \ -about.summary="container for the vcf predict workflow" - -ENV PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" \ -PYTHON="python3 python3-pip python3-dev" - -ENV vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 \ -gumpy_version=1.0.15 \ -piezo_version=0.3 \ -gnomonicus_version=1.1.2 \ -tuberculosis_amr_catalogues=12d38733ad2e238729a3de9f725081e1d4872968 
- -COPY bin/ /opt/bin/ -ENV PATH=/opt/bin:$PATH - - -RUN apt-get update \ -&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \ -&& apt-get install -y $PACKAGES $PYTHON \ -&& apt-get install -y python3-packaging \ -&& git clone https://github.com/JeremyWesthead/VCFMIX.git \ -&& cd VCFMIX \ -&& git checkout ${vcfmix_version} \ -&& pip3 install recursive_diff \ -&& pip3 install awscli \ -&& pip3 install . \ -&& cp -r data /usr/local/lib/python3.8/dist-packages \ -&& cd .. - -RUN curl -fsSL https://github.com/oxfordmmm/gumpy/archive/refs/tags/v${gumpy_version}.tar.gz | tar -xz \ -&& cd gumpy-${gumpy_version} \ -&& pip3 install . \ -&& cd .. - -RUN curl -fsSL https://github.com/oxfordmmm/piezo/archive/refs/tags/v${piezo_version}.tar.gz | tar -xz \ -&& cd piezo-${piezo_version} \ -&& pip3 install . \ -&& cd .. - -RUN curl -fsSL https://github.com/oxfordmmm/gnomonicus/archive/refs/tags/v${gnomonicus_version}.tar.gz | tar -xz \ -&& cd gnomonicus-${gnomonicus_version} \ -&& pip3 install . \ -&& cd .. - -RUN git clone https://github.com/oxfordmmm/tuberculosis_amr_catalogues.git \ -&& cd tuberculosis_amr_catalogues \ -&& git checkout ${tuberculosis_amr_catalogues} \ -&& cd .. - From 1b5187104328522e345e1fddc67918608648b285 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Thu, 7 Dec 2023 14:16:19 +0000 Subject: [PATCH 04/44] docker update --- docker/Dockerfile.vcfpredict-0.9.8r1 | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile.vcfpredict-0.9.8r1 b/docker/Dockerfile.vcfpredict-0.9.8r1 index 72d8ad4..72e7dee 100644 --- a/docker/Dockerfile.vcfpredict-0.9.8r1 +++ b/docker/Dockerfile.vcfpredict-0.9.8r1 @@ -15,6 +15,7 @@ ARG TBPROFILER_VER="5.0.1" # this was the latest commit as of 2023-10-26 ARG TBDB_VER="e25540b" + # install tb-profiler via bioconda; install into 'base' conda env RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \ tb-profiler=${TBPROFILER_VER} && \ From a72c6d1d925e0be7cc525df666f1c8a4b1555bd7 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Thu, 7 Dec 2023 14:22:38 +0000 Subject: [PATCH 05/44] fa to fasta --- docker/Dockerfile.vcfpredict-0.9.8r1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile.vcfpredict-0.9.8r1 b/docker/Dockerfile.vcfpredict-0.9.8r1 index 72e7dee..39874f3 100644 --- a/docker/Dockerfile.vcfpredict-0.9.8r1 +++ b/docker/Dockerfile.vcfpredict-0.9.8r1 @@ -4,7 +4,7 @@ LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the vcf predict workflow" COPY bin/ /opt/bin/ -COPY resources/tuberculosis.fa ~/tuberculosis.fa +COPY resources/tuberculosis.fasta ~/tuberculosis.fasta ENV PATH=/opt/bin:$PATH @@ -76,4 +76,4 @@ ENV PATH="/opt/conda/bin:${PATH}" RUN tb-profiler update_tbdb --commit ${TBDB_VER} #pre-add our TB reference -RUN tb-profiler update_tbdb --match_ref ~/tuberculosis.fa \ No newline at end of file +RUN tb-profiler update_tbdb --match_ref ~/tuberculosis.fasta \ No newline at end of file From 2a809fb9603013bc5f0f0c2cc3f35d76075debac Mon Sep 17 00:00:00 2001 From: whalleyt Date: Fri, 8 Dec 2023 10:25:54 +0000 Subject: [PATCH 06/44] tb-profiler in docker --- docker/Dockerfile.vcfpredict-0.9.8r1 | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/Dockerfile.vcfpredict-0.9.8r1 b/docker/Dockerfile.vcfpredict-0.9.8r1 index 39874f3..f781423 100644 --- a/docker/Dockerfile.vcfpredict-0.9.8r1 +++ b/docker/Dockerfile.vcfpredict-0.9.8r1 @@ -34,7 +34,6 @@ PYTHON="python3 python3-pip python3-dev" ENV vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 - 
RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \ && apt-get install -y $PACKAGES $PYTHON \ From bf1a6c89886c697b22a1003f0e0b476d697ed9a3 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Fri, 8 Dec 2023 12:02:00 +0000 Subject: [PATCH 07/44] new container for tbprofiler --- docker/Dockerfile.tbprofiler-0.9.8 | 46 ++++++++++++++++++++++++++++++ docker/Dockerfile.vcfpredict-0.9.8 | 22 ++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 docker/Dockerfile.tbprofiler-0.9.8 create mode 100644 docker/Dockerfile.vcfpredict-0.9.8 diff --git a/docker/Dockerfile.tbprofiler-0.9.8 b/docker/Dockerfile.tbprofiler-0.9.8 new file mode 100644 index 0000000..a6d1671 --- /dev/null +++ b/docker/Dockerfile.tbprofiler-0.9.8 @@ -0,0 +1,46 @@ +FROM mambaorg/micromamba:jammy + +LABEL maintainer="whalleyt@cardiff.ac.uk" \ +about.summary="container for the tb-profiler" + +COPY bin/ /opt/bin/ +COPY resources/tuberculosis.fasta ~/tuberculosis.fasta + +ENV PATH=/opt/bin:$PATH + +ARG TBPROFILER_VER="5.0.1" + +# this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/ +# commits are found on https://github.com/jodyphelan/tbdb/commits/master +# this was the latest commit as of 2023-10-26 +ARG TBDB_VER="e25540b" + +# Install dependencies via apt-get; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install tb-profiler via bioconda; install into 'base' conda env +RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \ + tb-profiler=${TBPROFILER_VER} && \ + micromamba clean --all --yes +# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time +ENV PATH="/opt/conda/bin:${PATH}" + +# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json +# can also run 'tb-profiler list_db' to find the same version info +# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER} +RUN tb-profiler update_tbdb --commit ${TBDB_VER} + +# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time +ENV PATH="/opt/conda/bin:${PATH}" + +# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json +# can also run 'tb-profiler list_db' to find the same version info +# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER} +RUN tb-profiler update_tbdb --commit ${TBDB_VER} + +#pre-add our TB reference +RUN tb-profiler update_tbdb --match_ref ~/tuberculosis.fasta \ No newline at end of file diff --git a/docker/Dockerfile.vcfpredict-0.9.8 b/docker/Dockerfile.vcfpredict-0.9.8 new file mode 100644 index 0000000..068303f --- /dev/null +++ b/docker/Dockerfile.vcfpredict-0.9.8 @@ -0,0 +1,22 @@ +FROM ubuntu:20.04 + +LABEL maintainer="pricea35@cardiff.ac.uk" \ +about.summary="container for the vcf predict workflow" + +ENV PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" \ +PYTHON="python3 python3-pip python3-dev" + +ENV vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 \ + +RUN apt-get update \ +&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \ +&& apt-get install -y $PACKAGES $PYTHON \ +&& apt-get install -y python3-packaging \ +&& git clone https://github.com/JeremyWesthead/VCFMIX.git \ +&& cd VCFMIX \ +&& git checkout ${vcfmix_version} \ +&& 
pip3 install recursive_diff \ +&& pip3 install awscli \ +&& pip3 install . \ +&& cp -r data /usr/local/lib/python3.8/dist-packages \ +&& cd .. \ No newline at end of file From 7ac2f3bca1a916f9e45c1c09c239dd333e9f0af1 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Fri, 8 Dec 2023 12:20:36 +0000 Subject: [PATCH 08/44] change base image of tbprofiler docker --- docker/Dockerfile.tbprofiler-0.9.8 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile.tbprofiler-0.9.8 b/docker/Dockerfile.tbprofiler-0.9.8 index a6d1671..873a407 100644 --- a/docker/Dockerfile.tbprofiler-0.9.8 +++ b/docker/Dockerfile.tbprofiler-0.9.8 @@ -1,4 +1,4 @@ -FROM mambaorg/micromamba:jammy +FROM mambaorg/micromamba:1.3.0 LABEL maintainer="whalleyt@cardiff.ac.uk" \ about.summary="container for the tb-profiler" @@ -43,4 +43,4 @@ ENV PATH="/opt/conda/bin:${PATH}" RUN tb-profiler update_tbdb --commit ${TBDB_VER} #pre-add our TB reference -RUN tb-profiler update_tbdb --match_ref ~/tuberculosis.fasta \ No newline at end of file +RUN tb-profiler update_tbdb --match_ref ~/tuberculosis.fasta From df55e2ca94a03f05fe72e2e88f5613428ed669a4 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Fri, 8 Dec 2023 13:34:26 +0000 Subject: [PATCH 09/44] tb-profiler container --- docker/Dockerfile.tbprofiler-0.9.8 | 36 +++++++------ docker/Dockerfile.vcfpredict-0.9.8r1 | 78 ---------------------------- 2 files changed, 20 insertions(+), 94 deletions(-) delete mode 100644 docker/Dockerfile.vcfpredict-0.9.8r1 diff --git a/docker/Dockerfile.tbprofiler-0.9.8 b/docker/Dockerfile.tbprofiler-0.9.8 index 873a407..468d706 100644 --- a/docker/Dockerfile.tbprofiler-0.9.8 +++ b/docker/Dockerfile.tbprofiler-0.9.8 @@ -1,12 +1,10 @@ -FROM mambaorg/micromamba:1.3.0 +FROM mambaorg/micromamba:1.3.0 as app -LABEL maintainer="whalleyt@cardiff.ac.uk" \ -about.summary="container for the tb-profiler" +#copy the reference genome to pre-compute our index +COPY resources/tuberculosis.fasta /data/tuberculosis.fasta -COPY bin/ /opt/bin/ -COPY resources/tuberculosis.fasta ~/tuberculosis.fasta - -ENV PATH=/opt/bin:$PATH +USER root +WORKDIR / ARG TBPROFILER_VER="5.0.1" @@ -15,6 +13,19 @@ ARG TBPROFILER_VER="5.0.1" # this was the latest commit as of 2023-10-26 ARG TBDB_VER="e25540b" +# LABEL instructions tag the image with metadata that might be important to the user +LABEL base.image="micromamba:1.3.0" +LABEL dockerfile.version="1" +LABEL software="tbprofiler" +LABEL software.version="${TBPROFILER_VER}" +LABEL description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database." 
+LABEL website="https://github.com/jodyphelan/TBProfiler/" +LABEL license="https://github.com/jodyphelan/TBProfiler/blob/master/LICENSE" +LABEL maintainer="John Arnn" +LABEL maintainer.email="jarnn@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" + # Install dependencies via apt-get; cleanup apt garbage RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ @@ -26,13 +37,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \ tb-profiler=${TBPROFILER_VER} && \ micromamba clean --all --yes -# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time -ENV PATH="/opt/conda/bin:${PATH}" - -# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json -# can also run 'tb-profiler list_db' to find the same version info -# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER} -RUN tb-profiler update_tbdb --commit ${TBDB_VER} # hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time ENV PATH="/opt/conda/bin:${PATH}" @@ -42,5 +46,5 @@ ENV PATH="/opt/conda/bin:${PATH}" # In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER} RUN tb-profiler update_tbdb --commit ${TBDB_VER} -#pre-add our TB reference -RUN tb-profiler update_tbdb --match_ref ~/tuberculosis.fasta +WORKDIR /data +RUN tb-profiler update_tbdb --match_ref tuberculosis.fasta diff --git a/docker/Dockerfile.vcfpredict-0.9.8r1 b/docker/Dockerfile.vcfpredict-0.9.8r1 deleted file mode 100644 index f781423..0000000 --- a/docker/Dockerfile.vcfpredict-0.9.8r1 +++ /dev/null @@ -1,78 +0,0 @@ -FROM mambaorg/micromamba:jammy - -LABEL maintainer="pricea35@cardiff.ac.uk" \ -about.summary="container for the vcf predict workflow" - -COPY bin/ /opt/bin/ -COPY resources/tuberculosis.fasta ~/tuberculosis.fasta - -ENV PATH=/opt/bin:$PATH - -ARG TBPROFILER_VER="5.0.1" - -# this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/ -# commits are found on https://github.com/jodyphelan/tbdb/commits/master -# this was the latest commit as of 2023-10-26 -ARG TBDB_VER="e25540b" - - -# install tb-profiler via bioconda; install into 'base' conda env -RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \ - tb-profiler=${TBPROFILER_VER} && \ - micromamba clean --all --yes - -# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time -ENV PATH="/opt/conda/bin:${PATH}" - -# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json -# can also run 'tb-profiler list_db' to find the same version info -# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER} -RUN tb-profiler update_tbdb --commit ${TBDB_VER} - -ENV PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" \ -PYTHON="python3 python3-pip python3-dev" - -ENV vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 - -RUN apt-get update \ -&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \ -&& apt-get install -y $PACKAGES $PYTHON \ -&& apt-get install -y python3-packaging \ -&& git clone https://github.com/JeremyWesthead/VCFMIX.git \ -&& cd VCFMIX \ -&& git checkout ${vcfmix_version} \ -&& pip3 install recursive_diff \ -&& pip3 install . 
\ -&& cp -r data /usr/local/lib/python3.8/dist-packages \ -&& cd .. - -#taken and adapted from staphb/tbprofiler -ARG TBPROFILER_VER="5.0.1" - -# this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/ -# commits are found on https://github.com/jodyphelan/tbdb/commits/master -# this was the latest commit as of 2023-10-26 -ARG TBDB_VER="e25540b" - -# Install dependencies via apt-get; cleanup apt garbage -RUN apt-get update && apt-get install -y --no-install-recommends \ - wget \ - ca-certificates \ - procps && \ - apt-get autoclean && rm -rf /var/lib/apt/lists/* - -# install tb-profiler via bioconda; install into 'base' conda env -RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \ - tb-profiler=${TBPROFILER_VER} && \ - micromamba clean --all --yes - -# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time -ENV PATH="/opt/conda/bin:${PATH}" - -# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json -# can also run 'tb-profiler list_db' to find the same version info -# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER} -RUN tb-profiler update_tbdb --commit ${TBDB_VER} - -#pre-add our TB reference -RUN tb-profiler update_tbdb --match_ref ~/tuberculosis.fasta \ No newline at end of file From 2a8d5e82d8d46ad6dbe2b0e22158192b9b542459 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Tue, 12 Dec 2023 13:30:50 +0000 Subject: [PATCH 10/44] try another docker push --- docker/Dockerfile.tbprofiler-0.9.8 | 1 + docker/Dockerfile.vcfpredict-0.9.8 | 1 + 2 files changed, 2 insertions(+) diff --git a/docker/Dockerfile.tbprofiler-0.9.8 b/docker/Dockerfile.tbprofiler-0.9.8 index 468d706..bcbffad 100644 --- a/docker/Dockerfile.tbprofiler-0.9.8 +++ b/docker/Dockerfile.tbprofiler-0.9.8 @@ -8,6 +8,7 @@ WORKDIR / ARG TBPROFILER_VER="5.0.1" + # this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/ # commits are found on https://github.com/jodyphelan/tbdb/commits/master # this was the latest commit as of 2023-10-26 diff --git a/docker/Dockerfile.vcfpredict-0.9.8 b/docker/Dockerfile.vcfpredict-0.9.8 index 068303f..ca94910 100644 --- a/docker/Dockerfile.vcfpredict-0.9.8 +++ b/docker/Dockerfile.vcfpredict-0.9.8 @@ -8,6 +8,7 @@ PYTHON="python3 python3-pip python3-dev" ENV vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 \ +#apt updates RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \ && apt-get install -y $PACKAGES $PYTHON \ From 76fc0c8874d35d0fd4c06eaf68caf415c56b026f Mon Sep 17 00:00:00 2001 From: whalleyt Date: Wed, 13 Dec 2023 13:46:46 +0000 Subject: [PATCH 11/44] tb-profiler db and run on vcf --- main.nf | 4 ++-- modules/vcfpredictModules.nf | 36 ++++++++++++++++++++++++++++++++++++ workflows/clockwork.nf | 1 + workflows/vcfpredict.nf | 29 ++++++++++++----------------- 4 files changed, 51 insertions(+), 19 deletions(-) diff --git a/main.nf b/main.nf index 140be73..9b46430 100644 --- a/main.nf +++ b/main.nf @@ -205,9 +205,9 @@ workflow { mpileup_vcf = clockwork.out.mpileup_vcf minos_vcf = clockwork.out.minos_vcf - genbank = channel.fromPath(params.gnomonicus_genbank) + reference = clockwork.out.reference - vcfpredict(mpileup_vcf, minos_vcf, genbank) + vcfpredict(mpileup_vcf, minos_vcf, reference) } diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf index cee38b6..7a1f382 100644 --- 
a/modules/vcfpredictModules.nf +++ b/modules/vcfpredictModules.nf @@ -48,6 +48,42 @@ process vcfmix { """ } +process tbprofiler_update_db { + label 'low_memory' + label 'low_cpu' + label 'tbprofiler' + + input: + path(reference) + + script: + """ + tb-profiler update_tbdb --match_ref $reference + """ +} + +process tbprofiler { + label 'medium_memory' + label 'medium_cpu' + label 'tbprofiler' + + input: + val(sample_name) + path(minos_vcf) + + output: + path("results/tbprofiler.results.json") + + when: + isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ + + script: + """ + bgzip ${minos_vcf} + tb-profiler profile --vcf ${minos_vcf}.gz --threads ${task.cpus} + """ +} + process gnomonicus { tag {sample_name} diff --git a/workflows/clockwork.nf b/workflows/clockwork.nf index 3ffbaa0..bf6f5e1 100644 --- a/workflows/clockwork.nf +++ b/workflows/clockwork.nf @@ -39,5 +39,6 @@ workflow clockwork { mpileup_vcf = callVarsMpileup.out.mpileup_vcf.join(minos.out.minos_report, by: 0) minos_vcf = minos.out.minos_vcf.join(alignToRef.out.alignToRef_report, by: 0) + reference = getRefFromJSON.out } diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf index 9efc651..2006e73 100644 --- a/workflows/vcfpredict.nf +++ b/workflows/vcfpredict.nf @@ -3,36 +3,31 @@ nextflow.enable.dsl = 2 // import modules include {vcfmix} from '../modules/vcfpredictModules.nf' params(params) -include {gnomonicus} from '../modules/vcfpredictModules.nf' params(params) -include {finalJson} from '../modules/vcfpredictModules.nf' params(params) +include {tbprofiler} from '../modules/vcfpredictModules.nf' params(params) +include {tbprofiler_update_db} from '../modules/vcfpredictModules.nf' params(params) // define workflow component workflow vcfpredict { take: - - clockwork_bcftools - clockwork_minos - genbank + clockwork_bcftools_tuple + minos_vcf_tuple + reference_fasta main: if ( params.vcfmix == "yes" ) { - vcfmix(clockwork_bcftools) + vcfmix(clockwork_bcftools_tuple) } - if ( params.gnomonicus == "yes" ) { - - gnomonicus(clockwork_minos, genbank) + if ( params.resistance_profiler == "tb-profiler"){ + //get just the vcf + minos_vcf = minos_vcf_tuple.map{it[1]} + sample_name = minos_vcf_tuple.map{it[0]} + tbprofiler_update_db(reference_fasta) + tbprofiler(sample_name, minos_vcf) } - - if ( (params.vcfmix == "yes") && (params.gnomonicus == "yes") ) { - - finalJson(vcfmix.out.vcfmix_json.join(gnomonicus.out.gnomon_json, by: 0)) - - } - } From ee5ab97bdfa62c092456728a6bd948a754442e42 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Thu, 14 Dec 2023 11:26:54 +0000 Subject: [PATCH 12/44] update config to remove gnomonicus params --- nextflow.config | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/nextflow.config b/nextflow.config index 9ed781c..5a4e0f5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,16 +43,9 @@ params { // run VCFMIX 'yes' or 'no' (set to no for synthetic samples) vcfmix = 'yes' - - // run gnomonicus 'yes' or 'no' - gnomonicus = 'yes' resistance_profiler = "tb-profiler" - // path to AMR catalogue for gnomon - // https://github.com/oxfordmmm/tuberculosis_amr_catalogues available at path /tuberculosis_amr_catalogues in container - amr_cat = "/tuberculosis_amr_catalogues/catalogues/NC_000962.3/NC_000962.3_WHO-UCN-GTB-PCI-2021.7_v1.0_GARC1_RUS.csv" - // path to singularity recipes directory (needed to strip software versions in getversion) sing_dir = "${baseDir}/singularity" @@ -65,8 +58,6 @@ params { //path to resources directory resource_dir = "${baseDir}/resources" refseq = 
"${resource_dir}/assembly_summary_refseq.txt" - gnomonicus_genbank = "${resource_dir}/H37rV_v3.gbk" - } profiles { @@ -135,9 +126,7 @@ profiles { afanc_myco_db = "s3://microbial-bioin-sp3/Mycobacteriaciae_DB_7.0/" resource_dir = "s3://microbial-bioin-sp3/lodestone_resources" - refseq = "${resource_dir}/assembly_summary_refseq.txt" - gnomonicus_genbank = "${resource_dir}/H37rV_v3.gbk" - + refseq = "${resource_dir}/assembly_summary_refseq.txt" } } singularity { From cc39928e26c0c117127984f16f5f0c2db057ff78 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Mon, 8 Jan 2024 19:24:10 +0000 Subject: [PATCH 13/44] change logic of end condition --- modules/vcfpredictModules.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf index 7a1f382..776ff0a 100644 --- a/modules/vcfpredictModules.nf +++ b/modules/vcfpredictModules.nf @@ -33,7 +33,7 @@ process vcfmix { jq -s ".[0] * .[1]" ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json} - if [ ${params.gnomonicus} == "no" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}; fi + if [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}; fi """ stub: From d6af590e2b0bf368e242d1eeb668f4aac96cb4de Mon Sep 17 00:00:00 2001 From: whalleyt Date: Mon, 8 Jan 2024 19:50:44 +0000 Subject: [PATCH 14/44] python paths in vcfmix docker --- docker/Dockerfile.vcfpredict-0.9.8 | 4 ++++ nextflow.config | 24 +++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile.vcfpredict-0.9.8 b/docker/Dockerfile.vcfpredict-0.9.8 index ca94910..3139c59 100644 --- a/docker/Dockerfile.vcfpredict-0.9.8 +++ b/docker/Dockerfile.vcfpredict-0.9.8 @@ -3,6 +3,10 @@ FROM ubuntu:20.04 LABEL maintainer="pricea35@cardiff.ac.uk" \ about.summary="container for the vcf predict workflow" +#add run-vcf to container +COPY bin/ /opt/bin/ +ENV PATH=/opt/bin:$PATH + ENV PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" \ PYTHON="python3 python3-pip python3-dev" diff --git a/nextflow.config b/nextflow.config index 5a4e0f5..9b76d20 100644 --- a/nextflow.config +++ b/nextflow.config @@ -58,6 +58,7 @@ params { //path to resources directory resource_dir = "${baseDir}/resources" refseq = "${resource_dir}/assembly_summary_refseq.txt" + container_enabled = "false" } profiles { @@ -92,6 +93,10 @@ profiles { container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" } + withLabel:tbprofiler { + container = "twhalley93/tb-profiler:latest" + } + withLabel:getversion{ executor = "local" } @@ -116,10 +121,12 @@ profiles { } withLabel:vcfpredict { - container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8r1" + container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8" } } params{ + container_enabled = "true" + bowtie2_index = "s3://microbial-bioin-sp3/bowtie_hg19" bowtie_index_name = "hg19_1kgmaj" kraken_db = "s3://microbial-bioin-sp3/kraken_pluspf_16gb/" @@ -133,6 +140,7 @@ profiles { params{ resource_dir = "/resources" + container_enabled = "true" } singularity.enabled = 'true' @@ -156,6 +164,10 @@ profiles { withLabel:preprocessing { container = 
"quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" } + + withLabel:tbprofiler { + container = "twhalley93/tb-profiler:latest" + } withName:downloadContamGenomes { shell = ['/bin/bash','-u'] @@ -191,6 +203,7 @@ profiles { params{ resource_dir = "/resources" + container_enabled = "true" } process { @@ -210,6 +223,10 @@ profiles { withLabel:preprocessing { container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" } + + withLabel:tbprofiler { + container = "twhalley93/tb-profiler:latest" + } withName:downloadContamGenomes { shell = ['/bin/bash','-u'] @@ -241,6 +258,7 @@ profiles { runOptions = "-u \$(id -u):\$(id -g)" params{ + container_enabled = "true" resource_dir = "/resources" } @@ -279,6 +297,10 @@ profiles { withLabel:vcfpredict { container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8" } + + withLabel:tbprofiler { + container = "twhalley93/tb-profiler:latest" + } } } } From eea6e25ef61c0959c2c96839b618c2857ac645ec Mon Sep 17 00:00:00 2001 From: whalleyt Date: Tue, 9 Jan 2024 16:21:36 +0000 Subject: [PATCH 15/44] linted indents --- main.nf | 51 +++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/main.nf b/main.nf index 9b46430..0a9cbdf 100644 --- a/main.nf +++ b/main.nf @@ -36,24 +36,24 @@ Produces as output one directory per sample, containing the relevant reports & a Mandatory and conditional parameters: ------------------------------------------------------------------------ --input_dir Directory containing fastq OR bam files. Workflow will process one or the other, so don't mix ---filetype File type in input_dir. One of either "fastq" or "bam". fastq files can be gzipped and do not +--filetype File type in input_dir. One of either "fastq" or "bam". fastq files can be gzipped and do not have to literally take the form "*.fastq"; see --pattern --pattern Regex to match files in input_dir, e.g. "*_R{1,2}.fq.gz". Only mandatory if --filetype is "fastq" --output_dir Output directory, in which will be created subdirectories matching base name of fastq/bam files ---unmix_myco Do you want to disambiguate mixed-mycobacterial samples by read alignment? One of "yes" or "no" - If "yes" workflow will remove reads mapping to any minority mycobacterial genomes but in doing so +--unmix_myco Do you want to disambiguate mixed-mycobacterial samples by read alignment? One of "yes" or "no" + If "yes" workflow will remove reads mapping to any minority mycobacterial genomes but in doing so WILL ALMOST CERTAINLY ALSO reduce coverage of the principal species - If "no" then mixed-mycobacterial samples will be left alone. Mixtures of mycobacteria + non-mycobacteria + If "no" then mixed-mycobacterial samples will be left alone. Mixtures of mycobacteria + non-mycobacteria will still be disambiguated --kraken_db Directory containing Kraken2 database files (obtain from https://benlangmead.github.io/aws-indexes/k2) --bowtie2_index Directory containing Bowtie2 index (obtain from ftp://ftp.ccb.jhu.edu/pub/data/bowtie2_indexes/hg19_1kgmaj_bt2.zip This is the Langmead lab pre-built major-allele-SNP reference; see https://github.com/BenLangmead/bowtie-majref) --bowtie_index_name Name of the bowtie index, e.g. hg19_1kgmaj ---vcfmix Run VFCMIX "yes" or "no". Should be set to "no" for synthetic samples +--vcfmix Run VFCMIX "yes" or "no". Should be set to "no" for synthetic samples --resistance_profiler Tool to profile resistance with. 
At the moment options are "tb-profiler" or "none" --amr_cat Path to the AMR catalogue (https://github.com/oxfordmmm/tuberculosis_amr_catalogues is at /tuberculosis_amr_catalogues in the vcfpredict container) ---afanc_myco_db Path to the Afanc database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_3.0.tar.gz +--afanc_myco_db Path to the Afanc database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_3.0.tar.gz Optional parameters: ------------------------------------------------------------------------ @@ -63,17 +63,17 @@ Optional parameters: default: null using this parameter will apply an additional sanity test to your sample - if you DO NOT use this parameter (default option), pipeline will determine principal species from + if you DO NOT use this parameter (default option), pipeline will determine principal species from the reads and consider any other species a contaminant - if you DO use this parameter, pipeline will expect this to be the principal species. It will fail - the sample if reads from this species are not actually the majority + If you DO use this parameter, pipeline will expect this to be the principal species. It will fail + the sample if reads from this species are not actually the majority Profiles: ------------------------------------------------------------------------ singularity to run with singularity -docker to run with docker +docker to run with docker Examples: @@ -88,10 +88,18 @@ nextflow run main.nf -profile docker --filetype bam --input_dir bam_dir --unmix_ resistance_profilers = ["tb-profiler", "none"] - if(!resistance_profilers.contains(params.resistance_profiler)){ +if(!resistance_profilers.contains(params.resistance_profiler)){ exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler" or "none" to skip.' 
} +//tbprofiler container already has the reference genome in the DB, so skip if using docker +if((params.resistance_profiler == "tb-profiler") && (params.container_enabled == true)) { + update_tbprofiler = true +} else { + update_tbprofiler = false +} + +resistance_profiler = params.resistance_profiler // confirm that mandatory parameters have been set and that the conditional parameter, --pattern, has been used appropriately if ( params.input_dir == "" ) { @@ -125,18 +133,17 @@ M Y C O B A C T E R I A L P I P E L I N E Parameters used: ------------------------------------------------------------------------ ---input_dir ${params.input_dir} ---filetype ${params.filetype} ---pattern ${params.pattern} ---output_dir ${params.output_dir} ---unmix_myco ${params.unmix_myco} ---kraken_db ${params.kraken_db} +--input_dir ${params.input_dir} +--filetype ${params.filetype} +--pattern ${params.pattern} +--output_dir ${params.output_dir} +--unmix_myco ${params.unmix_myco} +--kraken_db ${params.kraken_db} --bowtie2_index ${params.bowtie2_index} --bowtie_index_name ${params.bowtie_index_name} ---species ${params.species} ---vcfmix ${params.vcfmix} ---gnomonicus ${params.gnomonicus} ---amr_cat ${params.amr_cat} +--resistance_profiler ${params.resistance_profiler} +--species ${params.species} +--vcfmix ${params.vcfmix} --afanc_myco_db ${params.afanc_myco_db} Runtime data: @@ -207,7 +214,7 @@ workflow { minos_vcf = clockwork.out.minos_vcf reference = clockwork.out.reference - vcfpredict(mpileup_vcf, minos_vcf, reference) + vcfpredict(mpileup_vcf, minos_vcf, reference, resistance_profiler, update_tbprofiler) } From f6783dba1ca08a10ce9af9cc8807374accbf74f2 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Wed, 10 Jan 2024 10:12:02 +0000 Subject: [PATCH 16/44] update readme, tidy up and remove gnomonicus references --- README.md | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 5c44791..4fac53e 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Pipeline cleans and QCs reads with fastp and FastQC, classifies with Kraken2 & A Note that while Mykrobe is included within this pipeline, it runs as an independent process and is not used for any downstream reporting. -**WARNING**: There are currently known errors with vcfmix and gnomonicus, as such `errorStrategy 'ignore'` has been added to the processes vcfpredict:vcfmix and vcfpredict:gnomonicus to stop the pipeline from crashing. Please check the stdout from nextflow to see whether these processes have ran successfully. +**WARNING**: There are currently known errors with vcfmix, as such `errorStrategy 'ignore'` has been added to the processes vcfpredict:vcfmix to stop the pipeline from crashing. Please check the stdout from nextflow to see whether these processes have ran successfully. ## Quick Start ## This is a Nextflow DSL2 pipeline, it requires a version of Nextflow that supports DSL2 and the stub-run feature. It is recommended to run the pipeline with `NXF_VER=20.11.0-edge`, as the pipeline has been tested using this version. E.g. to download @@ -29,6 +29,8 @@ NXF_VER=20.11.0-edge nextflow run main.nf -profile docker --filetype bam --input --output_dir . --kraken_db /path/to/database --bowtie2_index /path/to/index --bowtie_index_name hg19_1kgmaj ``` +There is also a pre-configured climb profile to run Lodestone on a CLIMB Jupyter Notebook Server. Add ```-profile climb``` to your command invocation. The input directory can point to an S3 bucket natively (e.g. ```--input_dir s3://my-team/bucket```). 
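For example, a minimal sketch of an invocation on CLIMB (the bucket name and output directory here are placeholders, and the Kraken2, Bowtie2 and Afanc database parameters are assumed to come from the profile defaults described below):

```
NXF_VER=20.11.0-edge nextflow run main.nf -profile climb --filetype fastq --pattern "*_R{1,2}.fq.gz" \
--input_dir s3://my-team/bucket --unmix_myco no --output_dir results
```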
By default this will run the workflow in Docker containers and take advantage of kubernetes pods. The Kraken2, Bowtie2 and Afanc databases point by default to the ```pluspf16```, ```hg19_1kgmaj_bt2``` and ```Mycobacteriaciae_DB_7.0``` directories. These are mounted on a public S3 bucket hosted on CLIMB.
+
### Executors ###
By default, the pipeline will just run on the local machine. To run on a cluster, modifications will have to be made to the `nextflow.config` to add in the executor. E.g. for a SLURM cluster add `process.executor = 'slurm'`. For more information on executor options see the Nextflow docs: https://www.nextflow.io/docs/latest/executor.html
@@ -63,10 +65,8 @@
Directory containing Bowtie2 index (obtain from ftp://ftp.ccb.jhu.edu/pub/data/bowtie2_indexes/hg19_1kgmaj_bt2.zip
This is the Langmead lab pre-built major-allele-SNP reference; see https://github.com/BenLangmead/bowtie-majref)<br />
* **bowtie_index_name**<br />
Name of the bowtie index, e.g. hg19_1kgmaj<br />
* **vcfmix**
Run [vcfmix](https://github.com/AlexOrlek/VCFMIX), yes or no. Set to no for synthetic samples
-* **gnomonicus**
-Run [gnomonicus](https://github.com/oxfordmmm/gnomonicus), yes or no
-* **amr_cat**
-Path to AMR catalogue for gnomonicus
+* **resistance_profiler**
Run resistance profiling for Mycobacterium tuberculosis. Either ["tb-profiler"](https://tbdr.lshtm.ac.uk/) or "none".
Path to the [afanc](https://github.com/ArthurVM/Afanc) database used for speciation. Obtain from https://s3.climb.ac.uk/microbial-bioin-sp3/Mycobacteriaciae_DB_7.0.tar.gz
@@ -125,10 +125,7 @@ process clockwork:alignToRef\ 25. (Fail) If < 50% of the reference genome was covered at 10-fold depth process clockwork:minos\ -26. (Warn) If sample is not TB, then it is not passed to gnomonicus - -## Running on CLIMB Jupyter Hub -There is a pre-configured climb profile to run Lodestone on a CLIMB Jupyter Notebook Server. Add ```profile climb``` to your command invocation. The input directory can point to an S3 bucket natively (e.g. ```--input_dir s3://my-team/bucket```). By default this will run the workflow in Docker containers and take advantage of kubernetes pods. The Kraken2, Bowtie2 and Afanc databases will by default point to the ```pluspf16```, ```hg19_1kgmaj_bt2``` and ```Mycobacteriaciae_DB_7.0``` respectively. These are mounted on a public shared volume. +26. (Warn) If sample is not TB, then it is not passed to a resistance profiler ## Acknowledgements ## For a list of direct authors of this pipeline, please see the contributors list. All of the software dependencies of this pipeline are recorded in the version.json From 7851f5979c37286edc6293af0853f54d3f00528c Mon Sep 17 00:00:00 2001 From: whalleyt Date: Wed, 10 Jan 2024 11:18:17 +0000 Subject: [PATCH 17/44] remove reference to gnomonicus --- modules/clockworkModules.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/clockworkModules.nf b/modules/clockworkModules.nf index 4a2675d..d793a34 100644 --- a/modules/clockworkModules.nf +++ b/modules/clockworkModules.nf @@ -206,7 +206,7 @@ process callVarsCortex { process minos { /** - * @QCcheckpoint check if top species is TB, if yes pass vcf to gnomonicus + * @QCcheckpoint check if top species is TB, if yes pass vcf to resistance profiling */ tag { sample_name } @@ -241,7 +241,7 @@ process minos { cp ${sample_name}_report.json ${sample_name}_report_previous.json - if [[ \$top_hit =~ ^"Mycobacterium tuberculosis" ]]; then printf "CREATE_ANTIBIOGRAM_${sample_name}"; else echo '{"gnomonicus-warning":"sample is not TB so cannot produce antibiogram using gnomonicus"}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi + if [[ \$top_hit =~ ^"Mycobacterium tuberculosis" ]]; then printf "CREATE_ANTIBIOGRAM_${sample_name}"; else echo '{"resistance-profiling-warning":"sample is not TB so cannot produce antibiogram using resistance profiling tools"}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi """ stub: @@ -296,7 +296,7 @@ process gvcf { cp ${sample_name}_report.json ${sample_name}_report_previous.json - if [ ${params.vcfmix} == "no" ] && [ ${params.gnomonicus} == "no" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi + if [ ${params.vcfmix} == "no" ] && [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' 
> ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi """ stub: From 3245b569662d0a526a6787545c13b1f34510dbe5 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Wed, 10 Jan 2024 15:52:58 +0000 Subject: [PATCH 18/44] push tbpofiler docker --- docker/Dockerfile.tbprofiler-0.9.8 | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/Dockerfile.tbprofiler-0.9.8 b/docker/Dockerfile.tbprofiler-0.9.8 index bcbffad..468d706 100644 --- a/docker/Dockerfile.tbprofiler-0.9.8 +++ b/docker/Dockerfile.tbprofiler-0.9.8 @@ -8,7 +8,6 @@ WORKDIR / ARG TBPROFILER_VER="5.0.1" - # this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/ # commits are found on https://github.com/jodyphelan/tbdb/commits/master # this was the latest commit as of 2023-10-26 From 7c7968068542c54dc94a4eb5624be2ad6f10b0a1 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Wed, 10 Jan 2024 15:53:51 +0000 Subject: [PATCH 19/44] nextflow config for docker tbprofiler --- nextflow.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 9b76d20..d268fb4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -94,7 +94,7 @@ profiles { } withLabel:tbprofiler { - container = "twhalley93/tb-profiler:latest" + container = "quay.io/pathogen-genomics-cymru//tb-profiler:latest" } withLabel:getversion{ @@ -166,7 +166,7 @@ profiles { } withLabel:tbprofiler { - container = "twhalley93/tb-profiler:latest" + container = "quay.io/pathogen-genomics-cymru//tb-profiler:latest" } withName:downloadContamGenomes { @@ -225,7 +225,7 @@ profiles { } withLabel:tbprofiler { - container = "twhalley93/tb-profiler:latest" + container = "quay.io/pathogen-genomics-cymru//tb-profiler:latest" } withName:downloadContamGenomes { @@ -299,7 +299,7 @@ profiles { } withLabel:tbprofiler { - container = "twhalley93/tb-profiler:latest" + container = "quay.io/pathogen-genomics-cymru//tb-profiler:latest" } } } From dc9ff40f3ce3a224909d36b39390d1a3413dc123 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Wed, 10 Jan 2024 16:43:45 +0000 Subject: [PATCH 20/44] add config to tidy up main config --- config/containers.config | 51 ++++++++ nextflow.config | 262 +++++---------------------------------- 2 files changed, 84 insertions(+), 229 deletions(-) create mode 100644 config/containers.config diff --git a/config/containers.config b/config/containers.config new file mode 100644 index 0000000..ae30606 --- /dev/null +++ b/config/containers.config @@ -0,0 +1,51 @@ +params{ + container_enabled = "true" + pipeline_version = 0.9.8 + container_enabled = "true" + resource_dir = "/resources" +} + + +process { + errorStrategy = 'ignore' + update_tbprofiler = "false" + + + withLabel:low_cpu {cpus = 2} + withLabel:normal_cpu { cpus = 8 } + withLabel:low_memory { memory = '5GB' } + withLabel:medium_memory { memory = '10GB' } + withLabel:high_memory { memory = '18GB' } + + withLabel:getversion { + container = "quay.io/pathogen-genomics-cymru/preprocessing:${pipeline_version}" + } + + withLabel:preprocessing { + container = "quay.io/pathogen-genomics-cymru/preprocessing:${pipeline_version}" + } + + withLabel:tbprofiler { + container = "quay.io/pathogen-genomics-cymru//tb-profiler:${pipeline_version}" + } + + withName:downloadContamGenomes { + shell = ['/bin/bash','-u'] + errorStrategy = { task.exitStatus in 100..113 ? 
'retry' : 'terminate' } + maxRetries = 5 + } + + withLabel:retryAfanc { + shell = ['/bin/bash','-u'] + errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' } + maxRetries = 5 + } + + withLabel:clockwork { + container = "quay.io/pathogen-genomics-cymru/clockwork:${pipeline_version}" + } + + withLabel:vcfpredict { + container = "quay.io/pathogen-genomics-cymru/vcfpredict:${pipeline_version}" + } + } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index d268fb4..4747852 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,13 +1,3 @@ -// config for lodestone - -manifest { - name = "pathogen-genomics-cymru/lodestone" -} - - -trace.overwrite = true -report.overwrite = true - params { // help message @@ -45,6 +35,7 @@ params { vcfmix = 'yes' resistance_profiler = "tb-profiler" + update_tbprofiler = "true" // path to singularity recipes directory (needed to strip software versions in getversion) sing_dir = "${baseDir}/singularity" @@ -62,245 +53,58 @@ params { } profiles { - climb { - - //this is pre-defined in the CLIMB nextflow.config; however it has been added to allow - //-profile climb to still work outside of CLIMB system (e.g. to access S3 buckets) - aws { - profile = "climb" - client { - endpoint = 'https://s3.climb.ac.uk' - s3PathStyleAccess = true - } - } - - docker.enabled = true - fixOwnership = true - runOptions = "-u \$(id -u):\$(id -g)" - - // define containers for each process - process { - k8s { - pullPolicy = "always" - } - withLabel:low_cpu {cpus = 2} - withLabel:normal_cpu { cpus = 8 } - withLabel:low_memory { memory = '5GB' } - withLabel:medium_memory { memory = '10GB' } - withLabel:high_memory { memory = '18GB' } - - withLabel:preprocessing { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" - } - - withLabel:tbprofiler { - container = "quay.io/pathogen-genomics-cymru//tb-profiler:latest" - } - - withLabel:getversion{ - executor = "local" - } - - withLabel:afanc_parse{ - executor = "local" - } - withName:downloadContamGenomes { - shell = ['/bin/bash','-u'] - errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' } - maxRetries = 5 - } - - withLabel:retry_afanc { - shell = ['/bin/bash','-u'] - errorStrategy = {task.exitStatus == 1 ? 
'retry' : 'ignore' } - maxRetries = 5 - } - - withLabel:clockwork { - container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8" - } - - withLabel:vcfpredict { - container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8" - } - } - params{ - container_enabled = "true" - - bowtie2_index = "s3://microbial-bioin-sp3/bowtie_hg19" - bowtie_index_name = "hg19_1kgmaj" - kraken_db = "s3://microbial-bioin-sp3/kraken_pluspf_16gb/" - afanc_myco_db = "s3://microbial-bioin-sp3/Mycobacteriaciae_DB_7.0/" + climb { + includeConfig 'config/containers.config' + + //add in docker configs as the above config file is generic for any containerised run + docker.enabled = true + fixOwnership = true + runOptions = "-u \$(id -u):\$(id -g)" + + //params specific to paths on the climb system + params{ + bowtie2_index = "s3://microbial-bioin-sp3/bowtie_hg19" + bowtie_index_name = "hg19_1kgmaj" + kraken_db = "s3://microbial-bioin-sp3/kraken_pluspf_16gb/" + afanc_myco_db = "s3://microbial-bioin-sp3/Mycobacteriaciae_DB_7.0/" - resource_dir = "s3://microbial-bioin-sp3/lodestone_resources" - refseq = "${resource_dir}/assembly_summary_refseq.txt" + resource_dir = "s3://microbial-bioin-sp3/lodestone_resources" + refseq = "${resource_dir}/assembly_summary_refseq.txt" } - } - singularity { - - params{ - resource_dir = "/resources" - container_enabled = "true" } + + singularity { + includeConfig 'config/containers.config' - singularity.enabled = 'true' - singularity.autoMounts = 'true' - - // path to the singularity containers - singularity.cacheDir = "${baseDir}/singularity" - - process { - withLabel:low_cpu {cpus = 2} - withLabel:normal_cpu { cpus = 8 } - - withLabel:low_memory { memory = '5GB' } - withLabel:medium_memory { memory = '10GB' } - withLabel:high_memory { memory = '18GB' } - - withLabel:getversion { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" - } - - withLabel:preprocessing { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" - } - - withLabel:tbprofiler { - container = "quay.io/pathogen-genomics-cymru//tb-profiler:latest" - } - - withName:downloadContamGenomes { - shell = ['/bin/bash','-u'] - errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' } - maxRetries = 5 - } - - withLabel:retryAfanc { - shell = ['/bin/bash','-u'] - // Afanc sometimes fails curl in slurm, retry if so (error is masked as error status 1) - errorStrategy = {task.exitStatus == 1 ? 
'retry' : 'ignore' } - maxRetries = 5 - } - - withLabel:clockwork { - container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8r1" - } - - withLabel:vcfpredict { - container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8r1" - } + singularity.enabled = 'true' + singularity.autoMounts = 'true' + //path to the singularity containers + singularity.cacheDir = "${baseDir}/singularity" } - } sp3 { - + + includeConfig 'config/containers.config' + + //add in singularity configs as the above config file is generic for any containerised run singularity.enabled = 'true' singularity.autoMounts = 'true' - // path to the singularity containers singularity.cacheDir = "/data/images" - - params{ - resource_dir = "/resources" - container_enabled = "true" - } process { - scratch = true - errorStrategy = 'ignore' - - withLabel:low_cpu {cpus = 2} - withLabel:normal_cpu { cpus = 8 } - withLabel:low_memory { memory = '5GB' } - withLabel:medium_memory { memory = '10GB' } - withLabel:high_memory { memory = '18GB' } - - withLabel:getversion { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" - } - - withLabel:preprocessing { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" - } - - withLabel:tbprofiler { - container = "quay.io/pathogen-genomics-cymru//tb-profiler:latest" - } - - withName:downloadContamGenomes { - shell = ['/bin/bash','-u'] - errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' } - maxRetries = 5 - } - - withLabel:retryAfanc { - shell = ['/bin/bash','-u'] - errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' } - maxRetries = 5 - } - - withLabel:clockwork { - container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8" - } - - withLabel:vcfpredict { - container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8r1" - } + scratch = true } - } + } docker { - + includeConfig 'config/containers.config' + + //add in docker configs as the above config file is generic for any containerised run docker.enabled = true fixOwnership = true runOptions = "-u \$(id -u):\$(id -g)" - - params{ - container_enabled = "true" - resource_dir = "/resources" - } - - // define containers for each process - process { - withLabel:low_cpu {cpus = 2} - withLabel:normal_cpu { cpus = 8 } - withLabel:low_memory { memory = '5GB' } - withLabel:medium_memory { memory = '10GB' } - withLabel:high_memory { memory = '18GB' } - - withLabel:getversion { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" - } - - withLabel:preprocessing { - container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" - } - - withName:downloadContamGenomes { - shell = ['/bin/bash','-u'] - errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' } - maxRetries = 5 - } - - withLabel:retryAfanc { - shell = ['/bin/bash','-u'] - errorStrategy = {task.exitStatus == 1 ? 
'retry' : 'ignore' }
-              maxRetries = 5
-          }
-
-          withLabel:clockwork {
-              container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8"
-          }
-
-          withLabel:vcfpredict {
-              container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8"
-          }
-
-          withLabel:tbprofiler {
-              container = "quay.io/pathogen-genomics-cymru//tb-profiler:latest"
-          }
-      }
   }
 }

From 3d266f5cba666ac874bda12d46b8b8ee175a7088 Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Wed, 10 Jan 2024 17:28:38 +0000
Subject: [PATCH 21/44] config include to avoid repetition

---
 config/containers.config | 11 +++++------
 nextflow.config          | 10 ++++++++++
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/config/containers.config b/config/containers.config
index ae30606..c1bfc80 100644
--- a/config/containers.config
+++ b/config/containers.config
@@ -1,6 +1,5 @@
 params{
     container_enabled = "true"
-    pipeline_version = 0.9.8
     container_enabled = "true"
     resource_dir = "/resources"
 }
@@ -18,15 +17,15 @@ process {
     withLabel:high_memory { memory = '18GB' }
 
     withLabel:getversion {
-        container = "quay.io/pathogen-genomics-cymru/preprocessing:${pipeline_version}"
+        container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8"
     }
 
     withLabel:preprocessing {
-        container = "quay.io/pathogen-genomics-cymru/preprocessing:${pipeline_version}"
+        container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8"
     }
 
     withLabel:tbprofiler {
-        container = "quay.io/pathogen-genomics-cymru//tb-profiler:${pipeline_version}"
+        container = "quay.io/pathogen-genomics-cymru/tbprofiler:0.9.8"
     }
 
     withName:downloadContamGenomes {
@@ -42,10 +41,10 @@ process {
     }
 
     withLabel:clockwork {
-        container = "quay.io/pathogen-genomics-cymru/clockwork:${pipeline_version}"
+        container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8"
     }
 
     withLabel:vcfpredict {
-        container = "quay.io/pathogen-genomics-cymru/vcfpredict:${pipeline_version}"
+        container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8"
     }
 }
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 4747852..c8b15d1 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -61,6 +61,16 @@ profiles {
         fixOwnership = true
         runOptions = "-u \$(id -u):\$(id -g)"
 
+        withLabel:getversion{
+            executor = "local"
+            container = null
+        }
+
+        withLabel:afanc_parse{
+            executor = "local"
+            container = null
+        }
+
         //params specific to paths on the climb system
         params{
             bowtie2_index = "s3://microbial-bioin-sp3/bowtie_hg19"

From 84f137dc85b9c293dd81e1923821ad2552c710a3 Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Thu, 11 Jan 2024 10:16:52 +0000
Subject: [PATCH 22/44] gatk4 to tb-profiler container

---
 docker/Dockerfile.tbprofiler-0.9.8 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/Dockerfile.tbprofiler-0.9.8 b/docker/Dockerfile.tbprofiler-0.9.8
index 468d706..b7f6d80 100644
--- a/docker/Dockerfile.tbprofiler-0.9.8
+++ b/docker/Dockerfile.tbprofiler-0.9.8
@@ -36,6 +36,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # install tb-profiler via bioconda; install into 'base' conda env
 RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \
     tb-profiler=${TBPROFILER_VER} && \
+    micromamba install -c bioconda -c conda-forge gatk4 && \
     micromamba clean --all --yes
 
 # hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time

From f013273053fa224f7f3e7e118bcb4e1780c5445b Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Thu, 11 Jan 2024 10:31:13 +0000
Subject: [PATCH 23/44] tb-profiler docker container gatk

---
 docker/Dockerfile.tbprofiler-0.9.8 | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docker/Dockerfile.tbprofiler-0.9.8 b/docker/Dockerfile.tbprofiler-0.9.8
index b7f6d80..b1f8b9a 100644
--- a/docker/Dockerfile.tbprofiler-0.9.8
+++ b/docker/Dockerfile.tbprofiler-0.9.8
@@ -35,9 +35,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 
 # install tb-profiler via bioconda; install into 'base' conda env
 RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \
-    tb-profiler=${TBPROFILER_VER} && \
-    micromamba install -c bioconda -c conda-forge gatk4 && \
-    micromamba clean --all --yes
+    tb-profiler=${TBPROFILER_VER}
+
+RUN micromamba install -c bioconda -c conda-forge gatk4
+RUN micromamba clean --all --yes
 
 # hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time
 ENV PATH="/opt/conda/bin:${PATH}"

From 616ef139464b695970f4182257d56610fb2d25e6 Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Thu, 11 Jan 2024 10:35:41 +0000
Subject: [PATCH 24/44] tb-profiler docker container gatk

---
 docker/Dockerfile.tbprofiler-0.9.8 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile.tbprofiler-0.9.8 b/docker/Dockerfile.tbprofiler-0.9.8
index b1f8b9a..5575ee4 100644
--- a/docker/Dockerfile.tbprofiler-0.9.8
+++ b/docker/Dockerfile.tbprofiler-0.9.8
@@ -37,7 +37,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \
     tb-profiler=${TBPROFILER_VER}
 
-RUN micromamba install -c bioconda -c conda-forge gatk4
+RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
 RUN micromamba clean --all --yes
 
 # hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time

From c7fd33c19657b5e74c1657e8aa9f3181196b6b7c Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Thu, 11 Jan 2024 10:51:54 +0000
Subject: [PATCH 25/44] allelic depth

---
 modules/vcfpredictModules.nf | 26 ++++++++++++++++++++++++++
 workflows/vcfpredict.nf      | 16 ++++++++++++++--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf
index 776ff0a..98eb7aa 100644
--- a/modules/vcfpredictModules.nf
+++ b/modules/vcfpredictModules.nf
@@ -70,6 +70,7 @@ process tbprofiler {
     input:
     val(sample_name)
     path(minos_vcf)
+    val(isSampleTB)
 
     output:
    path("results/tbprofiler.results.json")
@@ -84,6 +85,31 @@ process tbprofiler {
     """
 }
 
+process add_allelic_depth {
+    label 'low_memory'
+    label 'low_cpu'
+    label 'tbprofiler'
+
+    input:
+    val(sample_name)
+    path(minos_vcf)
+    path(reference)
+    val(isSampleTB)
+
+    output:
+    path("${sample_name}_allelic_depth.minos.vcf")
+
+    when:
+    isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/
+
+    script:
+    """
+    samtools faidx $reference
+    gatk VariantAnnotator -R $reference -V $minos_vcf -A DepthPerAlleleBySample -O ${sample_name}_allelic_depth.minos.vcf
+    """
+
+}
+
 process gnomonicus {
 
     tag {sample_name}
diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf
index 2006e73..b932325 100644
--- a/workflows/vcfpredict.nf
+++ b/workflows/vcfpredict.nf
@@ -5,6 +5,7 @@ nextflow.enable.dsl = 2
 include {vcfmix} from '../modules/vcfpredictModules.nf' params(params)
 include {tbprofiler} from '../modules/vcfpredictModules.nf' params(params)
 include {tbprofiler_update_db} from '../modules/vcfpredictModules.nf' params(params)
+include {add_allelic_depth} from '../modules/vcfpredictModules.nf' params(params)
 
 // define workflow component
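// For reference: the add_allelic_depth process introduced above is roughly
// equivalent to running the following commands by hand, with placeholder file
// names (ref.fa and sample.minos.vcf are illustrative, not taken from this
// patch). A later patch in this series also adds the `samtools dict` step,
// which GATK expects alongside the .fai index:
//
//   samtools faidx ref.fa                    # FASTA index (.fai)
//   samtools dict ref.fa -o ref.dict         # sequence dictionary for GATK
//   gatk VariantAnnotator -R ref.fa -V sample.minos.vcf \
//       -A DepthPerAlleleBySample -O sample_allelic_depth.minos.vcf
//
// bcftools mpileup records per-allele depths (AD), but minos adjudication
// drops them, so they are re-annotated here before the VCF is handed to
// tb-profiler.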
workflow vcfpredict { @@ -13,6 +14,7 @@ workflow vcfpredict { clockwork_bcftools_tuple minos_vcf_tuple reference_fasta + main: @@ -24,10 +26,20 @@ workflow vcfpredict { if ( params.resistance_profiler == "tb-profiler"){ //get just the vcf - minos_vcf = minos_vcf_tuple.map{it[1]} sample_name = minos_vcf_tuple.map{it[0]} + minos_vcf = minos_vcf_tuple.map{it[1]} + do_we_resistance_profile = minos_vcf_tuple.map{it[2]} + if (params.update_tbprofiler == "yes"){ tbprofiler_update_db(reference_fasta) - tbprofiler(sample_name, minos_vcf) + } + + //add allelic depth back in: was calculated in mpileup but lost in minos + add_allelic_depth(sample_name, minos_vcf, reference_fasta, do_we_resistance_profile) + tbprofiler(sample_name, add_allelic_depth,out, do_we_resistance_profile) + } + + if (params.vcfmix == "yes" && params.resistance_profiler != "none"){ + //finalJson(vcfmix.out.vcfmix_json.join(gnomonicus.out.gnomon_json, by: 0)) } } From 77e234a5f958d4b06a4699462a83bea1a6ef916e Mon Sep 17 00:00:00 2001 From: whalleyt Date: Thu, 11 Jan 2024 11:07:32 +0000 Subject: [PATCH 26/44] deal with json of tbprofiler --- docker/Dockerfile.tbprofiler-0.9.8 | 1 + modules/vcfpredictModules.nf | 16 +++++++++++++++- workflows/vcfpredict.nf | 5 +++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.tbprofiler-0.9.8 b/docker/Dockerfile.tbprofiler-0.9.8 index 5575ee4..1947e3b 100644 --- a/docker/Dockerfile.tbprofiler-0.9.8 +++ b/docker/Dockerfile.tbprofiler-0.9.8 @@ -38,6 +38,7 @@ RUN micromamba install --yes --name base --channel conda-forge --channel biocond tb-profiler=${TBPROFILER_VER} RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4 +RUN micromamba install --yes --name base --channel conda-forge --channel bioconda samtools RUN micromamba clean --all --yes # hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf index 98eb7aa..574dbe9 100644 --- a/modules/vcfpredictModules.nf +++ b/modules/vcfpredictModules.nf @@ -66,6 +66,9 @@ process tbprofiler { label 'medium_memory' label 'medium_cpu' label 'tbprofiler' + + publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.tbprofiler-out.json', overwrite: 'true' + publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' input: val(sample_name) @@ -73,15 +76,25 @@ process tbprofiler { val(isSampleTB) output: - path("results/tbprofiler.results.json") + tuple val(sample_name), path("${sample_name}.tbprofiler-out.json"), path("${sample_name}_report.json"), emit: tbprofiler_json when: isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ script: + error_log = "${sample_name}_err.json" + tbprofiler_json = "${sample_name}.tbprofiler-out.json" + """ bgzip ${minos_vcf} tb-profiler profile --vcf ${minos_vcf}.gz --threads ${task.cpus} + mv results/tbprofiler.results.json ${tbprofiler_json} + + cp ${sample_name}_report.json ${sample_name}_report_previous.json + + echo '{"complete":"workflow complete without error"}' | jq '.' 
> ${error_log} + + jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${tbprofiler_json} > ${report_json} """ } @@ -94,6 +107,7 @@ process add_allelic_depth { val(sample_name) path(minos_vcf) path(reference) + path(report_json) val(isSampleTB) output: diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf index b932325..2896984 100644 --- a/workflows/vcfpredict.nf +++ b/workflows/vcfpredict.nf @@ -29,6 +29,7 @@ workflow vcfpredict { sample_name = minos_vcf_tuple.map{it[0]} minos_vcf = minos_vcf_tuple.map{it[1]} do_we_resistance_profile = minos_vcf_tuple.map{it[2]} + report_json = minos_vcf_tuple.map{it[3]} if (params.update_tbprofiler == "yes"){ tbprofiler_update_db(reference_fasta) @@ -36,10 +37,10 @@ workflow vcfpredict { //add allelic depth back in: was calculated in mpileup but lost in minos add_allelic_depth(sample_name, minos_vcf, reference_fasta, do_we_resistance_profile) - tbprofiler(sample_name, add_allelic_depth,out, do_we_resistance_profile) + tbprofiler(sample_name, add_allelic_depth,out, report_json, do_we_resistance_profile) } if (params.vcfmix == "yes" && params.resistance_profiler != "none"){ - //finalJson(vcfmix.out.vcfmix_json.join(gnomonicus.out.gnomon_json, by: 0)) + finalJson(vcfmix.out.vcfmix_json.join(gnomonicus.out.tbprofiler_json, by: 0)) } } From 5a760f1469e3973e8189ce3c089c6f83544b0ce8 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Thu, 11 Jan 2024 11:21:48 +0000 Subject: [PATCH 27/44] tidy up --- modules/vcfpredictModules.nf | 2 +- workflows/vcfpredict.nf | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf index 574dbe9..551788d 100644 --- a/modules/vcfpredictModules.nf +++ b/modules/vcfpredictModules.nf @@ -73,6 +73,7 @@ process tbprofiler { input: val(sample_name) path(minos_vcf) + path(report_json) val(isSampleTB) output: @@ -107,7 +108,6 @@ process add_allelic_depth { val(sample_name) path(minos_vcf) path(reference) - path(report_json) val(isSampleTB) output: diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf index 2896984..375e410 100644 --- a/workflows/vcfpredict.nf +++ b/workflows/vcfpredict.nf @@ -6,6 +6,7 @@ include {vcfmix} from '../modules/vcfpredictModules.nf' params(params) include {tbprofiler} from '../modules/vcfpredictModules.nf' params(params) include {tbprofiler_update_db} from '../modules/vcfpredictModules.nf' params(params) include {add_allelic_depth} from '../modules/vcfpredictModules.nf' params(params) +include {finalJson} from '../modules/vcfpredictModules.nf' params(params) // define workflow component workflow vcfpredict { @@ -37,10 +38,10 @@ workflow vcfpredict { //add allelic depth back in: was calculated in mpileup but lost in minos add_allelic_depth(sample_name, minos_vcf, reference_fasta, do_we_resistance_profile) - tbprofiler(sample_name, add_allelic_depth,out, report_json, do_we_resistance_profile) + tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile) } if (params.vcfmix == "yes" && params.resistance_profiler != "none"){ - finalJson(vcfmix.out.vcfmix_json.join(gnomonicus.out.tbprofiler_json, by: 0)) + finalJson(vcfmix.out.vcfmix_json.join(tbprofiler.out.tbprofiler_json, by: 0)) } } From 255500fba15e47bb5d8365da1d22460553b8dc20 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Thu, 11 Jan 2024 15:18:47 +0000 Subject: [PATCH 28/44] add jq to tbprofiler docker --- docker/Dockerfile.tbprofiler-0.9.8 | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile.tbprofiler-0.9.8 
b/docker/Dockerfile.tbprofiler-0.9.8 index 1947e3b..686c9c4 100644 --- a/docker/Dockerfile.tbprofiler-0.9.8 +++ b/docker/Dockerfile.tbprofiler-0.9.8 @@ -39,6 +39,7 @@ RUN micromamba install --yes --name base --channel conda-forge --channel biocond RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4 RUN micromamba install --yes --name base --channel conda-forge --channel bioconda samtools +RUN micromamba install --yes --name base --channel conda-forge jq RUN micromamba clean --all --yes # hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time From 658f7bd58ce50cd7390f70de7e21833571b79c73 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Thu, 11 Jan 2024 15:24:56 +0000 Subject: [PATCH 29/44] remove error ignore --- config/containers.config | 1 - 1 file changed, 1 deletion(-) diff --git a/config/containers.config b/config/containers.config index c1bfc80..dece260 100644 --- a/config/containers.config +++ b/config/containers.config @@ -6,7 +6,6 @@ params{ process { - errorStrategy = 'ignore' update_tbprofiler = "false" From 930098083249c57f5e7a15bb1b79c33627efc63e Mon Sep 17 00:00:00 2001 From: whalleyt Date: Fri, 12 Jan 2024 11:38:02 +0000 Subject: [PATCH 30/44] update workflow vcfpredict --- .../containers-checkpoint.config | 49 +++ main.nf | 2 +- .../clockworkModules-checkpoint.nf | 313 ++++++++++++++++++ .../vcfpredictModules-checkpoint.nf | 216 ++++++++++++ modules/vcfpredictModules.nf | 1 + .../clockwork-checkpoint.nf | 44 +++ .../vcfpredict-checkpoint.nf | 47 +++ 7 files changed, 671 insertions(+), 1 deletion(-) create mode 100644 config/.ipynb_checkpoints/containers-checkpoint.config create mode 100644 modules/.ipynb_checkpoints/clockworkModules-checkpoint.nf create mode 100644 modules/.ipynb_checkpoints/vcfpredictModules-checkpoint.nf create mode 100644 workflows/.ipynb_checkpoints/clockwork-checkpoint.nf create mode 100644 workflows/.ipynb_checkpoints/vcfpredict-checkpoint.nf diff --git a/config/.ipynb_checkpoints/containers-checkpoint.config b/config/.ipynb_checkpoints/containers-checkpoint.config new file mode 100644 index 0000000..dece260 --- /dev/null +++ b/config/.ipynb_checkpoints/containers-checkpoint.config @@ -0,0 +1,49 @@ +params{ + container_enabled = "true" + container_enabled = "true" + resource_dir = "/resources" +} + + +process { + update_tbprofiler = "false" + + + withLabel:low_cpu {cpus = 2} + withLabel:normal_cpu { cpus = 8 } + withLabel:low_memory { memory = '5GB' } + withLabel:medium_memory { memory = '10GB' } + withLabel:high_memory { memory = '18GB' } + + withLabel:getversion { + container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" + } + + withLabel:preprocessing { + container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8" + } + + withLabel:tbprofiler { + container = "quay.io/pathogen-genomics-cymru/tbprofiler:0.9.8" + } + + withName:downloadContamGenomes { + shell = ['/bin/bash','-u'] + errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' } + maxRetries = 5 + } + + withLabel:retryAfanc { + shell = ['/bin/bash','-u'] + errorStrategy = {task.exitStatus == 1 ? 
'retry' : 'ignore' } + maxRetries = 5 + } + + withLabel:clockwork { + container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8" + } + + withLabel:vcfpredict { + container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8" + } + } \ No newline at end of file diff --git a/main.nf b/main.nf index 0a9cbdf..2f38ef6 100644 --- a/main.nf +++ b/main.nf @@ -214,7 +214,7 @@ workflow { minos_vcf = clockwork.out.minos_vcf reference = clockwork.out.reference - vcfpredict(mpileup_vcf, minos_vcf, reference, resistance_profiler, update_tbprofiler) + vcfpredict(mpileup_vcf, minos_vcf, reference) } diff --git a/modules/.ipynb_checkpoints/clockworkModules-checkpoint.nf b/modules/.ipynb_checkpoints/clockworkModules-checkpoint.nf new file mode 100644 index 0000000..d793a34 --- /dev/null +++ b/modules/.ipynb_checkpoints/clockworkModules-checkpoint.nf @@ -0,0 +1,313 @@ +// modules for the clockwork workflow + +process getRefFromJSON { + tag { sample_name } + label 'clockwork' + label 'low_memory' + label 'low_cpu' + + input: + path(species_json) + val(do_we_align) + val(sample_name) + + when: + do_we_align =~ /NOW\_ALIGN\_TO\_REF\_${sample_name}/ + + output: + stdout + + script: + """ + ref_string=\$(jq -r '.top_hit.file_paths.ref_fa' ${species_json}) + echo "\$ref_string" + """ + + +} + +process alignToRef { + /** + * @QCcheckpoint fail if insufficient number and/or quality of read alignments to the reference genome + */ + + tag { sample_name } + label 'clockwork' + label 'normal_cpu' + label 'medium_memory' + + publishDir "${params.output_dir}/$sample_name/output_bam", mode: 'copy', overwrite: 'true', pattern: '*{.bam,.bam.bai,_alignmentStats.json}' + publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' + + input: + tuple val(sample_name), path(fq1), path(fq2), path(software_json), path(species_json), val(doWeAlign) + path(reference_path) + + when: + doWeAlign =~ /NOW\_ALIGN\_TO\_REF\_${sample_name}/ + + output: + tuple val(sample_name), path("${sample_name}_report.json"), path("${sample_name}.bam"), path("${sample_name}.fa"), stdout, emit: alignToRef_bam + path("${sample_name}.bam.bai", emit: alignToRef_bai) + path("${sample_name}_alignmentStats.json", emit: alignToRef_json) + path "${sample_name}_err.json", emit: alignToRef_log optional true + tuple val(sample_name), path("${sample_name}_report.json"), emit: alignToRef_report + + script: + bam = "${sample_name}.bam" + bai = "${sample_name}.bam.bai" + stats = "${sample_name}.stats" + stats_json = "${sample_name}_alignmentStats.json" + report_json = "${sample_name}_report.json" + error_log = "${sample_name}_err.json" + + """ + echo $reference_path + cp ${reference_path} ${sample_name}.fa + + minimap2 -ax sr ${sample_name}.fa -t ${task.cpus} $fq1 $fq2 | samtools fixmate -m - - | samtools sort -T tmp - | samtools markdup --reference ${sample_name}.fa - minimap.bam + + java -jar /usr/local/bin/picard.jar AddOrReplaceReadGroups INPUT=minimap.bam OUTPUT=${bam} RGID=${sample_name} RGLB=lib RGPL=Illumina RGPU=unit RGSM=sample + + samtools index ${bam} ${bai} + samtools stats ${bam} > ${stats} + + parse_samtools_stats.py ${bam} ${stats} > ${stats_json} + create_final_json.py ${stats_json} ${species_json} + + cp ${sample_name}_report.json ${sample_name}_report_previous.json + + jq -s ".[0] * .[1]" ${software_json} ${sample_name}_report_previous.json > ${report_json} + + continue=\$(jq -r '.summary_questions.continue_to_clockwork' ${report_json}) + + if [ \$continue == 'yes' ]; then printf 
"NOW_VARCALL_${sample_name}"; elif [ \$continue == 'no' ]; then echo '{"error":"insufficient number and/or quality of read alignments to the reference genome"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi + """ + + stub: + bam = "${sample_name}.bam" + bai = "${sample_name}.bam.bai" + stats = "${sample_name}.stats" + stats_json = "${sample_name}_alignmentStats.json" + out_json = "${sample_name}_report.json" + error_log = "${sample_name}_err.json" + + """ + touch ${sample_name}.fa + touch ${bam} + touch ${bai} + touch ${stats} + touch ${stats_json} + touch ${out_json} + touch ${error_log} + printf ${params.alignToRef_doWeVarCall} + """ +} + +process callVarsMpileup { + /** + * @QCcheckpoint none + */ + + tag { sample_name } + label 'clockwork' + label 'normal_cpu' + label 'low_memory' + + publishDir "${params.output_dir}/$sample_name/output_vcfs", mode: 'copy', pattern: '*.vcf' + + input: + tuple val(sample_name), path(report_json), path(bam), path(ref), val(doWeVarCall) + + when: + doWeVarCall =~ /NOW\_VARCALL\_${sample_name}/ + + output: + tuple val(sample_name), path("${sample_name}.bcftools.vcf"), emit: mpileup_vcf + + script: + bcftools_vcf = "${sample_name}.bcftools.vcf" + + """ + bcftools mpileup -Ou -a 'INFO/AD' -f ${ref} ${bam} | bcftools call --threads ${task.cpus} -vm -O v -o ${bcftools_vcf} + """ + + stub: + bcftools_vcf = "${sample_name}.bcftools.vcf" + + """ + touch ${bcftools_vcf} + """ +} + +process getRefCortex { + tag { sample_name } + label 'clockwork' + label 'low_memory' + label 'low_cpu' + + input: + tuple val(sample_name), path(report_json), path(bam), path(ref), val(doWeVarCall) + + when: + doWeVarCall =~ /NOW\_VARCALL\_${sample_name}/ + + output: + stdout + + script: + """ + ref_dir=\$(jq -r '.top_hit.file_paths.clockwork_ref_dir' ${report_json}) + echo "\$ref_dir" + """ + + +} + +process callVarsCortex { + /** + * @QCcheckpoint none + */ + + tag { sample_name } + label 'clockwork' + label 'normal_cpu' + label 'medium_memory' + + publishDir "${params.output_dir}/$sample_name/output_vcfs", mode: 'copy', pattern: '*.vcf' + + input: + tuple val(sample_name), path(report_json), path(bam), path(ref), val(doWeVarCall) + path(ref_dir) + + when: + doWeVarCall =~ /NOW\_VARCALL\_${sample_name}/ + + output: + tuple val(sample_name), path("${sample_name}.cortex.vcf"), emit: cortex_vcf + + script: + cortex_vcf = "${sample_name}.cortex.vcf" + + """ + cp -r ${ref_dir}/* . + + clockwork cortex . 
${bam} cortex ${sample_name} + cp cortex/cortex.out/vcfs/cortex_wk_flow_I_RefCC_FINALcombined_BC_calls_at_all_k.raw.vcf ${cortex_vcf} + """ + + stub: + cortex_vcf = "${sample_name}.cortex.vcf" + + """ + touch ${cortex_vcf} + """ +} + +process minos { + /** + * @QCcheckpoint check if top species is TB, if yes pass vcf to resistance profiling + */ + + tag { sample_name } + label 'clockwork' + label 'medium_memory' + label 'normal_cpu' + + publishDir "${params.output_dir}/$sample_name/output_vcfs", mode: 'copy', pattern: '*.vcf' + publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' + + input: + tuple val(sample_name), path(report_json), path(bam), path(ref), val(doWeVarCall), path(cortex_vcf), path(bcftools_vcf) + + output: + tuple val(sample_name), path(report_json), path(bam), path(ref), emit: minos_bam + tuple val(sample_name), path("${sample_name}.minos.vcf"), stdout, emit: minos_vcf + tuple val(sample_name), path("${sample_name}_report.json"), emit: minos_report + path "${sample_name}_err.json", emit: minos_log optional true + + script: + minos_vcf = "${sample_name}.minos.vcf" + error_log = "${sample_name}_err.json" + + """ + awk '{print \$1}' ${ref} > ref.fa + + minos adjudicate --force --reads ${bam} minos ref.fa ${bcftools_vcf} ${cortex_vcf} + cp minos/final.vcf ${minos_vcf} + rm -rf minos + + top_hit=\$(jq -r '.top_hit.name' ${report_json}) + + cp ${sample_name}_report.json ${sample_name}_report_previous.json + + if [[ \$top_hit =~ ^"Mycobacterium tuberculosis" ]]; then printf "CREATE_ANTIBIOGRAM_${sample_name}"; else echo '{"resistance-profiling-warning":"sample is not TB so cannot produce antibiogram using resistance profiling tools"}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi + """ + + stub: + minos_vcf = "${sample_name}.minos.vcf" + error_log = "${sample_name}_err.json" + + """ + touch ${minos_vcf} + touch ${error_log} + printf ${params.minos_isSampleTB} + """ +} + +process gvcf { + /** + * @QCcheckpoint none + */ + + tag { sample_name } + label 'clockwork' + label 'normal_cpu' + label 'low_memory' + + publishDir "${params.output_dir}/$sample_name/output_fasta", mode: 'copy', pattern: '*.fa' + publishDir "${params.output_dir}/$sample_name/output_vcfs", mode: 'copy', pattern: '*.vcf.gz' + publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' + + input: + tuple val(sample_name), path(report_json), path(bam), path(ref), val(doWeValCall), path(minos_vcf), val(isSampleTB) + + output: + path("${sample_name}.gvcf.vcf.gz", emit: gvcf) + path("${sample_name}.fa", emit: gvcf_fa) + path "${sample_name}_err.json", emit: gvcf_log optional true + path "${sample_name}_report.json", emit: gvcf_report optional true + + script: + gvcf = "${sample_name}.gvcf.vcf" + gvcf_fa = "${sample_name}.fa" + error_log = "${sample_name}_err.json" + + """ + awk '{print \$1}' ${ref} > ref.fa + + samtools mpileup -ugf ref.fa ${bam} | bcftools call --threads ${task.cpus} -m -O v -o samtools_all_pos.vcf + + clockwork gvcf_from_minos_and_samtools ref.fa ${minos_vcf} samtools_all_pos.vcf ${gvcf} + clockwork gvcf_to_fasta ${gvcf} ${gvcf_fa} + + rm samtools_all_pos.vcf + gzip ${gvcf} + + cp ${sample_name}_report.json ${sample_name}_report_previous.json + + if [ ${params.vcfmix} == "no" ] && [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' 
> ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi + """ + + stub: + gvcf = "${sample_name}.gvcf.vcf.gz" + gvcf_fa = "${sample_name}.fa" + error_log = "${sample_name}_err.json" + + """ + touch ${gvcf} + touch ${gvcf_fa} + touch ${error_log} + """ +} + diff --git a/modules/.ipynb_checkpoints/vcfpredictModules-checkpoint.nf b/modules/.ipynb_checkpoints/vcfpredictModules-checkpoint.nf new file mode 100644 index 0000000..4bc7957 --- /dev/null +++ b/modules/.ipynb_checkpoints/vcfpredictModules-checkpoint.nf @@ -0,0 +1,216 @@ +// modules for the vcfpredict workflow + +process vcfmix { + + tag {sample_name} + label 'vcfpredict' + label 'low_memory' + label 'low_cpu' + + errorStrategy 'ignore' + + publishDir "${params.output_dir}/${sample_name}/output_vcfs", mode: 'copy', pattern: '*_f-stats.json', overwrite: 'true' + publishDir "${params.output_dir}/${sample_name}/output_vcfs", mode: 'copy', pattern: '*.csv', overwrite: 'true' + publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' + + input: + tuple val(sample_name), path(vcf), path(report_json) + + output: + tuple val(sample_name), path("${sample_name}_f-stats.json"), emit: vcfmix_json + tuple val(sample_name), path("${sample_name}_f-stats.json"), path("${sample_name}_vcfmix-regions.csv"), emit: vcfmix_json_csv + path "${sample_name}_err.json", emit: vcfmix_log optional true + path ("${sample_name}_report.json", emit: vcfmix_report) + + script: + bcftools_vcf = "${sample_name}.bcftools.vcf" + error_log = "${sample_name}_err.json" + + """ + run-vcfmix.py ${bcftools_vcf} + + cp ${sample_name}_report.json ${sample_name}_report_previous.json + + jq -s ".[0] * .[1]" ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json} + + if [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' 
> ${error_log} && jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}; fi + """ + + stub: + vcfmix_json = "${sample_name}_f-stats.json" + vcfmix_csv = "${sample_name}_vcfmix-regions.csv" + error_log = "${sample_name}_err.json" + + """ + touch ${vcfmix_json} + touch ${vcfmix_csv} + touch ${error_log} + """ +} + +process tbprofiler_update_db { + label 'low_memory' + label 'low_cpu' + label 'tbprofiler' + + input: + path(reference) + + script: + """ + tb-profiler update_tbdb --match_ref $reference + """ +} + +process tbprofiler { + label 'medium_memory' + label 'medium_cpu' + label 'tbprofiler' + + publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.tbprofiler-out.json', overwrite: 'true' + publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' + + input: + val(sample_name) + path(minos_vcf) + path(report_json) + val(isSampleTB) + + output: + tuple val(sample_name), path("${sample_name}.tbprofiler-out.json"), path("${sample_name}_report.json"), emit: tbprofiler_json + + when: + isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ + + script: + error_log = "${sample_name}_err.json" + tbprofiler_json = "${sample_name}.tbprofiler-out.json" + + """ + bgzip ${minos_vcf} + tb-profiler profile --vcf ${minos_vcf}.gz --threads ${task.cpus} + mv results/tbprofiler.results.json ${tbprofiler_json} + + cp ${sample_name}_report.json ${sample_name}_report_previous.json + + echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} + + jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${tbprofiler_json} > ${report_json} + """ +} + +process add_allelic_depth { + label 'low_memory' + label 'low_cpu' + label 'tbprofiler' + + input: + val(sample_name) + path(minos_vcf) + path(reference) + val(isSampleTB) + + output: + path("${sample_name}_allelic_depth.minos.vcf") + + when: + isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ + + script: + """ + samtools faidx $reference + samtools dict $reference -o ${reference.baseName}.dict + gatk VariantAnnotator -R $reference -V $minos_vcf -A DepthPerAlleleBySample -O ${sample_name}_allelic_depth.minos.vcf + """ + +} + +process gnomonicus { + + tag {sample_name} + label 'vcfpredict' + label 'low_memory' + label 'low_cpu' + + errorStrategy 'ignore' + + publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.gnomonicus-out.json', overwrite: 'true' + publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.csv', overwrite: 'true' + publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.fasta', overwrite: 'true' + publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' + + input: + tuple val(sample_name), path(vcf), val(isSampleTB), path(report_json) + path(genbank) + when: + isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ + + output: + tuple val(sample_name), path("${sample_name}.gnomonicus-out.json"), path("${sample_name}_report.json"), emit: gnomon_json + tuple val(sample_name), path("${sample_name}.effects.csv"), path("${sample_name}.mutations.csv"), emit: gnomon_csv optional true + tuple val(sample_name), path("*-fixed.fasta"), emit: gnomon_fasta + path("${sample_name}_err.json", emit: gnomon_log) + path ("${sample_name}_report.json", emit: gnomon_report) + + script: + minos_vcf = 
"${sample_name}.minos.vcf" + error_log = "${sample_name}_err.json" + + """ + gnomonicus --genome_object ${genbank} --catalogue ${params.amr_cat} --vcf_file ${minos_vcf} --output_dir . --json --fasta fixed + + cp ${sample_name}_report.json ${sample_name}_report_previous.json + + echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} + + jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}.gnomonicus-out.json > ${report_json} + """ + + stub: + gnomonicus_json = "${sample_name}.gnomonicus-out.json" + gnomonicus_fasta = "${sample_name}-fixed.fasta" + gnomonicus_effects = "${sample_name}.effects.csv" + gnomonicus_mutations = "${sample_name}.mutations.csv" + error_log = "${sample_name}_err.json" + + """ + touch ${gnomonicus_json} + touch ${gnomonicus_fasta} + touch ${gnomonicus_effects} + touch ${gnomonicus_mutations} + touch ${error_log} + """ +} + +process finalJson { + + tag {sample_name} + label 'vcfpredict' + label 'low_memory' + label 'low_cpu' + + errorStrategy 'ignore' + + publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*_report.json' + + input: + tuple val(sample_name), path(vcfmix_json), path(gnomon_json), path(report_json) + + output: + tuple val(sample_name), path("${sample_name}_report.json"), emit: final_json + + script: + """ + cp ${sample_name}_report.json ${sample_name}_report_previous.json + + jq -s ".[0] * .[1]" ${sample_name}_report_previous.json ${vcfmix_json} > ${report_json} + """ + + stub: + report_json = "${sample_name}_report.json" + + """ + touch ${report_json} + """ + +} diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf index 551788d..4bc7957 100644 --- a/modules/vcfpredictModules.nf +++ b/modules/vcfpredictModules.nf @@ -119,6 +119,7 @@ process add_allelic_depth { script: """ samtools faidx $reference + samtools dict $reference -o ${reference.baseName}.dict gatk VariantAnnotator -R $reference -V $minos_vcf -A DepthPerAlleleBySample -O ${sample_name}_allelic_depth.minos.vcf """ diff --git a/workflows/.ipynb_checkpoints/clockwork-checkpoint.nf b/workflows/.ipynb_checkpoints/clockwork-checkpoint.nf new file mode 100644 index 0000000..bf6f5e1 --- /dev/null +++ b/workflows/.ipynb_checkpoints/clockwork-checkpoint.nf @@ -0,0 +1,44 @@ +// enable dsl2 +nextflow.enable.dsl = 2 + +// import modules +include {alignToRef} from '../modules/clockworkModules.nf' params(params) +include {callVarsMpileup} from '../modules/clockworkModules.nf' params(params) +include {callVarsCortex} from '../modules/clockworkModules.nf' params(params) +include {minos} from '../modules/clockworkModules.nf' params(params) +include {gvcf} from '../modules/clockworkModules.nf' params(params) +include {getRefFromJSON} from '../modules/clockworkModules.nf' params(params) +include {getRefCortex} from '../modules/clockworkModules.nf' params(params) + +// define workflow component +workflow clockwork { + + take: + input_seqs_json + + main: + //get just the json + json = input_seqs_json.map{it[4]} + do_we_align = input_seqs_json.map{it[5]} + sample_name = input_seqs_json.map{it[0]} + + getRefFromJSON(json, do_we_align, sample_name) + alignToRef(input_seqs_json, getRefFromJSON.out) + + + callVarsMpileup(alignToRef.out.alignToRef_bam) + + getRefCortex(alignToRef.out.alignToRef_bam) + callVarsCortex(alignToRef.out.alignToRef_bam, getRefCortex.out) + + minos(alignToRef.out.alignToRef_bam.join(callVarsCortex.out.cortex_vcf, by: 0).join(callVarsMpileup.out.mpileup_vcf, by: 0)) + + 
gvcf(alignToRef.out.alignToRef_bam.join(minos.out.minos_vcf, by: 0)) + + emit: + + mpileup_vcf = callVarsMpileup.out.mpileup_vcf.join(minos.out.minos_report, by: 0) + minos_vcf = minos.out.minos_vcf.join(alignToRef.out.alignToRef_report, by: 0) + reference = getRefFromJSON.out + +} diff --git a/workflows/.ipynb_checkpoints/vcfpredict-checkpoint.nf b/workflows/.ipynb_checkpoints/vcfpredict-checkpoint.nf new file mode 100644 index 0000000..375e410 --- /dev/null +++ b/workflows/.ipynb_checkpoints/vcfpredict-checkpoint.nf @@ -0,0 +1,47 @@ +// enable dsl2 +nextflow.enable.dsl = 2 + +// import modules +include {vcfmix} from '../modules/vcfpredictModules.nf' params(params) +include {tbprofiler} from '../modules/vcfpredictModules.nf' params(params) +include {tbprofiler_update_db} from '../modules/vcfpredictModules.nf' params(params) +include {add_allelic_depth} from '../modules/vcfpredictModules.nf' params(params) +include {finalJson} from '../modules/vcfpredictModules.nf' params(params) + +// define workflow component +workflow vcfpredict { + + take: + clockwork_bcftools_tuple + minos_vcf_tuple + reference_fasta + + + main: + + if ( params.vcfmix == "yes" ) { + + vcfmix(clockwork_bcftools_tuple) + + } + + if ( params.resistance_profiler == "tb-profiler"){ + //get just the vcf + sample_name = minos_vcf_tuple.map{it[0]} + minos_vcf = minos_vcf_tuple.map{it[1]} + do_we_resistance_profile = minos_vcf_tuple.map{it[2]} + report_json = minos_vcf_tuple.map{it[3]} + + if (params.update_tbprofiler == "yes"){ + tbprofiler_update_db(reference_fasta) + } + + //add allelic depth back in: was calculated in mpileup but lost in minos + add_allelic_depth(sample_name, minos_vcf, reference_fasta, do_we_resistance_profile) + tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile) + } + + if (params.vcfmix == "yes" && params.resistance_profiler != "none"){ + finalJson(vcfmix.out.vcfmix_json.join(tbprofiler.out.tbprofiler_json, by: 0)) + } +} From f3345796f7d16278f02d241b9e262069fc643d5c Mon Sep 17 00:00:00 2001 From: whalleyt Date: Fri, 12 Jan 2024 11:40:23 +0000 Subject: [PATCH 31/44] rm checkpoints --- .../clockworkModules-checkpoint.nf | 313 ------------------ .../vcfpredictModules-checkpoint.nf | 216 ------------ .../clockwork-checkpoint.nf | 44 --- .../vcfpredict-checkpoint.nf | 47 --- 4 files changed, 620 deletions(-) delete mode 100644 modules/.ipynb_checkpoints/clockworkModules-checkpoint.nf delete mode 100644 modules/.ipynb_checkpoints/vcfpredictModules-checkpoint.nf delete mode 100644 workflows/.ipynb_checkpoints/clockwork-checkpoint.nf delete mode 100644 workflows/.ipynb_checkpoints/vcfpredict-checkpoint.nf diff --git a/modules/.ipynb_checkpoints/clockworkModules-checkpoint.nf b/modules/.ipynb_checkpoints/clockworkModules-checkpoint.nf deleted file mode 100644 index d793a34..0000000 --- a/modules/.ipynb_checkpoints/clockworkModules-checkpoint.nf +++ /dev/null @@ -1,313 +0,0 @@ -// modules for the clockwork workflow - -process getRefFromJSON { - tag { sample_name } - label 'clockwork' - label 'low_memory' - label 'low_cpu' - - input: - path(species_json) - val(do_we_align) - val(sample_name) - - when: - do_we_align =~ /NOW\_ALIGN\_TO\_REF\_${sample_name}/ - - output: - stdout - - script: - """ - ref_string=\$(jq -r '.top_hit.file_paths.ref_fa' ${species_json}) - echo "\$ref_string" - """ - - -} - -process alignToRef { - /** - * @QCcheckpoint fail if insufficient number and/or quality of read alignments to the reference genome - */ - - tag { sample_name } - label 'clockwork' - 
label 'normal_cpu' - label 'medium_memory' - - publishDir "${params.output_dir}/$sample_name/output_bam", mode: 'copy', overwrite: 'true', pattern: '*{.bam,.bam.bai,_alignmentStats.json}' - publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' - - input: - tuple val(sample_name), path(fq1), path(fq2), path(software_json), path(species_json), val(doWeAlign) - path(reference_path) - - when: - doWeAlign =~ /NOW\_ALIGN\_TO\_REF\_${sample_name}/ - - output: - tuple val(sample_name), path("${sample_name}_report.json"), path("${sample_name}.bam"), path("${sample_name}.fa"), stdout, emit: alignToRef_bam - path("${sample_name}.bam.bai", emit: alignToRef_bai) - path("${sample_name}_alignmentStats.json", emit: alignToRef_json) - path "${sample_name}_err.json", emit: alignToRef_log optional true - tuple val(sample_name), path("${sample_name}_report.json"), emit: alignToRef_report - - script: - bam = "${sample_name}.bam" - bai = "${sample_name}.bam.bai" - stats = "${sample_name}.stats" - stats_json = "${sample_name}_alignmentStats.json" - report_json = "${sample_name}_report.json" - error_log = "${sample_name}_err.json" - - """ - echo $reference_path - cp ${reference_path} ${sample_name}.fa - - minimap2 -ax sr ${sample_name}.fa -t ${task.cpus} $fq1 $fq2 | samtools fixmate -m - - | samtools sort -T tmp - | samtools markdup --reference ${sample_name}.fa - minimap.bam - - java -jar /usr/local/bin/picard.jar AddOrReplaceReadGroups INPUT=minimap.bam OUTPUT=${bam} RGID=${sample_name} RGLB=lib RGPL=Illumina RGPU=unit RGSM=sample - - samtools index ${bam} ${bai} - samtools stats ${bam} > ${stats} - - parse_samtools_stats.py ${bam} ${stats} > ${stats_json} - create_final_json.py ${stats_json} ${species_json} - - cp ${sample_name}_report.json ${sample_name}_report_previous.json - - jq -s ".[0] * .[1]" ${software_json} ${sample_name}_report_previous.json > ${report_json} - - continue=\$(jq -r '.summary_questions.continue_to_clockwork' ${report_json}) - - if [ \$continue == 'yes' ]; then printf "NOW_VARCALL_${sample_name}"; elif [ \$continue == 'no' ]; then echo '{"error":"insufficient number and/or quality of read alignments to the reference genome"}' | jq '.' 
> ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi - """ - - stub: - bam = "${sample_name}.bam" - bai = "${sample_name}.bam.bai" - stats = "${sample_name}.stats" - stats_json = "${sample_name}_alignmentStats.json" - out_json = "${sample_name}_report.json" - error_log = "${sample_name}_err.json" - - """ - touch ${sample_name}.fa - touch ${bam} - touch ${bai} - touch ${stats} - touch ${stats_json} - touch ${out_json} - touch ${error_log} - printf ${params.alignToRef_doWeVarCall} - """ -} - -process callVarsMpileup { - /** - * @QCcheckpoint none - */ - - tag { sample_name } - label 'clockwork' - label 'normal_cpu' - label 'low_memory' - - publishDir "${params.output_dir}/$sample_name/output_vcfs", mode: 'copy', pattern: '*.vcf' - - input: - tuple val(sample_name), path(report_json), path(bam), path(ref), val(doWeVarCall) - - when: - doWeVarCall =~ /NOW\_VARCALL\_${sample_name}/ - - output: - tuple val(sample_name), path("${sample_name}.bcftools.vcf"), emit: mpileup_vcf - - script: - bcftools_vcf = "${sample_name}.bcftools.vcf" - - """ - bcftools mpileup -Ou -a 'INFO/AD' -f ${ref} ${bam} | bcftools call --threads ${task.cpus} -vm -O v -o ${bcftools_vcf} - """ - - stub: - bcftools_vcf = "${sample_name}.bcftools.vcf" - - """ - touch ${bcftools_vcf} - """ -} - -process getRefCortex { - tag { sample_name } - label 'clockwork' - label 'low_memory' - label 'low_cpu' - - input: - tuple val(sample_name), path(report_json), path(bam), path(ref), val(doWeVarCall) - - when: - doWeVarCall =~ /NOW\_VARCALL\_${sample_name}/ - - output: - stdout - - script: - """ - ref_dir=\$(jq -r '.top_hit.file_paths.clockwork_ref_dir' ${report_json}) - echo "\$ref_dir" - """ - - -} - -process callVarsCortex { - /** - * @QCcheckpoint none - */ - - tag { sample_name } - label 'clockwork' - label 'normal_cpu' - label 'medium_memory' - - publishDir "${params.output_dir}/$sample_name/output_vcfs", mode: 'copy', pattern: '*.vcf' - - input: - tuple val(sample_name), path(report_json), path(bam), path(ref), val(doWeVarCall) - path(ref_dir) - - when: - doWeVarCall =~ /NOW\_VARCALL\_${sample_name}/ - - output: - tuple val(sample_name), path("${sample_name}.cortex.vcf"), emit: cortex_vcf - - script: - cortex_vcf = "${sample_name}.cortex.vcf" - - """ - cp -r ${ref_dir}/* . - - clockwork cortex . 
${bam} cortex ${sample_name} - cp cortex/cortex.out/vcfs/cortex_wk_flow_I_RefCC_FINALcombined_BC_calls_at_all_k.raw.vcf ${cortex_vcf} - """ - - stub: - cortex_vcf = "${sample_name}.cortex.vcf" - - """ - touch ${cortex_vcf} - """ -} - -process minos { - /** - * @QCcheckpoint check if top species is TB, if yes pass vcf to resistance profiling - */ - - tag { sample_name } - label 'clockwork' - label 'medium_memory' - label 'normal_cpu' - - publishDir "${params.output_dir}/$sample_name/output_vcfs", mode: 'copy', pattern: '*.vcf' - publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' - - input: - tuple val(sample_name), path(report_json), path(bam), path(ref), val(doWeVarCall), path(cortex_vcf), path(bcftools_vcf) - - output: - tuple val(sample_name), path(report_json), path(bam), path(ref), emit: minos_bam - tuple val(sample_name), path("${sample_name}.minos.vcf"), stdout, emit: minos_vcf - tuple val(sample_name), path("${sample_name}_report.json"), emit: minos_report - path "${sample_name}_err.json", emit: minos_log optional true - - script: - minos_vcf = "${sample_name}.minos.vcf" - error_log = "${sample_name}_err.json" - - """ - awk '{print \$1}' ${ref} > ref.fa - - minos adjudicate --force --reads ${bam} minos ref.fa ${bcftools_vcf} ${cortex_vcf} - cp minos/final.vcf ${minos_vcf} - rm -rf minos - - top_hit=\$(jq -r '.top_hit.name' ${report_json}) - - cp ${sample_name}_report.json ${sample_name}_report_previous.json - - if [[ \$top_hit =~ ^"Mycobacterium tuberculosis" ]]; then printf "CREATE_ANTIBIOGRAM_${sample_name}"; else echo '{"resistance-profiling-warning":"sample is not TB so cannot produce antibiogram using resistance profiling tools"}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi - """ - - stub: - minos_vcf = "${sample_name}.minos.vcf" - error_log = "${sample_name}_err.json" - - """ - touch ${minos_vcf} - touch ${error_log} - printf ${params.minos_isSampleTB} - """ -} - -process gvcf { - /** - * @QCcheckpoint none - */ - - tag { sample_name } - label 'clockwork' - label 'normal_cpu' - label 'low_memory' - - publishDir "${params.output_dir}/$sample_name/output_fasta", mode: 'copy', pattern: '*.fa' - publishDir "${params.output_dir}/$sample_name/output_vcfs", mode: 'copy', pattern: '*.vcf.gz' - publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' - - input: - tuple val(sample_name), path(report_json), path(bam), path(ref), val(doWeValCall), path(minos_vcf), val(isSampleTB) - - output: - path("${sample_name}.gvcf.vcf.gz", emit: gvcf) - path("${sample_name}.fa", emit: gvcf_fa) - path "${sample_name}_err.json", emit: gvcf_log optional true - path "${sample_name}_report.json", emit: gvcf_report optional true - - script: - gvcf = "${sample_name}.gvcf.vcf" - gvcf_fa = "${sample_name}.fa" - error_log = "${sample_name}_err.json" - - """ - awk '{print \$1}' ${ref} > ref.fa - - samtools mpileup -ugf ref.fa ${bam} | bcftools call --threads ${task.cpus} -m -O v -o samtools_all_pos.vcf - - clockwork gvcf_from_minos_and_samtools ref.fa ${minos_vcf} samtools_all_pos.vcf ${gvcf} - clockwork gvcf_to_fasta ${gvcf} ${gvcf_fa} - - rm samtools_all_pos.vcf - gzip ${gvcf} - - cp ${sample_name}_report.json ${sample_name}_report_previous.json - - if [ ${params.vcfmix} == "no" ] && [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' 
> ${error_log} && jq -s ".[0] * .[1]" ${error_log} ${sample_name}_report_previous.json > ${report_json}; fi - """ - - stub: - gvcf = "${sample_name}.gvcf.vcf.gz" - gvcf_fa = "${sample_name}.fa" - error_log = "${sample_name}_err.json" - - """ - touch ${gvcf} - touch ${gvcf_fa} - touch ${error_log} - """ -} - diff --git a/modules/.ipynb_checkpoints/vcfpredictModules-checkpoint.nf b/modules/.ipynb_checkpoints/vcfpredictModules-checkpoint.nf deleted file mode 100644 index 4bc7957..0000000 --- a/modules/.ipynb_checkpoints/vcfpredictModules-checkpoint.nf +++ /dev/null @@ -1,216 +0,0 @@ -// modules for the vcfpredict workflow - -process vcfmix { - - tag {sample_name} - label 'vcfpredict' - label 'low_memory' - label 'low_cpu' - - errorStrategy 'ignore' - - publishDir "${params.output_dir}/${sample_name}/output_vcfs", mode: 'copy', pattern: '*_f-stats.json', overwrite: 'true' - publishDir "${params.output_dir}/${sample_name}/output_vcfs", mode: 'copy', pattern: '*.csv', overwrite: 'true' - publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' - - input: - tuple val(sample_name), path(vcf), path(report_json) - - output: - tuple val(sample_name), path("${sample_name}_f-stats.json"), emit: vcfmix_json - tuple val(sample_name), path("${sample_name}_f-stats.json"), path("${sample_name}_vcfmix-regions.csv"), emit: vcfmix_json_csv - path "${sample_name}_err.json", emit: vcfmix_log optional true - path ("${sample_name}_report.json", emit: vcfmix_report) - - script: - bcftools_vcf = "${sample_name}.bcftools.vcf" - error_log = "${sample_name}_err.json" - - """ - run-vcfmix.py ${bcftools_vcf} - - cp ${sample_name}_report.json ${sample_name}_report_previous.json - - jq -s ".[0] * .[1]" ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json} - - if [ ${params.resistance_profiler} == "none" ]; then echo '{"complete":"workflow complete without error"}' | jq '.' 
> ${error_log} && jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}_f-stats.json > ${report_json}; fi - """ - - stub: - vcfmix_json = "${sample_name}_f-stats.json" - vcfmix_csv = "${sample_name}_vcfmix-regions.csv" - error_log = "${sample_name}_err.json" - - """ - touch ${vcfmix_json} - touch ${vcfmix_csv} - touch ${error_log} - """ -} - -process tbprofiler_update_db { - label 'low_memory' - label 'low_cpu' - label 'tbprofiler' - - input: - path(reference) - - script: - """ - tb-profiler update_tbdb --match_ref $reference - """ -} - -process tbprofiler { - label 'medium_memory' - label 'medium_cpu' - label 'tbprofiler' - - publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.tbprofiler-out.json', overwrite: 'true' - publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' - - input: - val(sample_name) - path(minos_vcf) - path(report_json) - val(isSampleTB) - - output: - tuple val(sample_name), path("${sample_name}.tbprofiler-out.json"), path("${sample_name}_report.json"), emit: tbprofiler_json - - when: - isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ - - script: - error_log = "${sample_name}_err.json" - tbprofiler_json = "${sample_name}.tbprofiler-out.json" - - """ - bgzip ${minos_vcf} - tb-profiler profile --vcf ${minos_vcf}.gz --threads ${task.cpus} - mv results/tbprofiler.results.json ${tbprofiler_json} - - cp ${sample_name}_report.json ${sample_name}_report_previous.json - - echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} - - jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${tbprofiler_json} > ${report_json} - """ -} - -process add_allelic_depth { - label 'low_memory' - label 'low_cpu' - label 'tbprofiler' - - input: - val(sample_name) - path(minos_vcf) - path(reference) - val(isSampleTB) - - output: - path("${sample_name}_allelic_depth.minos.vcf") - - when: - isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ - - script: - """ - samtools faidx $reference - samtools dict $reference -o ${reference.baseName}.dict - gatk VariantAnnotator -R $reference -V $minos_vcf -A DepthPerAlleleBySample -O ${sample_name}_allelic_depth.minos.vcf - """ - -} - -process gnomonicus { - - tag {sample_name} - label 'vcfpredict' - label 'low_memory' - label 'low_cpu' - - errorStrategy 'ignore' - - publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.gnomonicus-out.json', overwrite: 'true' - publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.csv', overwrite: 'true' - publishDir "${params.output_dir}/${sample_name}/antibiogram", mode: 'copy', pattern: '*.fasta', overwrite: 'true' - publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*{_err.json,_report.json}' - - input: - tuple val(sample_name), path(vcf), val(isSampleTB), path(report_json) - path(genbank) - when: - isSampleTB =~ /CREATE\_ANTIBIOGRAM\_${sample_name}/ - - output: - tuple val(sample_name), path("${sample_name}.gnomonicus-out.json"), path("${sample_name}_report.json"), emit: gnomon_json - tuple val(sample_name), path("${sample_name}.effects.csv"), path("${sample_name}.mutations.csv"), emit: gnomon_csv optional true - tuple val(sample_name), path("*-fixed.fasta"), emit: gnomon_fasta - path("${sample_name}_err.json", emit: gnomon_log) - path ("${sample_name}_report.json", emit: gnomon_report) - - script: - minos_vcf = 
"${sample_name}.minos.vcf" - error_log = "${sample_name}_err.json" - - """ - gnomonicus --genome_object ${genbank} --catalogue ${params.amr_cat} --vcf_file ${minos_vcf} --output_dir . --json --fasta fixed - - cp ${sample_name}_report.json ${sample_name}_report_previous.json - - echo '{"complete":"workflow complete without error"}' | jq '.' > ${error_log} - - jq -s ".[0] * .[1] * .[2]" ${error_log} ${sample_name}_report_previous.json ${sample_name}.gnomonicus-out.json > ${report_json} - """ - - stub: - gnomonicus_json = "${sample_name}.gnomonicus-out.json" - gnomonicus_fasta = "${sample_name}-fixed.fasta" - gnomonicus_effects = "${sample_name}.effects.csv" - gnomonicus_mutations = "${sample_name}.mutations.csv" - error_log = "${sample_name}_err.json" - - """ - touch ${gnomonicus_json} - touch ${gnomonicus_fasta} - touch ${gnomonicus_effects} - touch ${gnomonicus_mutations} - touch ${error_log} - """ -} - -process finalJson { - - tag {sample_name} - label 'vcfpredict' - label 'low_memory' - label 'low_cpu' - - errorStrategy 'ignore' - - publishDir "${params.output_dir}/$sample_name", mode: 'copy', overwrite: 'true', pattern: '*_report.json' - - input: - tuple val(sample_name), path(vcfmix_json), path(gnomon_json), path(report_json) - - output: - tuple val(sample_name), path("${sample_name}_report.json"), emit: final_json - - script: - """ - cp ${sample_name}_report.json ${sample_name}_report_previous.json - - jq -s ".[0] * .[1]" ${sample_name}_report_previous.json ${vcfmix_json} > ${report_json} - """ - - stub: - report_json = "${sample_name}_report.json" - - """ - touch ${report_json} - """ - -} diff --git a/workflows/.ipynb_checkpoints/clockwork-checkpoint.nf b/workflows/.ipynb_checkpoints/clockwork-checkpoint.nf deleted file mode 100644 index bf6f5e1..0000000 --- a/workflows/.ipynb_checkpoints/clockwork-checkpoint.nf +++ /dev/null @@ -1,44 +0,0 @@ -// enable dsl2 -nextflow.enable.dsl = 2 - -// import modules -include {alignToRef} from '../modules/clockworkModules.nf' params(params) -include {callVarsMpileup} from '../modules/clockworkModules.nf' params(params) -include {callVarsCortex} from '../modules/clockworkModules.nf' params(params) -include {minos} from '../modules/clockworkModules.nf' params(params) -include {gvcf} from '../modules/clockworkModules.nf' params(params) -include {getRefFromJSON} from '../modules/clockworkModules.nf' params(params) -include {getRefCortex} from '../modules/clockworkModules.nf' params(params) - -// define workflow component -workflow clockwork { - - take: - input_seqs_json - - main: - //get just the json - json = input_seqs_json.map{it[4]} - do_we_align = input_seqs_json.map{it[5]} - sample_name = input_seqs_json.map{it[0]} - - getRefFromJSON(json, do_we_align, sample_name) - alignToRef(input_seqs_json, getRefFromJSON.out) - - - callVarsMpileup(alignToRef.out.alignToRef_bam) - - getRefCortex(alignToRef.out.alignToRef_bam) - callVarsCortex(alignToRef.out.alignToRef_bam, getRefCortex.out) - - minos(alignToRef.out.alignToRef_bam.join(callVarsCortex.out.cortex_vcf, by: 0).join(callVarsMpileup.out.mpileup_vcf, by: 0)) - - gvcf(alignToRef.out.alignToRef_bam.join(minos.out.minos_vcf, by: 0)) - - emit: - - mpileup_vcf = callVarsMpileup.out.mpileup_vcf.join(minos.out.minos_report, by: 0) - minos_vcf = minos.out.minos_vcf.join(alignToRef.out.alignToRef_report, by: 0) - reference = getRefFromJSON.out - -} diff --git a/workflows/.ipynb_checkpoints/vcfpredict-checkpoint.nf b/workflows/.ipynb_checkpoints/vcfpredict-checkpoint.nf deleted file mode 100644 index 
375e410..0000000 --- a/workflows/.ipynb_checkpoints/vcfpredict-checkpoint.nf +++ /dev/null @@ -1,47 +0,0 @@ -// enable dsl2 -nextflow.enable.dsl = 2 - -// import modules -include {vcfmix} from '../modules/vcfpredictModules.nf' params(params) -include {tbprofiler} from '../modules/vcfpredictModules.nf' params(params) -include {tbprofiler_update_db} from '../modules/vcfpredictModules.nf' params(params) -include {add_allelic_depth} from '../modules/vcfpredictModules.nf' params(params) -include {finalJson} from '../modules/vcfpredictModules.nf' params(params) - -// define workflow component -workflow vcfpredict { - - take: - clockwork_bcftools_tuple - minos_vcf_tuple - reference_fasta - - - main: - - if ( params.vcfmix == "yes" ) { - - vcfmix(clockwork_bcftools_tuple) - - } - - if ( params.resistance_profiler == "tb-profiler"){ - //get just the vcf - sample_name = minos_vcf_tuple.map{it[0]} - minos_vcf = minos_vcf_tuple.map{it[1]} - do_we_resistance_profile = minos_vcf_tuple.map{it[2]} - report_json = minos_vcf_tuple.map{it[3]} - - if (params.update_tbprofiler == "yes"){ - tbprofiler_update_db(reference_fasta) - } - - //add allelic depth back in: was calculated in mpileup but lost in minos - add_allelic_depth(sample_name, minos_vcf, reference_fasta, do_we_resistance_profile) - tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile) - } - - if (params.vcfmix == "yes" && params.resistance_profiler != "none"){ - finalJson(vcfmix.out.vcfmix_json.join(tbprofiler.out.tbprofiler_json, by: 0)) - } -} From f331b89a4347ef2c7b9ef97cd8ab499f524a7ae1 Mon Sep 17 00:00:00 2001 From: whalleyt Date: Wed, 17 Jan 2024 10:20:15 +0000 Subject: [PATCH 32/44] remove print --- workflows/preprocessing.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index 5097dad..fbb19aa 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -66,7 +66,6 @@ workflow preprocessing { bowtie2(kraken2.out.kraken2_fqs, bowtie_dir.toList()) identifyBacterialContaminants(bowtie2.out.bowtie2_fqs.join(speciation_report, by: 0).join(kraken2.out.kraken2_json, by: 0), resource_dir, refseq_path) - identifyBacterialContaminants.out.prev_sample_json.view() downloadContamGenomes(identifyBacterialContaminants.out.contam_list) From fdc78927c2a9aa5b2157465852e0ce7d2383144a Mon Sep 17 00:00:00 2001 From: whalleyt Date: Thu, 18 Jan 2024 12:47:36 +0000 Subject: [PATCH 33/44] minimap on original reference, not copy --- modules/clockworkModules.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/clockworkModules.nf b/modules/clockworkModules.nf index d793a34..f45ac62 100644 --- a/modules/clockworkModules.nf +++ b/modules/clockworkModules.nf @@ -47,7 +47,7 @@ process alignToRef { doWeAlign =~ /NOW\_ALIGN\_TO\_REF\_${sample_name}/ output: - tuple val(sample_name), path("${sample_name}_report.json"), path("${sample_name}.bam"), path("${sample_name}.fa"), stdout, emit: alignToRef_bam + tuple val(sample_name), path("${sample_name}_report.json"), path("${sample_name}.bam"), path(reference_path), stdout, emit: alignToRef_bam path("${sample_name}.bam.bai", emit: alignToRef_bai) path("${sample_name}_alignmentStats.json", emit: alignToRef_json) path "${sample_name}_err.json", emit: alignToRef_log optional true @@ -63,9 +63,9 @@ process alignToRef { """ echo $reference_path - cp ${reference_path} ${sample_name}.fa + cp $reference_path ${sample_name}.fa - minimap2 -ax sr ${sample_name}.fa -t ${task.cpus} $fq1 $fq2 | samtools 
From fdc78927a2a9aa5b2157465852e0ce7d2383144a Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Thu, 18 Jan 2024 12:47:36 +0000
Subject: [PATCH 33/44] minimap on original reference, not copy

---
 modules/clockworkModules.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/clockworkModules.nf b/modules/clockworkModules.nf
index d793a34..f45ac62 100644
--- a/modules/clockworkModules.nf
+++ b/modules/clockworkModules.nf
@@ -47,7 +47,7 @@ process alignToRef {
     doWeAlign =~ /NOW\_ALIGN\_TO\_REF\_${sample_name}/
 
     output:
-    tuple val(sample_name), path("${sample_name}_report.json"), path("${sample_name}.bam"), path("${sample_name}.fa"), stdout, emit: alignToRef_bam
+    tuple val(sample_name), path("${sample_name}_report.json"), path("${sample_name}.bam"), path(reference_path), stdout, emit: alignToRef_bam
     path("${sample_name}.bam.bai", emit: alignToRef_bai)
     path("${sample_name}_alignmentStats.json", emit: alignToRef_json)
     path "${sample_name}_err.json", emit: alignToRef_log optional true
@@ -63,9 +63,9 @@ process alignToRef {
     """
     echo $reference_path
-    cp ${reference_path} ${sample_name}.fa
+    cp $reference_path ${sample_name}.fa
 
-    minimap2 -ax sr ${sample_name}.fa -t ${task.cpus} $fq1 $fq2 | samtools fixmate -m - - | samtools sort -T tmp - | samtools markdup --reference ${sample_name}.fa - minimap.bam
+    minimap2 -ax sr $reference_path -t ${task.cpus} $fq1 $fq2 | samtools fixmate -m - - | samtools sort -T tmp - | samtools markdup --reference $reference_path - minimap.bam
 
     java -jar /usr/local/bin/picard.jar AddOrReplaceReadGroups INPUT=minimap.bam OUTPUT=${bam} RGID=${sample_name} RGLB=lib RGPL=Illumina RGPU=unit RGSM=sample
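As a side note, the alignment this process produces can be sanity-checked with standard samtools commands; a small sketch, with a hypothetical output BAM name standing in for ${sample_name}.bam:

    # verify the BAM is intact and carries the @RG line injected by Picard AddOrReplaceReadGroups
    samtools quickcheck sample.bam && samtools view -H sample.bam | grep '^@RG'
    # confirm samtools markdup actually flagged duplicates
    samtools flagstat sample.bam | grep duplicate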
From 024137f58954abcd6b11ad1d450484d3293d76ea Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Wed, 24 Jan 2024 10:16:37 +0000
Subject: [PATCH 34/44] tidy up

---
 .../containers-checkpoint.config              | 49 ------------
 config/containers.config                      |  1 -
 ...work-0.9.7 => Singularity.clockwork-0.9.8} | 10 ++-
 ...-0.9.7 => Singularity.preprocessing-0.9.8} | 14 ++--
 singularity/Singularity.tbprofiler-0.9.8      | 70 +++++++++++++++++++
 ...ict-0.9.7 => Singularity.vcfpredict-0.9.8} | 37 ++--------
 6 files changed, 94 insertions(+), 87 deletions(-)
 delete mode 100644 config/.ipynb_checkpoints/containers-checkpoint.config
 rename singularity/{Singularity.clockwork-0.9.7 => Singularity.clockwork-0.9.8} (97%)
 rename singularity/{Singularity.preprocessing-0.9.7 => Singularity.preprocessing-0.9.8} (95%)
 create mode 100644 singularity/Singularity.tbprofiler-0.9.8
 rename singularity/{Singularity.vcfpredict-0.9.7 => Singularity.vcfpredict-0.9.8} (51%)

diff --git a/config/.ipynb_checkpoints/containers-checkpoint.config b/config/.ipynb_checkpoints/containers-checkpoint.config
deleted file mode 100644
index dece260..0000000
--- a/config/.ipynb_checkpoints/containers-checkpoint.config
+++ /dev/null
@@ -1,49 +0,0 @@
-params{
-    container_enabled = "true"
-    container_enabled = "true"
-    resource_dir = "/resources"
-}
-
-
-process {
-    update_tbprofiler = "false"
-
-
-    withLabel:low_cpu {cpus = 2}
-    withLabel:normal_cpu { cpus = 8 }
-    withLabel:low_memory { memory = '5GB' }
-    withLabel:medium_memory { memory = '10GB' }
-    withLabel:high_memory { memory = '18GB' }
-
-    withLabel:getversion {
-        container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8"
-    }
-
-    withLabel:preprocessing {
-        container = "quay.io/pathogen-genomics-cymru/preprocessing:0.9.8"
-    }
-
-    withLabel:tbprofiler {
-        container = "quay.io/pathogen-genomics-cymru/tbprofiler:0.9.8"
-    }
-
-    withName:downloadContamGenomes {
-        shell = ['/bin/bash','-u']
-        errorStrategy = { task.exitStatus in 100..113 ? 'retry' : 'terminate' }
-        maxRetries = 5
-    }
-
-    withLabel:retryAfanc {
-        shell = ['/bin/bash','-u']
-        errorStrategy = {task.exitStatus == 1 ? 'retry' : 'ignore' }
-        maxRetries = 5
-    }
-
-    withLabel:clockwork {
-        container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.8"
-    }
-
-    withLabel:vcfpredict {
-        container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.8"
-    }
- }
\ No newline at end of file
diff --git a/config/containers.config b/config/containers.config
index dece260..e961b71 100644
--- a/config/containers.config
+++ b/config/containers.config
@@ -1,7 +1,6 @@
 params{
     container_enabled = "true"
     container_enabled = "true"
-    resource_dir = "/resources"
 }
diff --git a/singularity/Singularity.clockwork-0.9.7 b/singularity/Singularity.clockwork-0.9.8
similarity index 97%
rename from singularity/Singularity.clockwork-0.9.7
rename to singularity/Singularity.clockwork-0.9.8
index f3f3c24..0e13714 100644
--- a/singularity/Singularity.clockwork-0.9.7
+++ b/singularity/Singularity.clockwork-0.9.8
@@ -2,6 +2,8 @@ Bootstrap: docker
 From: debian:buster
 Stage: spython-base
 
+%files
+bin/ /opt/bin/
 %labels
 maintainer="pricea35@cardiff.ac.uk"
 about.summary="container for the clockwork workflow"
@@ -26,6 +28,9 @@ clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5
 PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all"
 PYTHON="python2.7 python-dev"
 
+PATH=/opt/bin:$PATH
+
+
 apt-get update \
 && apt-get install -y $PACKAGES $PYTHON \
 && curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \
@@ -36,7 +41,7 @@ apt-get update \
 && ln -s /usr/local/bin/python3.6 /usr/local/bin/python3 \
 && ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \
 && pip3 install --upgrade pip \
-&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools \
+&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \
 && wget -qO - https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | apt-key add - \
 && add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/ \
 && apt-get update && apt-get install -y adoptopenjdk-8-hotspot
@@ -136,6 +141,7 @@ export python_version=3.6.5
 export clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5
 export PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all"
 export PYTHON="python2.7 python-dev"
+export PATH=/opt/bin:$PATH
 export CLOCKWORK_CORTEX_DIR=/cortex
 export PATH=${PATH}:/clockwork/python/scripts
 export PICARD_JAR=/usr/local/bin/picard.jar
@@ -145,4 +151,4 @@ export LANGUAGE=en_US.UTF-8
 %runscript
 exec /bin/bash "$@"
 %startscript
-exec /bin/bash "$@"
\ No newline at end of file
+exec /bin/bash "$@"
diff --git a/singularity/Singularity.preprocessing-0.9.7 b/singularity/Singularity.preprocessing-0.9.8
similarity index 95%
rename from singularity/Singularity.preprocessing-0.9.7
rename to singularity/Singularity.preprocessing-0.9.8
index 7ca3b35..a164d85 100644
--- a/singularity/Singularity.preprocessing-0.9.7
+++ b/singularity/Singularity.preprocessing-0.9.8
@@ -2,6 +2,8 @@ Bootstrap: docker
 From: ubuntu:focal
 Stage: spython-base
 
+%files
+bin/ /opt/bin/
 %labels
 maintainer="pricea35@cardiff.ac.uk"
 about.summary="container for the preprocessing workflow"
@@ -25,13 +27,15 @@ fastani_version=1.33
 PACKAGES="procps curl git wget build-essential zlib1g-dev libncurses-dev libz-dev libbz2-dev liblzma-dev libcurl4-openssl-dev libgsl-dev rsync unzip ncbi-blast+ pigz jq libtbb-dev openjdk-11-jre-headless autoconf r-base-core locales locales-all"
 PYTHON="python3 python3-pip python3-dev"
-PYTHON_PACKAGES="biopython"
+PYTHON_PACKAGES="biopython awscli boto3"
 
 PATH=${PATH}:/usr/local/bin/mccortex/bin:/usr/local/bin/bwa-${bwa_version}:/opt/edirect
 LD_LIBRARY_PATH=/usr/local/lib
 
 export DEBIAN_FRONTEND="noninteractive"
 
+PATH=/opt/bin:$PATH
+
 apt-get update \
 && DEBIAN_FRONTEND="noninteractive" apt-get install -y $PACKAGES $PYTHON \
 && pip3 install --upgrade pip \
@@ -82,7 +86,7 @@ curl -fsSL https://github.com/OpenGene/fastp/archive/v${fastp_version}.tar.gz | tar -xz \
 && cd .. \
 && rm -r fastp-${fastp_version}
 
-wget http://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v${fastqc_version}.zip \
+wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v${fastqc_version}.zip \
 && unzip fastqc_v${fastqc_version}.zip \
 && chmod +x FastQC/fastqc \
 && mv FastQC/* /usr/local/bin \
@@ -102,10 +106,9 @@ curl -fsSL https://github.com/ArthurVM/Afanc/archive/refs/tags/v${afanc_version}.tar.gz | tar -xz \
 && mv mash-Linux64-v${mash_version}/mash /usr/local/bin \
 && rm -r mash-Linux* \
 && wget https://github.com/ParBLiSS/FastANI/releases/download/v${fastani_version}/fastANI-Linux64-v${fastani_version}.zip \
-&& unzip fastANI-Linux64-v${fastani_version}.zip  \
+&& unzip fastANI-Linux64-v${fastani_version}.zip \
 && mv fastANI /usr/local/bin
-
 sh -c "$(curl -fsSL ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh)" \
 && mkdir -p /opt/edirect \
 && mv /root/edirect/* /opt/edirect
@@ -149,9 +152,10 @@ export mash_version=2.3
 export fastani_version=1.33
 export PACKAGES="procps curl git wget build-essential zlib1g-dev libncurses-dev libz-dev libbz2-dev liblzma-dev libcurl4-openssl-dev libgsl-dev rsync unzip ncbi-blast+ pigz jq libtbb-dev openjdk-11-jre-headless autoconf r-base-core locales locales-all"
 export PYTHON="python3 python3-pip python3-dev"
-export PYTHON_PACKAGES="biopython"
+export PYTHON_PACKAGES="biopython awscli boto3"
 export PATH=${PATH}:/usr/local/bin/mccortex/bin:/usr/local/bin/bwa-${bwa_version}:/opt/edirect
 export LD_LIBRARY_PATH=/usr/local/lib
+export PATH=/opt/bin:$PATH
 export LC_ALL=en_US.UTF-8
 export LANG=en_US.UTF-8
 export LANGUAGE=en_US.UTF-8
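For context, the awscli/boto3 additions above appear to be what lets the CLIMB profile pull pipeline resources straight from S3. A purely hypothetical fetch of the Bowtie2 index, reusing the bucket path that appears in the climb profile's bowtie2_index param:

    # sketch only: bucket path taken from the climb profile; local dir is arbitrary
    aws s3 cp --recursive s3://microbial-bioin-sp3/bowtie_hg19 ./bowtie_hg19
    ls ./bowtie_hg19/*.bt2   # a Bowtie2 index is a set of .bt2 files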
+website="https://github.com/jodyphelan/TBProfiler/" +license="https://github.com/jodyphelan/TBProfiler/blob/master/LICENSE" +maintainer="John Arnn" +maintainer.email="jarnn@utah.gov" +maintainer2="Curtis Kapsak" +maintainer2.email="kapsakcj@gmail.com" +%post + +#copy the reference genome to pre-compute our index + +su - root # USER root +mkdir -p / +cd / + +TBPROFILER_VER="5.0.1" + +# this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/ +# commits are found on https://github.com/jodyphelan/tbdb/commits/master +# this was the latest commit as of 2023-10-26 +TBDB_VER="e25540b" + +# LABEL instructions tag the image with metadata that might be important to the user + +# Install dependencies via apt-get; cleanup apt garbage +apt-get update && apt-get install -y --no-install-recommends \ +wget \ +ca-certificates \ +procps && \ +apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install tb-profiler via bioconda; install into 'base' conda env +micromamba install --yes --name base --channel conda-forge --channel bioconda \ +tb-profiler=${TBPROFILER_VER} + +micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4 +micromamba install --yes --name base --channel conda-forge --channel bioconda samtools +micromamba install --yes --name base --channel conda-forge jq +micromamba clean --all --yes + +# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time +PATH="/opt/conda/bin:${PATH}" + +# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json +# can also run 'tb-profiler list_db' to find the same version info +# In 5.0.1 updating_tbdb does not work with tb-profiler update_tbdb --commit ${TBDB_VER} +tb-profiler update_tbdb --commit ${TBDB_VER} + +mkdir -p /data +cd /data +tb-profiler update_tbdb --match_ref tuberculosis.fasta +%environment +export PATH="/opt/conda/bin:${PATH}" +%runscript +cd /data +exec /bin/bash "$@" +%startscript +cd /data +exec /bin/bash "$@" diff --git a/singularity/Singularity.vcfpredict-0.9.7 b/singularity/Singularity.vcfpredict-0.9.8 similarity index 51% rename from singularity/Singularity.vcfpredict-0.9.7 rename to singularity/Singularity.vcfpredict-0.9.8 index ff29506..0146e7d 100644 --- a/singularity/Singularity.vcfpredict-0.9.7 +++ b/singularity/Singularity.vcfpredict-0.9.8 @@ -2,22 +2,22 @@ Bootstrap: docker From: ubuntu:20.04 Stage: spython-base +%files +bin/ /opt/bin/ %labels maintainer="pricea35@cardiff.ac.uk" about.summary="container for the vcf predict workflow" %post +#add run-vcf to container +PATH=/opt/bin:$PATH PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" PYTHON="python3 python3-pip python3-dev" vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417 -gumpy_version=1.0.15 -piezo_version=0.3 -gnomonicus_version=1.1.2 -tuberculosis_amr_catalogues=12d38733ad2e238729a3de9f725081e1d4872968 - +#apt updates apt-get update \ && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \ && apt-get install -y $PACKAGES $PYTHON \ @@ -26,38 +26,15 @@ apt-get update \ && cd VCFMIX \ && git checkout ${vcfmix_version} \ && pip3 install recursive_diff \ +&& pip3 install awscli \ && pip3 install . \ && cp -r data /usr/local/lib/python3.8/dist-packages \ && cd .. - -curl -fsSL https://github.com/oxfordmmm/gumpy/archive/refs/tags/v${gumpy_version}.tar.gz | tar -xz \ -&& cd gumpy-${gumpy_version} \ -&& pip3 install . \ -&& cd .. 
diff --git a/singularity/Singularity.vcfpredict-0.9.7 b/singularity/Singularity.vcfpredict-0.9.8
similarity index 51%
rename from singularity/Singularity.vcfpredict-0.9.7
rename to singularity/Singularity.vcfpredict-0.9.8
index ff29506..0146e7d 100644
--- a/singularity/Singularity.vcfpredict-0.9.7
+++ b/singularity/Singularity.vcfpredict-0.9.8
@@ -2,22 +2,22 @@ Bootstrap: docker
 From: ubuntu:20.04
 Stage: spython-base
 
+%files
+bin/ /opt/bin/
 %labels
 maintainer="pricea35@cardiff.ac.uk"
 about.summary="container for the vcf predict workflow"
 %post
 
+#add run-vcf to container
+PATH=/opt/bin:$PATH
 PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq"
 PYTHON="python3 python3-pip python3-dev"
 
 vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417
-gumpy_version=1.0.15
-piezo_version=0.3
-gnomonicus_version=1.1.2
-tuberculosis_amr_catalogues=12d38733ad2e238729a3de9f725081e1d4872968
-
+#apt updates
 apt-get update \
 && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \
 && apt-get install -y $PACKAGES $PYTHON \
@@ -26,38 +26,15 @@ apt-get update \
 && cd VCFMIX \
 && git checkout ${vcfmix_version} \
 && pip3 install recursive_diff \
+&& pip3 install awscli \
 && pip3 install . \
 && cp -r data /usr/local/lib/python3.8/dist-packages \
 && cd ..
-
-curl -fsSL https://github.com/oxfordmmm/gumpy/archive/refs/tags/v${gumpy_version}.tar.gz | tar -xz \
-&& cd gumpy-${gumpy_version} \
-&& pip3 install . \
-&& cd ..
-
-curl -fsSL https://github.com/oxfordmmm/piezo/archive/refs/tags/v${piezo_version}.tar.gz | tar -xz \
-&& cd piezo-${piezo_version} \
-&& pip3 install . \
-&& cd ..
-
-curl -fsSL https://github.com/oxfordmmm/gnomonicus/archive/refs/tags/v${gnomonicus_version}.tar.gz | tar -xz \
-&& cd gnomonicus-${gnomonicus_version} \
-&& pip3 install . \
-&& cd ..
-
-git clone https://github.com/oxfordmmm/tuberculosis_amr_catalogues.git \
-&& cd tuberculosis_amr_catalogues \
-&& git checkout ${tuberculosis_amr_catalogues} \
-&& cd ..
-
 %environment
+export PATH=/opt/bin:$PATH
 export PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq"
 export PYTHON="python3 python3-pip python3-dev"
 export vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417
-export gumpy_version=1.0.15
-export piezo_version=0.3
-export gnomonicus_version=1.1.2
-export tuberculosis_amr_catalogues=12d38733ad2e238729a3de9f725081e1d4872968
 %runscript
 exec /bin/bash "$@"
 %startscript

From 26497d5ba2fa25c8e0695c8e46e7fa2ac3a43de5 Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Wed, 24 Jan 2024 14:00:42 +0000
Subject: [PATCH 35/44] fix allelic depth

---
 main.nf                      | 3 ++-
 modules/vcfpredictModules.nf | 3 ++-
 workflows/clockwork.nf       | 1 +
 workflows/vcfpredict.nf      | 4 +++-
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/main.nf b/main.nf
index 2f38ef6..0cd98f2 100644
--- a/main.nf
+++ b/main.nf
@@ -213,8 +213,9 @@ workflow {
     mpileup_vcf = clockwork.out.mpileup_vcf
     minos_vcf = clockwork.out.minos_vcf
     reference = clockwork.out.reference
+    bam = clockwork.out.bam
 
-    vcfpredict(mpileup_vcf, minos_vcf, reference)
+    vcfpredict(bam, mpileup_vcf, minos_vcf, reference)
 
 }
diff --git a/modules/vcfpredictModules.nf b/modules/vcfpredictModules.nf
index 4bc7957..042b403 100644
--- a/modules/vcfpredictModules.nf
+++ b/modules/vcfpredictModules.nf
@@ -107,6 +107,7 @@ process add_allelic_depth {
     input:
     val(sample_name)
     path(minos_vcf)
+    path(bam)
     path(reference)
     val(isSampleTB)
 
@@ -120,7 +121,7 @@ process add_allelic_depth {
     """
     samtools faidx $reference
     samtools dict $reference -o ${reference.baseName}.dict
-    gatk VariantAnnotator -R $reference -V $minos_vcf -A DepthPerAlleleBySample -O ${sample_name}_allelic_depth.minos.vcf
+    gatk VariantAnnotator -R $reference -I $bam -V $minos_vcf -A DepthPerAlleleBySample -O ${sample_name}_allelic_depth.minos.vcf
     """
 }
diff --git a/workflows/clockwork.nf b/workflows/clockwork.nf
index bf6f5e1..148f523 100644
--- a/workflows/clockwork.nf
+++ b/workflows/clockwork.nf
@@ -40,5 +40,6 @@ workflow clockwork {
     mpileup_vcf = callVarsMpileup.out.mpileup_vcf.join(minos.out.minos_report, by: 0)
     minos_vcf = minos.out.minos_vcf.join(alignToRef.out.alignToRef_report, by: 0)
     reference = getRefFromJSON.out
+    bam = alignToRef.out.alignToRef_bam
 
 }
diff --git a/workflows/vcfpredict.nf b/workflows/vcfpredict.nf
index 375e410..8fec00f 100644
--- a/workflows/vcfpredict.nf
+++ b/workflows/vcfpredict.nf
@@ -12,6 +12,7 @@ include {finalJson} from '../modules/vcfpredictModules.nf' params(params)
 workflow vcfpredict {
 
     take:
+    clockwork_bam
     clockwork_bcftools_tuple
     minos_vcf_tuple
     reference_fasta
@@ -31,13 +32,14 @@ workflow vcfpredict {
         minos_vcf = minos_vcf_tuple.map{it[1]}
         do_we_resistance_profile = minos_vcf_tuple.map{it[2]}
         report_json = minos_vcf_tuple.map{it[3]}
+        bam = clockwork_bam.map{it[2]}
 
         if (params.update_tbprofiler == "yes"){
             tbprofiler_update_db(reference_fasta)
        }
 
        //add allelic depth back in: was calculated in mpileup but lost in minos
-        add_allelic_depth(sample_name, minos_vcf, reference_fasta, do_we_resistance_profile)
+        add_allelic_depth(sample_name, minos_vcf, bam, reference_fasta, do_we_resistance_profile)
         tbprofiler(sample_name, add_allelic_depth.out, report_json, do_we_resistance_profile)
     }
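A quick way to confirm that VariantAnnotator really restored the per-allele depths is to print the AD FORMAT field from the annotated VCF. A sketch, where bcftools is available and the file name follows the naming pattern used in the patch above:

    # every record should now carry an AD (allelic depth) entry in FORMAT
    bcftools query -f '%CHROM\t%POS[\t%AD]\n' sample_allelic_depth.minos.vcf | head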
From f149e82761649c6ad00881a070c5ef615fb1af97 Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Wed, 24 Jan 2024 14:50:28 +0000
Subject: [PATCH 36/44] afanc memory

---
 modules/preprocessingModules.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/preprocessingModules.nf b/modules/preprocessingModules.nf
index 9d7177a..0de3ab6 100644
--- a/modules/preprocessingModules.nf
+++ b/modules/preprocessingModules.nf
@@ -337,7 +337,7 @@ process afanc {
     tag { sample_name }
     label 'preprocessing'
     label 'normal_cpu'
-    label 'medium_memory'
+    label 'high_memory'
     label 'retry_afanc'
 
     publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP", mode: 'copy', pattern: '*_afanc_report.json'

From 441512d00f85b1b099c44ae812bbc3c17774d30c Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Wed, 24 Jan 2024 17:12:51 +0000
Subject: [PATCH 37/44] remove copy

---
 modules/clockworkModules.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/clockworkModules.nf b/modules/clockworkModules.nf
index f45ac62..0bea703 100644
--- a/modules/clockworkModules.nf
+++ b/modules/clockworkModules.nf
@@ -63,7 +63,6 @@ process alignToRef {
     """
     echo $reference_path
-    cp $reference_path ${sample_name}.fa
 
     minimap2 -ax sr $reference_path -t ${task.cpus} $fq1 $fq2 | samtools fixmate -m - - | samtools sort -T tmp - | samtools markdup --reference $reference_path - minimap.bam

From f54bc37d6926c9b9ef2241aa411151dd7121d61d Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Fri, 26 Jan 2024 09:34:45 +0000
Subject: [PATCH 38/44] k8s job

---
 nextflow.config | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/nextflow.config b/nextflow.config
index c8b15d1..707ea73 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -71,6 +71,11 @@ profiles {
             container = null
         }
 
+        process {
+            //process as job rather than pod, helps with stability
+            k8s.computeResourceType = 'Job'
+        }
+
         //params specific to paths on the climb system
         params{
             bowtie2_index = "s3://microbial-bioin-sp3/bowtie_hg19"

From 7eece9ad6f79cad2b8a3c79a68dbd1015200f580 Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Fri, 26 Jan 2024 09:44:03 +0000
Subject: [PATCH 39/44] update branches that actions wf works on

---
 .github/workflows/build-push-quay.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/build-push-quay.yml b/.github/workflows/build-push-quay.yml
index cef945d..a1c2b72 100644
--- a/.github/workflows/build-push-quay.yml
+++ b/.github/workflows/build-push-quay.yml
@@ -2,9 +2,7 @@ name: build-push-quay
 on:
   push:
     branches:
-      - v0.9.6
-      - 0.9.7-dev
-      - tbprofiler
+      - main
     paths:
       - '**/Dockerfile*'
       - "bin/"

From 9eeb2a671bcbc2eba4c0b28495b00e6d4341156e Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Fri, 26 Jan 2024 09:55:27 +0000
Subject: [PATCH 40/44] update readme

---
 modules/preprocessingModules.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/preprocessingModules.nf b/modules/preprocessingModules.nf
index 0de3ab6..f4c528d 100644
--- a/modules/preprocessingModules.nf
+++ b/modules/preprocessingModules.nf
@@ -434,7 +434,7 @@ process bowtie2 {
     tag { sample_name }
     label 'preprocessing'
     label 'normal_cpu'
-    label 'low_memory'
+    label 'medium_memory'
 
     publishDir "${params.output_dir}/$sample_name/output_reads", mode: 'copy', pattern: '*.fq.gz', overwrite: 'true'
From 1f6252916066489c1a6d065c0b4869c7ad9b431f Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Fri, 26 Jan 2024 09:58:31 +0000
Subject: [PATCH 41/44] README

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 4fac53e..a076e2a 100644
--- a/README.md
+++ b/README.md
@@ -131,3 +131,4 @@ process clockwork:minos\
 For a list of direct authors of this pipeline, please see the contributors list. All of the software dependencies of this pipeline are recorded in the version.json
 
 The preprocessing sub-workflow is based on the preprocessing nextflow DSL1 pipeline written by Stephen Bush, University of Oxford. The clockwork sub-workflow uses aspects of the variant calling workflow from https://github.com/iqbal-lab-org/clockwork, lead author Martin Hunt, Iqbal Lab at EMBL-EBI
+

From b5291ad847e1f7f1dc0b443752872d4cb18114ca Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Thu, 8 Feb 2024 09:50:27 +0000
Subject: [PATCH 42/44] k8s job to stop fails

---
 nextflow.config | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 707ea73..43a0d71 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -71,9 +71,8 @@ profiles {
             container = null
         }
 
-        process {
-            //process as job rather than pod, helps with stability
-            k8s.computeResourceType = 'Job'
+        k8s {
+            computeResourceType = 'Job'
         }
 
         //params specific to paths on the climb system

From aa906e6aacf283904f99d4d32255219a8931ad2a Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Thu, 8 Feb 2024 09:51:19 +0000
Subject: [PATCH 43/44] output csv

---
 modules/preprocessingModules.nf | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/modules/preprocessingModules.nf b/modules/preprocessingModules.nf
index f4c528d..8d05c49 100644
--- a/modules/preprocessingModules.nf
+++ b/modules/preprocessingModules.nf
@@ -398,6 +398,7 @@ process mykrobe {
     label 'medium_memory'
 
     publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP", mode: 'copy', pattern: '*_mykrobe_report.json'
+    publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP", mode: 'copy', pattern: '*_mykrobe_report.csv'
 
     input:
     tuple val(sample_name), path(fq1), path(fq2), val(run_mykrobe), path(software_json)
@@ -413,7 +414,7 @@ process mykrobe {
     mykrobe_report = "${sample_name}_mykrobe_report.json"
 
     """
-    mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json --output ${mykrobe_report} -1 $fq1 $fq2
+    mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json_and_csv --output ${mykrobe_report} -1 $fq1 $fq2
 
     printf ${sample_name}
     """
@@ -733,6 +734,7 @@ process reMykrobe {
     label 'low_memory'
 
     publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP_and_postContamRemoval", mode: 'copy', pattern: '*_mykrobe_report.json'
+    publishDir "${params.output_dir}/$sample_name/speciation_reports_for_reads_postFastP_and_postContamRemoval", mode: 'copy', pattern: '*_mykrobe_report.csv'
 
     input:
     tuple val(sample_name), path(fq1), path(fq2), path(software_json)
@@ -744,7 +746,7 @@ process reMykrobe {
     mykrobe_report = "${sample_name}_mykrobe_report.json"
 
     """
-    mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json --output ${mykrobe_report} -1 $fq1 $fq2
+    mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json_and_csv --output ${mykrobe_report} -1 $fq1 $fq2
     """
 
     stub:
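The final patch below follows on from this one: with --format json_and_csv, mykrobe presumably appends its own .json/.csv extensions to the --output value, so a prefix that already ends in .json would yield awkward double-extension names. A hypothetical run with an extension-free prefix:

    # expected to write sample1_mykrobe_report.json and sample1_mykrobe_report.csv
    mykrobe predict --sample sample1 --species tb --format json_and_csv \
      --output sample1_mykrobe_report -1 reads_1.fq.gz reads_2.fq.gz
    ls sample1_mykrobe_report.*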
From 0e48d699bd1626687e18789a2e49efac25b42f78 Mon Sep 17 00:00:00 2001
From: whalleyt
Date: Thu, 8 Feb 2024 10:03:13 +0000
Subject: [PATCH 44/44] change output name to deal with csv and json

---
 modules/preprocessingModules.nf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/preprocessingModules.nf b/modules/preprocessingModules.nf
index 8d05c49..b59d0cc 100644
--- a/modules/preprocessingModules.nf
+++ b/modules/preprocessingModules.nf
@@ -411,7 +411,7 @@ process mykrobe {
     tuple val(sample_name), path(fq1), path(fq2), stdout, emit: mykrobe_fqs
 
     script:
-    mykrobe_report = "${sample_name}_mykrobe_report.json"
+    mykrobe_report = "${sample_name}_mykrobe_report"
 
     """
     mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json_and_csv --output ${mykrobe_report} -1 $fq1 $fq2
@@ -422,7 +422,7 @@ process mykrobe {
     mykrobe_report = "${sample_name}_mykrobe_report.json"
 
     """
-    touch ${mykrobe_report}
+    touch ${mykrobe_report}.json
 
     printf ${sample_name}
     """
 }
@@ -743,7 +743,7 @@ process reMykrobe {
     tuple val(sample_name), path("${sample_name}_mykrobe_report.json"), emit: reMykrobe_report
 
     script:
-    mykrobe_report = "${sample_name}_mykrobe_report.json"
+    mykrobe_report = "${sample_name}_mykrobe_report"
 
     """
     mykrobe predict --sample ${sample_name} --species tb --threads ${task.cpus} --format json_and_csv --output ${mykrobe_report} -1 $fq1 $fq2
     """
 
     stub:
     mykrobe_report = "${sample_name}_mykrobe_report.json"
 
     """
-    touch ${mykrobe_report}
+    touch ${mykrobe_report}.json
     """
 }