Skip to content

Commit c459c8f

Browse files
authored
Merge pull request #108 from Pathogen-Genomics-Cymru/bcg
Bcg
2 parents 8d77a6b + 2edb307 commit c459c8f

20 files changed

+501
-90
lines changed

.github/workflows/build-push-quay.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ on:
33
push:
44
branches:
55
- main
6-
- ntmprofiler
6+
- bcg
77
paths:
88
- '**/Dockerfile*'
99
- "bin/"

README.md

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,20 @@
22
![Build Status](https://github.com/Pathogen-Genomics-Cymru/lodestone/workflows/build-push-quay/badge.svg)
33
![Build Status](https://github.com/Pathogen-Genomics-Cymru/lodestone/workflows/pytest/badge.svg)
44
![Build Status](https://github.com/Pathogen-Genomics-Cymru/lodestone/workflows/stub-run/badge.svg)
5-
5+
6+
## Table of Contents
7+
- [What is Lodestone](#what-is-lodestone)
8+
- [Quick Start](#quick-start)
9+
- [Executors](#executors)
10+
- [System Requirements](#system-requirements)
11+
- [Parameters](#parameters)
12+
- [Stub Runs](#stub-runs)
13+
- [Checkpoints](#checkpoints)
14+
- [Acknowledgments](#acknowledgements)
15+
- [License](#license)
16+
17+
## What is Lodestone?
18+
619
This pipeline takes as input reads presumed to be from one of 10 mycobacterial genomes: abscessus, africanum, avium, bovis, chelonae, chimaera, fortuitum, intracellulare, kansasii, tuberculosis. Input should be in the form of one directory containing pairs of fastq(.gz) or bam files.
720

821
The pipeline cleans and QCs reads with fastp and FastQC, classifies with Kraken2 & Afanc, removes non-bacterial content, and - by alignment to any minority genomes - disambiguates mixtures of bacterial reads. Cleaned reads are aligned to one of the 10 supported genomes and variants are called. It produces as output one directory per sample, containing cleaned fastqs, sorted, indexed BAM, VCF, F2 and F47 statistics, an antibiogram and summary reports.
@@ -40,7 +53,7 @@ By default, the pipeline will just run on the local machine. To run on a cluster
4053
### System Requirements ###
4154
Minimum recommended requirements: 32GB RAM, 8CPU
4255

43-
## Params ##
56+
## Parameters ##
4457
The following parameters should be set in `nextflow.config` or specified on the command line:
4558

4659
* **input_dir**<br />
@@ -84,7 +97,7 @@ For more information on the parameters run `nextflow run main.nf --help`
8497

8598
The path to the singularity images can also be changed in the singularity profile in `nextflow.config`. Default value is `${baseDir}/singularity`
8699

87-
## Stub-run ##
100+
## Stub runs ##
88101
To test the stub run:
89102
```
90103
NXF_VER=20.11.0-edge nextflow run main.nf -stub -config testing.config
@@ -150,3 +163,5 @@ For a list of direct authors of this pipeline, please see the contributors list.
150163

151164
The preprocessing sub-workflow is based on the preprocessing nextflow DSL1 pipeline written by Stephen Bush, University of Oxford. The clockwork sub-workflow uses aspects of the variant calling workflow from https://github.com/iqbal-lab-org/clockwork, lead author Martin Hunt, Iqbal Lab at EMBL-EBI
152165

166+
## License
167+
The tool is licensed under the GNU Affero General Public License v3 (AGPL-3.0). Please see the [LICENSE](LICENSE) file for more details.

bin/identify_tophit_and_contaminants2.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,8 +359,21 @@ def process_reports(afanc_json_path, kraken_json_path, supposed_species, unmix_m
359359

360360
# IS THE TOP SPECIES HIT ONE OF THE 10 ACCEPTABLE POSSIBILITIES? IF SO, PROVIDE A LINK TO THE REFERENCE GENOME
361361
re_top_species = re.findall(r"^(Mycobact|Mycolicibac)\w+ (abscessus|africanum|avium|bovis|chelonae|chimaera|fortuitum|intracellulare|kansasii|tuberculosis).*?$", top_species)
362+
re_top_variant = re.findall(r"^(Mycobact|Mycolicibac)\w+ (abscessus|africanum|avium|bovis|chelonae|chimaera|fortuitum|intracellulare|kansasii|tuberculosis) ()\w+ (bovis|orgis|caprae).*?$", top_species)
363+
if len(re_top_variant) != 0:
364+
re_top_species = re_top_variant
362365
if len(re_top_species) > 0:
363-
identified_species = re_top_species[0][1]
366+
if len(re_top_species[0]) == 2:
367+
identified_species = re_top_species[0][1]
368+
#deal with lineages
369+
lineage_dict = {"La1.": "bovis",
370+
"La2.": "caprae",
371+
"La3.": "orygis"}
372+
for lineage in lineage_dict:
373+
if lineage in top_species:
374+
identified_species = lineage_dict[lineage]
375+
else:
376+
identified_species = re_top_species[0][3] #we have bovis (or orgis/caprae) with variant in the name
364377
if supposed_species == 'null':
365378
out['summary_questions']['is_the_top_species_appropriate'] = 'yes'
366379
elif ((supposed_species != 'null') & (supposed_species == identified_species)):

bin/run-vcfmix.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99

1010
def go(vcf_file):
1111
# create a lineagescan object
12-
v = lineageScan()
12+
v = lineageScan(minos=True)
1313

14-
# assuming postfix of ".bcftools.vcf"
15-
sampleid = vcf_file[:-13]
14+
# assuming postfix of "_allelic_depth.minos.vcf"
15+
sampleid = vcf_file.replace("_allelic_depth.minos.vcf", "")
1616
print(sampleid)
1717

1818
res = v.parse(vcffile=vcf_file, sample_id=sampleid)

config/containers.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@ process {
3333
}
3434

3535
withLabel:clockwork {
36-
container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.9"
36+
container = "quay.io/pathogen-genomics-cymru/clockwork:0.9.9r1"
3737
}
3838

3939
withLabel:vcfpredict {
40-
container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.9"
40+
container = "quay.io/pathogen-genomics-cymru/vcfpredict:0.9.9r1"
4141
}
4242
}

docker/Dockerfile.clockwork-0.9.9

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
FROM debian:buster
1+
FROM ubuntu:focal
2+
23

34
LABEL maintainer="[email protected]" \
45
about.summary="container for the clockwork workflow"
@@ -16,17 +17,17 @@ vcftools_version=0.1.15 \
1617
mccortex_version=97aba198d632ee98ac1aa496db33d1a7a8cb7e51 \
1718
stampy_version=1.0.32r3761 \
1819
python_version=3.6.5 \
19-
clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5
20+
clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 \
21+
gatk_version=4.6.0.0
2022

2123
ENV PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" \
2224
PYTHON="python2.7 python-dev"
2325

2426
COPY bin/ /opt/bin/
2527
ENV PATH=/opt/bin:$PATH
2628

27-
2829
RUN apt-get update \
29-
&& apt-get install -y $PACKAGES $PYTHON \
30+
&& DEBIAN_FRONTEND=noninteractive apt-get install -y $PACKAGES $PYTHON \
3031
&& curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \
3132
&& cd Python-${python_version} \
3233
&& ./configure --enable-optimizations \
@@ -36,7 +37,15 @@ RUN apt-get update \
3637
&& ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \
3738
&& pip3 install --upgrade pip \
3839
&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \
39-
&& apt-get update && apt-get install -y openjdk-11-jdk
40+
&& apt-get update
41+
42+
#update jdk
43+
RUN wget https://download.java.net/java/GA/jdk18/43f95e8614114aeaa8e8a5fcf20a682d/36/GPL/openjdk-18_linux-x64_bin.tar.gz
44+
RUN tar -xvf openjdk-18_linux-x64_bin.tar.gz
45+
RUN mv jdk-18* /opt/
46+
ENV JAVA_HOME=/opt/jdk-18
47+
ENV PATH=$PATH:$JAVA_HOME/bin
48+
4049

4150
RUN curl -fsSL https://github.com/samtools/samtools/archive/${samtools_version}.tar.gz | tar -xz \
4251
&& curl -fsSL https://github.com/samtools/htslib/releases/download/${htslib_version}/htslib-${htslib_version}.tar.bz2 | tar -xj \
@@ -107,8 +116,12 @@ RUN git clone --recursive https://github.com/iqbal-lab/cortex.git \
107116
&& pip3 install . \
108117
&& chmod +x scripts/clockwork
109118

119+
RUN wget https://github.com/broadinstitute/gatk/releases/download/${gatk_version}/gatk-${gatk_version}.zip -O /tmp/gatk-${gatk_version}.zip\
120+
&& unzip /tmp/gatk-${gatk_version}.zip -d /opt/ \
121+
&& rm /tmp/gatk-${gatk_version}.zip -f
122+
110123
ENV CLOCKWORK_CORTEX_DIR=/cortex \
111-
PATH=${PATH}:/clockwork/python/scripts \
124+
PATH=${PATH}:/clockwork/python/scripts:/opt/gatk-${gatk_version} \
112125
PICARD_JAR=/usr/local/bin/picard.jar
113126

114127
ENV LC_ALL en_US.UTF-8 \

docker/Dockerfile.clockwork-0.9.9r1

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
FROM ubuntu:focal
2+
3+
4+
LABEL maintainer="[email protected]" \
5+
about.summary="container for the clockwork workflow"
6+
7+
ENV samtools_version=1.12 \
8+
htslib_version=1.12 \
9+
bcftools_version=1.12 \
10+
minimap2_version=2.17 \
11+
picard_version=2.18.16 \
12+
gramtools_version=8af53f6c8c0d72ef95223e89ab82119b717044f2 \
13+
vt_version=2187ff6347086e38f71bd9f8ca622cd7dcfbb40c \
14+
minos_version=0.11.0 \
15+
cortex_version=3a235272e4e0121be64527f01e73f9e066d378d3 \
16+
vcftools_version=0.1.15 \
17+
mccortex_version=97aba198d632ee98ac1aa496db33d1a7a8cb7e51 \
18+
stampy_version=1.0.32r3761 \
19+
python_version=3.6.5 \
20+
clockwork_version=2364dec4cbf25c844575e19e8fe0a319d10721b5 \
21+
gatk_version=4.6.0.0
22+
23+
ENV PACKAGES="procps curl git build-essential wget zlib1g-dev pkg-config jq r-base-core rsync autoconf libncurses-dev libbz2-dev liblzma-dev libcurl4-openssl-dev cmake tabix libvcflib-tools libssl-dev software-properties-common perl locales locales-all" \
24+
PYTHON="python2.7 python-dev"
25+
26+
COPY bin/ /opt/bin/
27+
ENV PATH=/opt/bin:$PATH
28+
29+
RUN apt-get update \
30+
&& DEBIAN_FRONTEND=noninteractive apt-get install -y $PACKAGES $PYTHON \
31+
&& curl -fsSL https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz | tar -xz \
32+
&& cd Python-${python_version} \
33+
&& ./configure --enable-optimizations \
34+
&& make altinstall \
35+
&& cd .. \
36+
&& ln -s /usr/local/bin/python3.6 /usr/local/bin/python3 \
37+
&& ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 \
38+
&& pip3 install --upgrade pip \
39+
&& pip3 install 'cluster_vcf_records==0.13.1' pysam setuptools awscli \
40+
&& apt-get update
41+
42+
#update jdk
43+
RUN wget https://download.java.net/java/GA/jdk18/43f95e8614114aeaa8e8a5fcf20a682d/36/GPL/openjdk-18_linux-x64_bin.tar.gz
44+
RUN tar -xvf openjdk-18_linux-x64_bin.tar.gz
45+
RUN mv jdk-18* /opt/
46+
ENV JAVA_HOME=/opt/jdk-18
47+
ENV PATH=$PATH:$JAVA_HOME/bin
48+
49+
50+
RUN curl -fsSL https://github.com/samtools/samtools/archive/${samtools_version}.tar.gz | tar -xz \
51+
&& curl -fsSL https://github.com/samtools/htslib/releases/download/${htslib_version}/htslib-${htslib_version}.tar.bz2 | tar -xj \
52+
&& make -C samtools-${samtools_version} -j HTSDIR=../htslib-${htslib_version} \
53+
&& make -C samtools-${samtools_version} -j HTSDIR=../htslib-${htslib_version} prefix=/usr/local install \
54+
&& rm -r samtools-${samtools_version} \
55+
&& curl -fsSL https://github.com/samtools/bcftools/archive/refs/tags/${bcftools_version}.tar.gz | tar -xz \
56+
&& make -C bcftools-${bcftools_version} -j HTSDIR=../htslib-${htslib_version} \
57+
&& make -C bcftools-${bcftools_version} -j HTSDIR=../htslib-${htslib_version} prefix=/usr/local install \
58+
&& rm -r bcftools-${bcftools_version}
59+
60+
61+
RUN curl -fsSL minimap2-${minimap2_version}.tar.gz https://github.com/lh3/minimap2/archive/v${minimap2_version}.tar.gz | tar -xz \
62+
&& cd minimap2-${minimap2_version} \
63+
&& make \
64+
&& chmod +x minimap2 \
65+
&& mv minimap2 /usr/local/bin \
66+
&& cd .. \
67+
&& rm -r minimap2-${minimap2_version} \
68+
&& wget https://github.com/broadinstitute/picard/releases/download/${picard_version}/picard.jar -O /usr/local/bin/picard.jar
69+
70+
71+
RUN git clone https://github.com/atks/vt.git vt-git \
72+
&& cd vt-git \
73+
&& git checkout ${vt_version} \
74+
&& make \
75+
&& cd .. \
76+
&& mv vt-git/vt /usr/local/bin \
77+
&& pip3 install tox "six>=1.14.0" \
78+
&& git clone https://github.com/iqbal-lab-org/gramtools \
79+
&& cd gramtools \
80+
&& git checkout ${gramtools_version} \
81+
&& pip3 install . \
82+
&& cd .. \
83+
&& pip3 install cython \
84+
&& pip3 install git+https://github.com/iqbal-lab-org/minos@v${minos_version}
85+
86+
87+
RUN git clone --recursive https://github.com/iqbal-lab/cortex.git \
88+
&& cd cortex \
89+
&& git checkout ${cortex_version} \
90+
&& bash install.sh \
91+
&& make NUM_COLS=1 cortex_var \
92+
&& make NUM_COLS=2 cortex_var \
93+
&& cd .. \
94+
&& mkdir bioinf-tools \
95+
&& cd bioinf-tools \
96+
&& curl -fsSL http://www.well.ox.ac.uk/~gerton/software/Stampy/stampy-${stampy_version}.tgz | tar -xz \
97+
&& make -C stampy-* \
98+
&& cp -s stampy-*/stampy.py . \
99+
&& curl -fsSL https://github.com/vcftools/vcftools/releases/download/v${vcftools_version}/vcftools-${vcftools_version}.tar.gz | tar -xz \
100+
&& cd vcftools-${vcftools_version} \
101+
&& ./configure --prefix $PWD/install \
102+
&& make && make install \
103+
&& ln -s src/perl/ . \
104+
&& cd .. \
105+
&& git clone --recursive https://github.com/mcveanlab/mccortex \
106+
&& cd mccortex \
107+
&& git checkout ${mccortex_version} \
108+
&& make all \
109+
&& cd .. \
110+
&& cp -s mccortex/bin/mccortex31 . \
111+
&& cd .. \
112+
&& git clone https://github.com/iqbal-lab-org/clockwork \
113+
&& cd clockwork \
114+
&& git checkout ${clockwork_version} \
115+
&& cd python \
116+
&& pip3 install . \
117+
&& chmod +x scripts/clockwork
118+
119+
RUN wget https://github.com/broadinstitute/gatk/releases/download/${gatk_version}/gatk-${gatk_version}.zip -O /tmp/gatk-${gatk_version}.zip\
120+
&& unzip /tmp/gatk-${gatk_version}.zip -d /opt/ \
121+
&& rm /tmp/gatk-${gatk_version}.zip -f
122+
123+
ENV CLOCKWORK_CORTEX_DIR=/cortex \
124+
PATH=${PATH}:/clockwork/python/scripts:/opt/gatk-${gatk_version} \
125+
PICARD_JAR=/usr/local/bin/picard.jar
126+
127+
ENV LC_ALL en_US.UTF-8 \
128+
LANG en_US.UTF-8 \
129+
LANGUAGE en_US.UTF-8
130+
131+

docker/Dockerfile.tbprofiler-0.9.9

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest| tar -xvj bin
4242
# install tb-profiler via bioconda; install into 'base' conda env
4343
RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \
4444
tb-profiler=${TBPROFILER_VER}
45-
46-
RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
45+
RUN micromamba install --yes --name base --channel conda-forge --channel bioconda gatk4
4746
RUN micromamba install --yes --name base --channel conda-forge --channel bioconda samtools
4847
RUN micromamba install --yes --name base --channel conda-forge jq
4948
RUN micromamba clean --all --yes

docker/Dockerfile.tbtamr-0.9.9

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ FROM ubuntu:jammy
22

33
WORKDIR /
44

5+
ENV freebayes_version=1.3.6 \
6+
tbtamr_version=0.0.4
7+
58
# LABEL instructions tag the image with metadata that might be important to the user
69
LABEL base.image="ubuntu:jammy"
710
LABEL dockerfile.version="0.9.9"

docker/Dockerfile.vcfpredict-0.9.9r1

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
FROM ubuntu:20.04
2+
3+
LABEL maintainer="[email protected]" \
4+
about.summary="container for the vcf predict workflow"
5+
6+
#add run-vcf to container
7+
COPY bin/ /opt/bin/
8+
ENV PATH=/opt/bin:$PATH
9+
10+
ENV PACKAGES="procps curl wget git build-essential libhdf5-dev libffi-dev r-base-core jq" \
11+
PYTHON="python3 python3-pip python3-dev"
12+
13+
ENV vcfmix_version=d4693344bf612780723e39ce27c8ae3868f95417
14+
15+
#apt updates
16+
RUN apt-get update \
17+
&& DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata \
18+
&& apt-get install -y $PACKAGES $PYTHON \
19+
&& apt-get install -y python3-packaging \
20+
&& git clone https://github.com/whalleyt/VCFMIX.git \
21+
&& cd VCFMIX \
22+
&& pip3 install recursive_diff \
23+
&& pip3 install awscli \
24+
&& pip3 install . \
25+
&& cp -r data /usr/local/lib/python3.8/dist-packages \
26+
&& cd ..

main.nf

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,10 @@ nextflow run main.nf -profile docker --filetype bam --input_dir bam_dir --unmix_
8585
}
8686

8787

88-
resistance_profilers = ["tb-profiler", "tbtamr", "none"]
88+
resistance_profilers = ["tb-profiler", "tbtamr"]
8989

9090
if(!resistance_profilers.contains(params.resistance_profiler)){
91-
exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler", "tbtamr" \
92-
or "none" to skip.'
91+
exit 1, 'Invalid resistance profiler. Must be one of "tb-profiler" or "tbtamr"'
9392
}
9493

9594

@@ -199,13 +198,10 @@ workflow {
199198
clockwork(preprocessing_output)
200199

201200
// VCFPREDICT SUB-WORKFLOW
202-
sample_and_fastqs = clockwork.out.sample_and_fastqs
203-
mpileup_vcf = clockwork.out.mpileup_vcf
204-
minos_vcf = clockwork.out.minos_vcf
205-
reference = clockwork.out.reference
206-
bam = clockwork.out.bam
201+
profiler_input_vcf = clockwork.out.profiler_input_vcf
202+
profiler_input_fq = clockwork.out.profiler_input_fq
207203

208-
vcfpredict(sample_and_fastqs, bam, mpileup_vcf, minos_vcf, reference)
204+
vcfpredict(profiler_input_fq, profiler_input_vcf)
209205

210206
}
211207

0 commit comments

Comments
 (0)