Skip to content

Commit 207c9a7

Browse files
committed
2 parents 8f5f976 + e201bf2 commit 207c9a7

16 files changed

+667
-32
lines changed

.dockerignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
.git
2+
.dockerignore

.github/workflows/pangolin.yml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
name: pangolin
2-
32
on: [push, pull_request]
3+
defaults:
4+
run:
5+
shell: bash -l {0}
46

57
jobs:
68
run_pangolin_test:
@@ -18,12 +20,9 @@ jobs:
1820
channels: conda-forge,bioconda,defaults
1921
mamba-version: "*"
2022
- name: Install pangolin
21-
shell: bash -l {0}
2223
run: pip install -e .
2324
- name: Check pangolin version
24-
shell: bash -l {0}
2525
run: pangolin --version
2626
- name: Run pangolin with test data
27-
shell: bash -l {0}
2827
run: pangolin pangolin/test/test_seqs.fasta 2>&1 | tee pangolin.log
29-
# add more tests here
28+
# add more tests here

.github/workflows/push_dockerhub.yml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
name: Docker push
# This builds the docker image and pushes it to DockerHub
# Runs on cov-lineages/pangolin repo releases
# and push event to 'dev' branch (PR merges)
on:
  push:
    branches:
      - dev
  release:
    types: [published]

jobs:
  push_dockerhub:
    name: Push new Docker image to Docker Hub
    runs-on: ubuntu-latest
    # Only run for the cov-lineages/pangolin repo, for releases and merged PRs
    if: ${{ github.repository == 'cov-lineages/pangolin' }}
    env:
      DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
      DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }}
    steps:
      - name: Check out pipeline code
        uses: actions/checkout@v2

      - name: Build new docker image
        run: docker build --no-cache . -t covlineages/pangolin:latest

      # Pushes to the 'dev' branch publish the freshly built image as :dev
      - name: Push Docker image to DockerHub (dev)
        if: ${{ github.event_name == 'push' }}
        run: |
          echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
          docker tag covlineages/pangolin:latest covlineages/pangolin:dev
          docker push covlineages/pangolin:dev

      # Published releases publish the image as :latest and as :<release tag>
      - name: Push Docker image to DockerHub (release)
        if: ${{ github.event_name == 'release' }}
        env:
          # The tag name is user-controlled text: hand it to the shell as an
          # environment variable instead of templating it into the script,
          # so it can never be interpreted as shell syntax.
          RELEASE_TAG: ${{ github.event.release.tag_name }}
        run: |
          echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
          docker push covlineages/pangolin:latest
          docker tag covlineages/pangolin:latest "covlineages/pangolin:${RELEASE_TAG}"
          docker push "covlineages/pangolin:${RELEASE_TAG}"

Dockerfile

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
FROM continuumio/miniconda3:4.9.2-alpine
# NOTE(review): this version label is maintained by hand - confirm it matches
# the pangolin release being packaged.
LABEL version="2.1.2" \
      description="Docker image for Pangolin"

# Install git for pangolin (--no-cache avoids leaving the apk index in the layer)
RUN apk add --no-cache git bash

COPY environment.yml /environment.yml
# Python 3.8.5 already installed along with recent version of pip
# so remove Python and pip deps from environment.yml before installation.
# Pattern-based deletes are a no-op when a pattern is absent; computing a line
# number with grep and passing it to sed would expand to a bare "d" (delete
# every line) if the pattern were ever missing.
RUN sed -i -e '/python=3.7/d' -e '/pip=/d' /environment.yml
# Install the conda environment
RUN conda env create --quiet -f /environment.yml && conda clean -a
# Add conda installation dir to PATH (instead of doing 'conda activate')
ENV PATH=/opt/conda/envs/pangolin/bin:$PATH

# Install Pangolin
COPY . /pangolin/
WORKDIR /pangolin/
RUN pip install . && rm -rf /root/.cache/pip
# RUN uses /bin/sh, so use the POSIX redirection form rather than bash's '&>'
RUN pangolin --version > /pangolin-version.txt 2>&1

# Dump the details of the installed packages to a file for posterity
RUN conda env export --name pangolin > /pangolin.yml
WORKDIR /tmp/

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,12 @@ and you should see the versions of <strong>pangolin</strong>, and <strong>pangoL
6767

6868
> Note: Even if you have previously installed <strong>pangolin</strong>, as it is being worked on intensively, we recommend you check for updates before running.
6969
70-
To update:
70+
To update pangolin, pangoLEARN, and lineages automatically to the latest stable release:
71+
72+
1. ``conda activate pangolin``
73+
2. ``pangolin --update``
74+
75+
Alternatively, this can be done manually:
7176

7277
1. ``conda activate pangolin``
7378
2. ``git pull`` \

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,5 @@ dependencies:
1616
- pytools==2020.1
1717
- dendropy>=4.4.0
1818
- git+https://github.com/cov-ert/datafunk.git
19-
- git+https://github.com/cov-lineages/pangoLEARN.git@2020-11-30_2
19+
- git+https://github.com/cov-lineages/pangoLEARN.git
2020
- git+https://github.com/cov-lineages/lineages.git@2020-05-19-2

pangolin/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
_program = "pangolin"
2-
__version__ = "2.0.11"
2+
__version__ = "2.1.10"

pangolin/command.py

Lines changed: 118 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,16 @@
44
import os.path
55
import snakemake
66
import sys
7+
from urllib import request
8+
from distutils.version import LooseVersion
9+
import subprocess
10+
import json
711
from tempfile import gettempdir
812
import tempfile
913
import pprint
1014
import json
15+
import os
16+
import joblib
1117
import lineages
1218
import pangoLEARN
1319

@@ -23,17 +29,18 @@
2329

2430
def main(sysargs = sys.argv[1:]):
2531

26-
parser = argparse.ArgumentParser(prog = _program,
27-
description='pangolin: Phylogenetic Assignment of Named Global Outbreak LINeages',
32+
parser = argparse.ArgumentParser(prog = _program,
33+
description='pangolin: Phylogenetic Assignment of Named Global Outbreak LINeages',
2834
usage='''pangolin <query> [options]''')
2935

30-
parser.add_argument('query', help='Query fasta file of sequences to analyse.')
36+
parser.add_argument('query', nargs="*", help='Query fasta file of sequences to analyse.')
3137
parser.add_argument('-o','--outdir', action="store",help="Output directory. Default: current working directory")
3238
parser.add_argument('--outfile', action="store",help="Optional output file name. Default: lineage_report.csv")
3339
parser.add_argument('-d', '--data', action='store',help="Data directory minimally containing a fasta alignment and guide tree")
3440
parser.add_argument('-n', '--dry-run', action='store_true',help="Go through the motions but don't actually run")
3541
parser.add_argument('--tempdir',action="store",help="Specify where you want the temp stuff to go. Default: $TMPDIR")
3642
parser.add_argument("--no-temp",action="store_true",help="Output all intermediate files, for dev purposes.")
43+
parser.add_argument('--decompress-model',action="store_true",dest="decompress",help="Permanently decompress the model file to save time running pangolin.")
3744
parser.add_argument('--max-ambig', action="store", default=0.5, type=float,help="Maximum proportion of Ns allowed for pangolin to attempt assignment. Default: 0.5",dest="maxambig")
3845
parser.add_argument('--min-length', action="store", default=10000, type=int,help="Minimum query length allowed for pangolin to attempt assignment. Default: 10000",dest="minlen")
3946
parser.add_argument('--panGUIlin', action='store_true',help="Run web-app version of pangolin",dest="panGUIlin")
@@ -45,12 +52,17 @@ def main(sysargs = sys.argv[1:]):
4552
parser.add_argument("-v","--version", action='version', version=f"pangolin {__version__}")
4653
parser.add_argument("-lv","--lineages-version", action='version', version=f"lineages {lineages.__version__}",help="show lineages's version number and exit")
4754
parser.add_argument("-pv","--pangoLEARN-version", action='version', version=f"pangoLEARN {pangoLEARN.__version__}",help="show pangoLEARN's version number and exit")
55+
parser.add_argument("--update", action='store_true', default=False, help="Automatically updates to latest release of pangolin, pangoLEARN, and lineages then exits")
4856

4957
if len(sysargs)<1:
    # Nothing was supplied on the command line: show usage and stop
    parser.print_help()
    sys.exit(-1)
else:
    # Parse exactly the argument list handed to main(). Calling
    # parser.parse_args() a second time with no argument would re-read
    # sys.argv and silently discard a caller-supplied sysargs.
    args = parser.parse_args(sysargs)

if args.update:
    # update() checks/installs the latest releases and then exits the process
    update(__version__, lineages.__version__, pangoLEARN.__version__)
5466

5567
if args.legacy:
5668
snakefile = os.path.join(thisdir, 'scripts','Snakefile')
@@ -63,13 +75,20 @@ def main(sysargs = sys.argv[1:]):
6375
else:
6476
print("Found the snakefile")
6577

66-
# find the query fasta
67-
query = os.path.join(cwd, args.query)
68-
if not os.path.exists(query):
69-
sys.stderr.write('Error: cannot find query (input) fasta file at {}\nPlease enter your fasta sequence file and refer to pangolin usage at:\nhttps://github.com/hCoV-2019/pangolin#usage\n for detailed instructions\n'.format(query))
78+
# to enable not having to pass a query if running update
79+
# by allowing query to accept 0 to many arguments
80+
if len(args.query) > 1:
81+
print(f"Error: Too many query (input) fasta files supplied: {args.query}\nPlease supply one only")
82+
parser.print_help()
7083
sys.exit(-1)
7184
else:
72-
print(f"The query file is {query}")
85+
# find the query fasta
86+
query = os.path.join(cwd, args.query[0])
87+
if not os.path.exists(query):
88+
sys.stderr.write('Error: cannot find query (input) fasta file at {}\nPlease enter your fasta sequence file and refer to pangolin usage at:\nhttps://github.com/hCoV-2019/pangolin#usage\n for detailed instructions\n'.format(query))
89+
sys.exit(-1)
90+
else:
91+
print(f"The query file is {query}")
7392

7493
# default output dir
7594
outdir = ''
@@ -100,12 +119,14 @@ def main(sysargs = sys.argv[1:]):
100119
else:
101120
temporary_directory = tempfile.TemporaryDirectory(suffix=None, prefix=None, dir=None)
102121
tempdir = temporary_directory.name
103-
122+
104123
if args.no_temp:
105124
print(f"--no-temp: All intermediate files will be written to {outdir}")
106125
tempdir = outdir
107126

108-
"""
127+
128+
129+
"""
109130
QC steps:
110131
1) check no empty seqs
111132
2) check N content
@@ -127,7 +148,7 @@ def main(sysargs = sys.argv[1:]):
127148
else:
128149
num_N = str(record.seq).upper().count("N")
129150
prop_N = round((num_N)/len(record.seq), 2)
130-
if prop_N > args.maxambig:
151+
if prop_N > args.maxambig:
131152
record.description = record.description + f" fail=N_content:{prop_N}"
132153
do_not_run.append(record)
133154
print(f"{record.id}\thas an N content of {prop_N}")
@@ -147,7 +168,7 @@ def main(sysargs = sys.argv[1:]):
147168
fw.write(f"{record.id},None,0,{pangoLEARN.__version__},fail,{reason}\n")
148169
print(f'Note: no query sequences have passed the qc\n')
149170
sys.exit(0)
150-
171+
151172
post_qc_query = os.path.join(tempdir, 'query.post_qc.fasta')
152173
with open(post_qc_query,"w") as fw:
153174
SeqIO.write(run, fw, "fasta")
@@ -206,7 +227,7 @@ def main(sysargs = sys.argv[1:]):
206227
elif fn.endswith(".csv") and fn.startswith("lineages"):
207228
lineages_csv = os.path.join(r, fn)
208229

209-
230+
210231
if representative_aln=="" or guide_tree=="" or lineages_csv=="":
211232
print("""Check your environment, didn't find appropriate files from the lineages repo, please see https://cov-lineages.org/pangolin.html for installation instructions. \nTreefile must end with `.treefile`.\
212233
\nAlignment must be in `.fasta` format.\n Trained model must exist. \
@@ -242,6 +263,22 @@ def main(sysargs = sys.argv[1:]):
242263
print("""Check your environment, didn't find appropriate files from the pangoLEARN repo.\n Trained model must be installed, please see https://cov-lineages.org/pangolin.html for installation instructions.""")
243264
exit(1)
244265
else:
266+
if args.decompress:
    # Remember the on-disk size so the rewrite below can be sanity-checked
    prev_size = os.path.getsize(trained_model)

    print("Decompressing model and header files")
    # Re-save both joblib files in place with compression disabled
    model = joblib.load(trained_model)
    joblib.dump(model, trained_model, compress=0)
    headers = joblib.load(header_file)
    joblib.dump(headers, header_file, compress=0)

    if os.path.getsize(trained_model) >= prev_size:
        print(f'Success! Decompressed the model file. Exiting\n')
        sys.exit(0)
    else:
        # Report the failure on stderr and with a non-zero status so that
        # scripts wrapping pangolin can detect it
        print(f'Error: failed to decompress model. Exiting\n', file=sys.stderr)
        sys.exit(1)
281+
245282
print("\nData files found")
246283
print(f"Trained model:\t{trained_model}")
247284
print(f"Header file:\t{header_file}")
@@ -252,6 +289,18 @@ def main(sysargs = sys.argv[1:]):
252289
reference_fasta = pkg_resources.resource_filename('pangolin', 'data/reference.fasta')
253290
config["reference_fasta"] = reference_fasta
254291

292+
variants_file = pkg_resources.resource_filename('pangolin', 'data/config_b.1.1.7.csv')
293+
config["b117_variants"] = variants_file
294+
295+
variants_file = pkg_resources.resource_filename('pangolin', 'data/config_b.1.351.csv')
296+
config["b1351_variants"] = variants_file
297+
298+
variants_file = pkg_resources.resource_filename('pangolin', 'data/config_p.1.csv')
299+
config["p1_variants"] = variants_file
300+
301+
variants_file = pkg_resources.resource_filename('pangolin', 'data/config_p.2.csv')
302+
config["p2_variants"] = variants_file
303+
255304
if args.write_tree:
256305
config["write_tree"]="True"
257306

@@ -275,5 +324,61 @@ def main(sysargs = sys.argv[1:]):
275324

276325
return 1
277326

327+
328+
def update(pangolin_version, lineages_version, pangoLEARN_version):
    """
    Using the github releases API check for the latest current release
    of each pangolin, lineages, and pangoLEARN

    Compare these to the currently running versions and if newer releases
    exist update to them accordingly (or do nothing if current).
    Afterwards, exit program safely with a 0 exit code.

    pangolin_version: string containing the __version__ data for the currently
                      running pangolin module
    lineages_version: string containing the __version__ data for the imported
                      lineages data module
    pangoLEARN_version: string containing the __version__ data for the imported
                        pangoLEARN data module

    Raises subprocess.CalledProcessError if a pip upgrade fails; pip's error
    output is written to stderr before the exception propagates.
    """
    # Check each component in turn; components already at (or ahead of) the
    # latest release are reported and left untouched.
    for dependency, version in [('pangolin', pangolin_version),
                                ('pangoLEARN', pangoLEARN_version),
                                ('lineages', lineages_version)]:
        releases_url = f"https://api.github.com/repos/cov-lineages/{dependency}/releases"
        # use the response as a context manager so the connection is closed
        with request.urlopen(releases_url) as response:
            releases = json.load(response)

        # an empty release list would otherwise fail with a bare IndexError
        if not releases:
            print(f"{dependency}: no releases found, not updating.",
                  file=sys.stderr)
            continue

        # the first entry returned is taken as the latest release
        latest_release = LooseVersion(releases[0]['tag_name'])

        # to match the tag names add a v to the pangolin internal version
        if dependency == 'pangolin':
            version = "v" + version
        # lineages doesn't need any changes for matching
        # to match the tag names for pangoLEARN add data release
        elif dependency == 'pangoLEARN':
            version = version.replace(' ', ' data release ')

        # convert to LooseVersion to have proper ordering of versions
        # this prevents someone using the latest commit/HEAD from being
        # downgraded to the last stable release
        version = LooseVersion(version)

        if version < latest_release:
            try:
                subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade',
                                f"git+https://github.com/cov-lineages/{dependency}.git@{latest_release}"],
                               check=True,
                               stdout=subprocess.DEVNULL,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)
            except subprocess.CalledProcessError as err:
                # keep pip quiet on success, but surface its diagnostics
                # when the upgrade fails
                if err.stderr:
                    sys.stderr.write(err.stderr)
                print(f"Error: failed to update {dependency} to {latest_release}",
                      file=sys.stderr)
                raise
            print(f"{dependency} updated to {latest_release}", file=sys.stderr)
        elif version > latest_release:
            print(f"{dependency} ({version}) is newer than latest stable "
                  f"release ({latest_release}), not updating.", file=sys.stderr)
        else:
            print(f"{dependency} already latest release ({latest_release})",
                  file=sys.stderr)

    sys.exit(0)
382+
278383
if __name__ == '__main__':
279384
main()

pangolin/data/config_b.1.1.7.csv

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
aa:orf1ab:T1001I
2+
aa:orf1ab:A1708D
3+
aa:orf1ab:I2230T
4+
del:11288:9
5+
del:21765:6
6+
del:21991:3
7+
aa:S:N501Y
8+
aa:S:A570D
9+
aa:S:P681H
10+
aa:S:T716I
11+
aa:S:S982A
12+
aa:S:D1118H
13+
aa:Orf8:Q27*
14+
aa:Orf8:R52I
15+
aa:Orf8:Y73C
16+
aa:N:D3L
17+
aa:N:S235F

pangolin/data/config_b.1.351.csv

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
aa:E:P71L
2+
aa:N:T205I
3+
aa:orf1a:K1655N
4+
aa:S:D80A
5+
aa:S:D215G
6+
aa:S:K417N
7+
aa:S:A701V
8+
aa:S:N501Y
9+
aa:S:E484K

0 commit comments

Comments
 (0)