diff --git a/README.md b/README.md
index b55e93f..9671563 100644
--- a/README.md
+++ b/README.md
@@ -67,7 +67,12 @@ and you should see the versions of pangolin, and pangoL
> Note: Even if you have previously installed pangolin, as it is being worked on intensively, we recommend you check for updates before running.
-To update:
+To update pangolin, pangoLEARN, and lineages automatically to the latest stable release:
+1. ``conda activate pangolin``
+2. ``pangolin --update``
+Alternatively, this can be done manually:
1. ``conda activate pangolin``
2. ``git pull`` \
diff --git a/pangolin/command.py b/pangolin/command.py
index 29a15d8..540ed9c 100644
--- a/pangolin/command.py
+++ b/pangolin/command.py
@@ -4,6 +4,9 @@
import os.path
import snakemake
import sys
+from urllib import request
+import subprocess
+import json
from tempfile import gettempdir
import tempfile
import pprint
@@ -25,11 +28,11 @@
def main(sysargs = sys.argv[1:]):
- parser = argparse.ArgumentParser(prog = _program,
- description='pangolin: Phylogenetic Assignment of Named Global Outbreak LINeages',
+ parser = argparse.ArgumentParser(prog = _program,
+ description='pangolin: Phylogenetic Assignment of Named Global Outbreak LINeages',
usage='''pangolin [options]''')
- parser.add_argument('query', help='Query fasta file of sequences to analyse.')
+ parser.add_argument('query', nargs="*", help='Query fasta file of sequences to analyse.')
parser.add_argument('-o','--outdir', action="store",help="Output directory. Default: current working directory")
parser.add_argument('--outfile', action="store",help="Optional output file name. Default: lineage_report.csv")
parser.add_argument('-d', '--data', action='store',help="Data directory minimally containing a fasta alignment and guide tree")
@@ -48,13 +51,17 @@ def main(sysargs = sys.argv[1:]):
parser.add_argument("-v","--version", action='version', version=f"pangolin {__version__}")
parser.add_argument("-lv","--lineages-version", action='version', version=f"lineages {lineages.__version__}",help="show lineages's version number and exit")
parser.add_argument("-pv","--pangoLEARN-version", action='version', version=f"pangoLEARN {pangoLEARN.__version__}",help="show pangoLEARN's version number and exit")
+ parser.add_argument("--update", action='store_true', default=False, help="Automatically updates to latest release of pangolin, pangoLEARN, and lineages then exits")
if len(sysargs)<1:
args = parser.parse_args(sysargs)
+ args = parser.parse_args()
+ if args.update:
+ update(__version__, lineages.__version__, pangoLEARN.__version__)
if args.legacy:
snakefile = os.path.join(thisdir, 'scripts','Snakefile')
@@ -67,13 +74,20 @@ def main(sysargs = sys.argv[1:]):
print("Found the snakefile")
- # find the query fasta
- query = os.path.join(cwd, args.query)
- if not os.path.exists(query):
- sys.stderr.write('Error: cannot find query (input) fasta file at {}\nPlease enter your fasta sequence file and refer to pangolin usage at:\nhttps://github.com/hCoV-2019/pangolin#usage\n for detailed instructions\n'.format(query))
+ # to enable not having to pass a query if running update
+ # by allowing query to accept 0 to many arguments
+ if len(args.query) > 1:
+ print(f"Error: Too many query (input) fasta files supplied: {args.query}\nPlease supply one only")
+ parser.print_help()
- print(f"The query file is {query}")
+ # find the query fasta
+ query = os.path.join(cwd, args.query[0])
+ if not os.path.exists(query):
+ sys.stderr.write('Error: cannot find query (input) fasta file at {}\nPlease enter your fasta sequence file and refer to pangolin usage at:\nhttps://github.com/hCoV-2019/pangolin#usage\n for detailed instructions\n'.format(query))
+ sys.exit(-1)
+ else:
+ print(f"The query file is {query}")
# default output dir
outdir = ''
@@ -104,12 +118,14 @@ def main(sysargs = sys.argv[1:]):
temporary_directory = tempfile.TemporaryDirectory(suffix=None, prefix=None, dir=None)
tempdir = temporary_directory.name
if args.no_temp:
print(f"--no-temp: All intermediate files will be written to {outdir}")
tempdir = outdir
- """
+ """
QC steps:
1) check no empty seqs
2) check N content
@@ -131,7 +147,7 @@ def main(sysargs = sys.argv[1:]):
num_N = str(record.seq).upper().count("N")
prop_N = round((num_N)/len(record.seq), 2)
- if prop_N > args.maxambig:
+ if prop_N > args.maxambig:
record.description = record.description + f" fail=N_content:{prop_N}"
print(f"{record.id}\thas an N content of {prop_N}")
@@ -151,7 +167,7 @@ def main(sysargs = sys.argv[1:]):
print(f'Note: no query sequences have passed the qc\n')
post_qc_query = os.path.join(tempdir, 'query.post_qc.fasta')
with open(post_qc_query,"w") as fw:
SeqIO.write(run, fw, "fasta")
@@ -211,7 +227,7 @@ def main(sysargs = sys.argv[1:]):
elif fn.endswith(".csv") and fn.startswith("lineages"):
lineages_csv = os.path.join(r, fn)
if representative_aln=="" or guide_tree=="" or lineages_csv=="":
print("""Check your environment, didn't find appropriate files from the lineages repo, please see https://cov-lineages.org/pangolin.html for installation instructions. \nTreefile must end with `.treefile`.\
\nAlignment must be in `.fasta` format.\n Trained model must exist. \
@@ -278,7 +294,7 @@ def main(sysargs = sys.argv[1:]):
variants_file = pkg_resources.resource_filename('pangolin', 'data/config_b.1.1.7.csv')
config["b117_variants"] = variants_file
variants_file = pkg_resources.resource_filename('pangolin', 'data/config_b.1.351.csv')
config["b1351_variants"] = variants_file
@@ -287,7 +303,7 @@ def main(sysargs = sys.argv[1:]):
variants_file = pkg_resources.resource_filename('pangolin', 'data/config_p.2.csv')
config["p2_variants"] = variants_file
if args.write_tree:
@@ -311,5 +327,52 @@ def main(sysargs = sys.argv[1:]):
return 1
+def update(pangolin_version, lineages_version, pangoLEARN_version):
+    """
+    Update pangolin, pangoLEARN and lineages to their latest stable release.
+
+    For each of the three dependencies the GitHub releases API is queried
+    for the cov-lineages repository of the same name. The first listed
+    release that is neither a draft nor a pre-release is compared with the
+    currently running version and, if the two differ, that release is
+    installed with pip from the tagged git revision (nothing is installed
+    when the versions already match).
+    Afterwards, exit program safely with a 0 exit code; this function never
+    returns to its caller.
+
+    pangolin_version: string containing the __version__ data for the currently
+                      running pangolin module
+    lineages_version: string containing the __version__ data for the imported
+                      lineages data module
+    pangoLEARN_version: string containing the __version__ data for the imported
+                        pangoLEARN data module
+
+    Raises subprocess.CalledProcessError if pip exits with a non-zero status
+    (pip's error output is written to stderr first). Errors raised by
+    urlopen while contacting GitHub propagate unchanged.
+    """
+    for dependency, version in [('pangolin', pangolin_version),
+                                ('pangoLEARN', pangoLEARN_version),
+                                ('lineages', lineages_version)]:
+        # the context manager closes the HTTP response once it has been parsed
+        with request.urlopen(
+                f"https://api.github.com/repos/cov-lineages/{dependency}/releases") as response:
+            releases = json.load(response)
+        # assumes the API lists releases newest first; drafts and pre-releases
+        # are skipped so that only a stable release is ever installed
+        latest_release = next((release['tag_name'] for release in releases
+                               if not release.get('draft')
+                               and not release.get('prerelease')), None)
+        if latest_release is None:
+            # an empty listing (or one holding only pre-releases) is not fatal
+            print(f"{dependency} has no stable release available, skipping", file=sys.stderr)
+            continue
+        # to match the tag names add a v to the pangolin internal version
+        if dependency == 'pangolin':
+            version = "v" + version
+        # lineages doesn't need any changes for matching
+        # to match the tag names for pangoLEARN add data release
+        elif dependency == 'pangoLEARN':
+            version = version.replace(' ', ' data release ')
+        if version != latest_release:
+            try:
+                subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade',
+                                f"git+https://github.com/cov-lineages/{dependency}.git@{latest_release}"],
+                               check=True,
+                               stdout=subprocess.DEVNULL,
+                               stderr=subprocess.PIPE,
+                               universal_newlines=True)
+            except subprocess.CalledProcessError as err:
+                # pip's output is hidden on success; show it when the install fails
+                sys.stderr.write(err.stderr or '')
+                raise
+            print(f"{dependency} updated to {latest_release}", file=sys.stderr)
+        else:
+            print(f"{dependency} already latest release ({latest_release})", file=sys.stderr)
+    sys.exit(0)
if __name__ == '__main__':