From 56e5a1ebeaf2b46c4a9572df8043e9562b94f16b Mon Sep 17 00:00:00 2001 From: Finlay Maguire Date: Fri, 22 Jan 2021 20:03:28 -0400 Subject: [PATCH 1/3] Add auto-update option to pangolin Adds a `--update` option to pangolin which automatically updates pangolin, pangoLEARN, and lineages to the latest stable release and exits (or, if everything is already at the latest release, just checks and exits). Specifically: - Latest release tags are fetched from the GitHub releases API - These are compared to the currently installed versions of pangolin, pangoLEARN, and lineages - If there is a new release for any or all of these it updates them to that release using pip - Once updates are complete, or if all packages were already at the latest release, the command just exits with exit code 0 As requiring a query file isn't meaningful if a user is just running `pangolin --update` this adds a small amount of logic to tolerate no supplied query if `--update` is being used. --- README.md | 7 +++- pangolin/command.py | 88 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 79 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index b55e93f..9671563 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,12 @@ and you should see the versions of pangolin, and pangoL > Note: Even if you have previously installed pangolin, as it is being worked on intensively, we recommend you check for updates before running. -To update: +To update pangolin, pangoLEARN, and lineages automatically to the latest stable release: + +1. ``conda activate pangolin`` +2. ``pangolin --update`` + +Alternatively, this can be done manually: 1. ``conda activate pangolin`` 2. 
``git pull`` \ diff --git a/pangolin/command.py b/pangolin/command.py index bfce7ac..c24dadd 100644 --- a/pangolin/command.py +++ b/pangolin/command.py @@ -4,6 +4,9 @@ import os.path import snakemake import sys +from urllib import request +import subprocess +import json from tempfile import gettempdir import tempfile import pprint @@ -24,11 +27,11 @@ def main(sysargs = sys.argv[1:]): - parser = argparse.ArgumentParser(prog = _program, - description='pangolin: Phylogenetic Assignment of Named Global Outbreak LINeages', + parser = argparse.ArgumentParser(prog = _program, + description='pangolin: Phylogenetic Assignment of Named Global Outbreak LINeages', usage='''pangolin [options]''') - parser.add_argument('query', help='Query fasta file of sequences to analyse.') + parser.add_argument('query', nargs="*", help='Query fasta file of sequences to analyse.') parser.add_argument('-o','--outdir', action="store",help="Output directory. Default: current working directory") parser.add_argument('--outfile', action="store",help="Optional output file name. 
Default: lineage_report.csv") parser.add_argument('-d', '--data', action='store',help="Data directory minimally containing a fasta alignment and guide tree") @@ -46,12 +49,17 @@ def main(sysargs = sys.argv[1:]): parser.add_argument("-v","--version", action='version', version=f"pangolin {__version__}") parser.add_argument("-lv","--lineages-version", action='version', version=f"lineages {lineages.__version__}",help="show lineages's version number and exit") parser.add_argument("-pv","--pangoLEARN-version", action='version', version=f"pangoLEARN {pangoLEARN.__version__}",help="show pangoLEARN's version number and exit") + parser.add_argument("--update", action='store_true', default=False, help="Automatically updates to latest release of pangolin, pangoLEARN, and lineages then exits") if len(sysargs)<1: parser.print_help() sys.exit(-1) else: args = parser.parse_args(sysargs) + args = parser.parse_args() + + if args.update: + update(__version__, lineages.__version__, pangoLEARN.__version__) if args.legacy: snakefile = os.path.join(thisdir, 'scripts','Snakefile') @@ -64,13 +72,20 @@ def main(sysargs = sys.argv[1:]): else: print("Found the snakefile") - # find the query fasta - query = os.path.join(cwd, args.query) - if not os.path.exists(query): - sys.stderr.write('Error: cannot find query (input) fasta file at {}\nPlease enter your fasta sequence file and refer to pangolin usage at:\nhttps://github.com/hCoV-2019/pangolin#usage\n for detailed instructions\n'.format(query)) + # to enable not having to pass a query if running update + # by allowing query to accept 0 to many arguments + if len(args.query) > 1: + print(f"Error: Too many query (input) fasta files supplied: {args.query}\nPlease supply one only") + parser.print_help() sys.exit(-1) else: - print(f"The query file is {query}") + # find the query fasta + query = os.path.join(cwd, args.query[0]) + if not os.path.exists(query): + sys.stderr.write('Error: cannot find query (input) fasta file at {}\nPlease enter your 
fasta sequence file and refer to pangolin usage at:\nhttps://github.com/hCoV-2019/pangolin#usage\n for detailed instructions\n'.format(query)) + sys.exit(-1) + else: + print(f"The query file is {query}") # default output dir outdir = '' @@ -101,12 +116,14 @@ def main(sysargs = sys.argv[1:]): else: temporary_directory = tempfile.TemporaryDirectory(suffix=None, prefix=None, dir=None) tempdir = temporary_directory.name - + if args.no_temp: print(f"--no-temp: All intermediate files will be written to {outdir}") tempdir = outdir - """ + + + """ QC steps: 1) check no empty seqs 2) check N content @@ -128,7 +145,7 @@ def main(sysargs = sys.argv[1:]): else: num_N = str(record.seq).upper().count("N") prop_N = round((num_N)/len(record.seq), 2) - if prop_N > args.maxambig: + if prop_N > args.maxambig: record.description = record.description + f" fail=N_content:{prop_N}" do_not_run.append(record) print(f"{record.id}\thas an N content of {prop_N}") @@ -148,7 +165,7 @@ def main(sysargs = sys.argv[1:]): fw.write(f"{record.id},None,0,{pangoLEARN.__version__},fail,{reason}\n") print(f'Note: no query sequences have passed the qc\n') sys.exit(0) - + post_qc_query = os.path.join(tempdir, 'query.post_qc.fasta') with open(post_qc_query,"w") as fw: SeqIO.write(run, fw, "fasta") @@ -207,7 +224,7 @@ def main(sysargs = sys.argv[1:]): elif fn.endswith(".csv") and fn.startswith("lineages"): lineages_csv = os.path.join(r, fn) - + if representative_aln=="" or guide_tree=="" or lineages_csv=="": print("""Check your environment, didn't find appropriate files from the lineages repo, please see https://cov-lineages.org/pangolin.html for installation instructions. \nTreefile must end with `.treefile`.\ \nAlignment must be in `.fasta` format.\n Trained model must exist. 
\ @@ -255,10 +272,10 @@ def main(sysargs = sys.argv[1:]): variants_file = pkg_resources.resource_filename('pangolin', 'data/config_b.1.1.7.csv') config["b117_variants"] = variants_file - + variants_file = pkg_resources.resource_filename('pangolin', 'data/config_b.1.351.csv') config["b1351_variants"] = variants_file - + if args.write_tree: config["write_tree"]="True" @@ -282,5 +299,46 @@ def main(sysargs = sys.argv[1:]): return 1 + +def update(pangolin_version, lineages_version, pangoLEARN_version): + """ + Using the github releases API check for the latest current release + of each pangolin, lineages, and pangoLEARN + + Compare these to the currently running versions and if newer versions + exist + + If newer versions exist then update and exit safely + """ + # flag if any element is update if everything is the latest release + # we want to just continue running + for dependency, version in [('pangolin', pangolin_version), + ('pangoLEARN', pangoLEARN_version), + ('lineages', lineages_version)]: + latest_release = request.urlopen(\ + f"https://api.github.com/repos/cov-lineages/{dependency}/releases") + latest_release = json.load(latest_release) + latest_release = latest_release[0]['tag_name'] + + # to match the tag names add a v to the pangolin internal version + if dependency == 'pangolin': + version = "v" + version + # lineages doesn't need any changes for matching + # to match the tag names for pangoLEARN add data release + elif dependency == 'pangoLEARN': + version = version.replace(' ', ' data release ') + + if version != latest_release: + subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade', + f"git+https://github.com/cov-lineages/{dependency}.git@{latest_release}"], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + print(f"{dependency} updated to {latest_release}", file=sys.stderr) + else: + print(f"{dependency} already latest release ({latest_release})", file=sys.stderr) + + sys.exit(0) + if __name__ == '__main__': 
main() From 183a2f79cd902ff18650f4ace32e053828a707a9 Mon Sep 17 00:00:00 2001 From: Finlay Maguire Date: Sat, 23 Jan 2021 19:06:47 -0400 Subject: [PATCH 2/3] Add more information to docstring for update function --- pangolin/command.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pangolin/command.py b/pangolin/command.py index e22b453..48cca1e 100644 --- a/pangolin/command.py +++ b/pangolin/command.py @@ -334,9 +334,14 @@ def update(pangolin_version, lineages_version, pangoLEARN_version): of each pangolin, lineages, and pangoLEARN Compare these to the currently running versions and if newer versions - exist - - If newer versions exist then update and exit safely + exist then updates them (or if current) then exits safely + + pangolin_version: string containing the __version__ data for the currently + running pangolin module + lineages_version: string containing the __version__ data for the imported + lineages data module + pangoLEARN_version: string containing the __version__ data for the imported + pangoLEARN data module """ # flag if any element is update if everything is the latest release # we want to just continue running From 59566d4df1905a8d2c01cd9ba87da47d4e7bee80 Mon Sep 17 00:00:00 2001 From: Finlay Maguire Date: Sat, 23 Jan 2021 19:10:59 -0400 Subject: [PATCH 3/3] Fix typo in update docstring --- pangolin/command.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pangolin/command.py b/pangolin/command.py index 48cca1e..540ed9c 100644 --- a/pangolin/command.py +++ b/pangolin/command.py @@ -333,8 +333,9 @@ def update(pangolin_version, lineages_version, pangoLEARN_version): Using the github releases API check for the latest current release of each pangolin, lineages, and pangoLEARN - Compare these to the currently running versions and if newer versions - exist then updates them (or if current) then exits safely + Compare these to the currently running versions and if newer releases + exist update to 
them accordingly (or do nothing if current). + Afterwards, exit the program safely with a 0 exit code. pangolin_version: string containing the __version__ data for the currently running pangolin module