Skip to content

Commit

Permalink
Merge pull request #435 from nextstrain/use-nextclade3
Browse files: browse the repository at this point in the history
feat: switch to nextclade3, by downloading "latest" binary
  • Loading branch information
corneliusroemer authored Feb 21, 2024
2 parents 4027640 + c5553ef commit 5f0b4e2
Showing 1 changed file with 20 additions and 10 deletions.
30 changes: 20 additions & 10 deletions workflow/snakemake_rules/nextclade.smk
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,6 +26,7 @@ Produces the following outputs:
nextclade_info = f"data/{database}/nextclade.tsv"
alignment = f"data/{database}/aligned.fasta"
"""

from shlex import quote as shellquote


Expand Down Expand Up @@ -134,10 +135,10 @@ rule download_nextclade_executable:
shell:
"""
if [ "$(uname)" = "Darwin" ]; then
curl -fsSL "https://github.com/nextstrain/nextclade/releases/download/2.14.0/nextclade-x86_64-apple-darwin" -o "nextclade"
curl -fsSL "https://github.com/nextstrain/nextclade/releases/latest/download/nextclade-x86_64-apple-darwin" -o "nextclade"
else
curl -fsSL "https://github.com/nextstrain/nextclade/releases/download/2.14.0/nextclade-x86_64-unknown-linux-gnu" -o "nextclade"
curl -fsSL "https://github.com/nextstrain/nextclade/releases/latest/download/nextclade-x86_64-unknown-linux-gnu" -o "nextclade"
fi
chmod +x nextclade
Expand Down Expand Up @@ -171,12 +172,11 @@ rule run_wuhan_nextclade:
"""
input:
nextclade_path="nextclade",
dataset=lambda w: f"data/nextclade_data/sars-cov-2.zip",
dataset="data/nextclade_data/sars-cov-2.zip",
sequences=f"data/{database}/nextclade.sequences.fasta",
params:
genes=GENES_SPACE_DELIMITED,
translation_arg=lambda w: (
f"--output-translations=data/{database}/nextclade.translation_{{gene}}.upd.fasta"
f"--output-translations=data/{database}/nextclade.translation_{{cds}}.upd.fasta"
),
output:
info=f"data/{database}/nextclade_new_raw.tsv",
Expand All @@ -187,11 +187,18 @@ rule run_wuhan_nextclade:
],
shell:
"""
# If there are no sequences to run Nextclade on, create empty output files
if [[ ! -s {input.sequences} ]]; then
touch {output.info}
touch {output.alignment}
touch {output.translations}
exit 0
fi
./{input.nextclade_path} run \
{input.sequences}\
--input-dataset={input.dataset} \
--output-tsv={output.info} \
--genes {params.genes} \
{params.translation_arg} \
--output-fasta={output.alignment}
"""
Expand All @@ -205,17 +212,20 @@ rule run_21L_nextclade:
nextclade_path="nextclade",
dataset=lambda w: f"data/nextclade_data/sars-cov-2-21L.zip",
sequences=f"data/{database}/nextclade_21L.sequences.fasta",
params:
genes=GENES_SPACE_DELIMITED,
output:
info=f"data/{database}/nextclade_21L_new_raw.tsv",
shell:
"""
# If there are no sequences to run Nextclade on, create empty output files
if [[ ! -s {input.sequences} ]]; then
touch {output.info}
exit 0
fi
./{input.nextclade_path} run \
{input.sequences} \
--input-dataset={input.dataset} \
--output-tsv={output.info} \
--genes {params.genes}
"""


Expand All @@ -230,7 +240,7 @@ rule nextclade_tsv_concat_versions:
if [ -s {input.tsv} ]; then
# Get version numbers
nextclade_version="$(./nextclade --version)"
dataset_version="$(unzip -p {input.dataset} tag.json | jq -r '.tag')"
dataset_version="$(unzip -p {input.dataset} pathogen.json | jq -r '.version.tag')"
timestamp="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
# Combine input file with version numbers and write to output
Expand Down

0 comments on commit 5f0b4e2

Please sign in to comment.