Skip to content

Commit

Permalink
Ignore cache if Nextclade version is different
Browse files Browse the repository at this point in the history
Currently this just checks the Nextclade and dataset versions in the first row
of the nextclade.tsv file and formats them as the proposed JSON.
Once the version JSON metadata file is in place, it should be easy to
switch the check over to that separate file instead of the nextclade.tsv.
  • Loading branch information
joverlee521 committed Jul 24, 2024
1 parent 055c5b7 commit 5c2fcd8
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 2 deletions.
19 changes: 19 additions & 0 deletions bin/cache-version
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
#
# Print the Nextclade and dataset versions recorded in a cached Nextclade TSV
# as one compact JSON object on stdout:
#
#   {"nextclade_version":"...","nextclade_dataset_version":"..."}
#
# Usage: cache-version <s3-url>
#
#   <s3-url>  S3 URL of a zstd-compressed TSV whose header row includes the
#             `nextclade_version` and `dataset_version` columns.
#
# Only the first data row is inspected. When no data row reaches jq (for
# example, the TSV holds nothing but its header), the output is JSON `null`
# rather than an object built from the column names.
#
# Requires on PATH: aws, zstd, tsv-select, jq.

s3_url="${1:?An S3 URL is required as the first argument}"

# `head` below stops reading after two lines while the upstream commands may
# still be writing, so SIGPIPE is ignored here (ignored signals are inherited
# by the child processes).
# NOTE: `pipefail` is not enabled, so the exit status of this script is the
# exit status of the final `jq`, regardless of failures earlier in the pipe.
trap '' SIGPIPE

# Pipeline stages:
#   1. stream the object from S3 and decompress it
#   2. keep the header plus the first data row
#   3. keep only the two version columns (header-aware)
#   4. drop the header row (`tail -n +2`), leaving at most one data row
#   5. turn that tab-separated row into a JSON object
# stderr of every stage is discarded; callers only see stdout.
(aws s3 cp "${s3_url}" - \
  | zstd -T0 -dcq \
  | head -n 2 \
  | tsv-select -H -f 'nextclade_version,dataset_version' \
  | tail -n +2 \
  | jq --raw-input --slurp -c '
      split("\n")
      | map(split("\t"))
      | .[0:-1]
      | map( { "nextclade_version": .[0], "nextclade_dataset_version": .[1] } )
      | .[0]') \
  2> /dev/null
25 changes: 23 additions & 2 deletions bin/use-nextclade-cache
Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
#!/bin/bash
# Decide whether the existing Nextclade cache should be used: prints "true" or
# "false" on stdout, with [INFO] explanations on stderr.
set -euo pipefail

# NOTE(review): `vendored` is assigned twice. The second assignment below
# resolves to the same path via "$bin", so this first one is redundant. In this
# diff view it looks like the pre-change line — confirm before relying on it.
vendored="$(dirname "$0")"/../vendored
# Directory containing this script; used to locate sibling executables.
bin="$(dirname "$0")"
vendored="$bin"/../vendored

main() {
# Entry point. Prints "false" on stdout when the cache must be ignored (a renew
# flag exists, or the cached Nextclade version differs from the current one)
# and "true" otherwise. [INFO] messages go to stderr.
#
# Arguments:
#   $1 - destination s3:// URL (required)
#   $2 - source s3:// URL (required)
#   $3 - path to the Nextclade executable (required)
#   $4 - Nextclade dataset reference wildcard (optional, defaults to empty)
#
# The variables below are not declared `local`, so they are also visible to
# the helper functions this function calls.
s3_dst="${1:?A destination s3:// URL where the renew file is hosted is required as the first argument.}"
s3_src="${2:?A source s3:// URL where the fallback renew file is hosted is required as the second argument.}"
nextclade="${3:?A path to the Nextclade executable is required as the third argument}"
# Nextclade dataset reference wildcard
# NOTE(review): the "${3:-}" assignment is overwritten immediately by the
# "${4:-}" one, so only the fourth argument takes effect. The first line looks
# like the pre-change line of this diff — confirm.
reference="${3:-}"
reference="${4:-}"

# A renew flag forces the cache to be ignored, whatever the versions are.
if renew-flag-exists; then
echo "[INFO] Found renew flag" >&2
echo "false"
# `exit` (not `return`) ends the whole script successfully at this point.
exit 0
fi

# get-cache-version-info prints JSON; extract its `nextclade_version` field
# and compare it with what the given Nextclade executable reports.
cache_versions="$(get-cache-version-info)"
cache_nextclade_version="$(echo "$cache_versions" | jq -r .nextclade_version)"
current_nextclade_version="$("$nextclade" --version)"
# Plain string comparison: any difference at all causes the cache to be ignored.
if [[ "$cache_nextclade_version" != "$current_nextclade_version" ]]; then
echo "[INFO] Current Nextclade version (${current_nextclade_version}) is different from cache version (${cache_nextclade_version})" >&2
echo "false"
exit 0
fi

# No renew flag and matching versions: the cache can be used.
echo "true"
}

Expand All @@ -25,4 +37,13 @@ renew-flag-exists() {
"$vendored"/s3-object-exists "${dst_renew_file}" || "$vendored"/s3-object-exists "${src_renew_file}"
}

get-cache-version-info() {
  # Print the version info of the cached Nextclade results by running the
  # sibling `cache-version` script against the destination bucket.
  #
  # Globals read: bin, s3_dst, reference
  # Outputs:      whatever `cache-version` writes to stdout
  #
  # TODO: Update to check a separate file for version info.
  # Currently this just checks the first row of the nextclade.tsv file.
  "${bin}/cache-version" "${s3_dst}/nextclade${reference}.tsv.zst"
}

main "$@"
3 changes: 3 additions & 0 deletions workflow/snakemake_rules/nextclade.smk
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ if config.get("s3_dst") and config.get("s3_src"):
ruleorder: download_previous_alignment_from_s3 > create_empty_nextclade_aligned

rule use_nextclade_cache:
input:
nextclade="nextclade",
params:
dst_source=config["s3_dst"],
src_source=config["s3_src"],
Expand All @@ -73,6 +75,7 @@ if config.get("s3_dst") and config.get("s3_src"):
./bin/use-nextclade-cache \
{params.dst_source:q} \
{params.src_source:q} \
{input.nextclade:q} \
{wildcards.reference:q} \
> {output.use_nextclade_cache}
"""
Expand Down

0 comments on commit 5c2fcd8

Please sign in to comment.