diff --git a/ingest/defaults/config.yaml b/ingest/defaults/config.yaml index f5c16b2..f54f8c2 100644 --- a/ingest/defaults/config.yaml +++ b/ingest/defaults/config.yaml @@ -126,7 +126,7 @@ curate: nextclade: min_length: 1000 # E gene length is approximately 1400nt min_seed_cover: 0.1 - gene: ["E","C","M","pr","NS1","NS2A","NS2B","NS3","NS4A","2K","NS4B","NS5"] + gene: ["E","C","prM","NS1","NS2A","NS2B","NS3","NS4A","2K","NS4B","NS5"] # Nextclade Fields to rename to metadata field names. field_map: seqName: accession # ID field used to merge annotations @@ -134,4 +134,5 @@ nextclade: alignmentStart: alignmentStart alignmentEnd: alignmentEnd coverage: genome_coverage - failedCdses: failedCdses \ No newline at end of file + failedCdses: failedCdses + id_field: accession \ No newline at end of file diff --git a/ingest/rules/nextclade.smk b/ingest/rules/nextclade.smk index 06644d6..aea8fd6 100644 --- a/ingest/rules/nextclade.smk +++ b/ingest/rules/nextclade.smk @@ -16,7 +16,23 @@ https://docs.nextstrain.org/projects/nextclade/page/user/nextclade-cli.html SUPPORTED_NEXTCLADE_SEROTYPES = ['denv1', 'denv2', 'denv3', 'denv4'] SEROTYPE_CONSTRAINTS = '|'.join(SUPPORTED_NEXTCLADE_SEROTYPES) -rule nextclade_denvX: +rule get_nextclade_dataset: + """Download Nextclade dataset""" + output: + dataset="data/nextclade_data/v-gen-lab/{serotype}.zip", + params: + dataset_name=lambda wildcards: f"community/v-gen-lab/dengue/{wildcards.serotype}", + wildcard_constraints: + serotype=SEROTYPE_CONSTRAINTS, + shell: + r""" + nextclade3 dataset get \ + --name={params.dataset_name:q} \ + --output-zip={output.dataset} \ + --verbose + """ + +rule run_nextclade: """ For each type, classify into the appropriate Dengue genotype 1. Capture the alignment @@ -26,30 +42,35 @@ rule nextclade_denvX: """ input: sequences="results/sequences_{serotype}.fasta", - dataset="../nextclade_data/{serotype}", + dataset="data/nextclade_data/v-gen-lab/{serotype}.zip", output: - nextclade_denvX="data/nextclade_results/nextclade_{serotype}.tsv", - nextclade_alignment="results/aligned_{serotype}.fasta", - nextclade_translations=expand("data/translations/{{serotype}}/{gene}/seqs.gene.fasta", gene=config["nextclade"]["gene"]), + nextclade="results/v-gen-lab/{serotype}/nextclade.tsv", + alignment="results/v-gen-lab/{serotype}/alignment.fasta", + translations=expand("data/v-gen-lab/{{serotype}}/translations/{gene}/seqs.gene.fasta", gene=config["nextclade"]["gene"]), threads: 4 params: min_length=config["nextclade"]["min_length"], min_seed_cover=config["nextclade"]["min_seed_cover"], - output_translations = lambda wildcards: f"data/translations/{wildcards.serotype}/{{cds}}/seqs.gene.fasta", + output_translations = lambda wildcards: f"data/v-gen-lab/{wildcards.serotype}/translations/{{cds}}/seqs.gene.fasta", + log: + "logs/v-gen-lab/{serotype}/run_nextclade.txt", + benchmark: + "benchmarks/v-gen-lab/{serotype}/run_nextclade.txt", wildcard_constraints: serotype=SEROTYPE_CONSTRAINTS shell: - """ - nextclade run \ - --input-dataset {input.dataset} \ - -j {threads} \ - --output-tsv {output.nextclade_denvX} \ - --min-length {params.min_length} \ - --min-seed-cover {params.min_seed_cover} \ - --silent \ - --output-fasta {output.nextclade_alignment} \ - --output-translations {params.output_translations} \ - {input.sequences} + r""" + nextclade3 run \ + --input-dataset {input.dataset} \ + -j {threads} \ + --output-tsv {output.nextclade} \ + --min-length {params.min_length} \ + --min-seed-cover {params.min_seed_cover} \ + --silent \ + --output-fasta {output.alignment} \ + --output-translations {params.output_translations} \ + {input.sequences} \ + &> {log:q} """ rule concat_genotype_nextclade_results: @@ -57,58 +78,43 @@ rule concat_genotype_nextclade_results: Concatenate all the nextclade results for dengue genotype classification """ input: - nextclade_results_files = expand("data/nextclade_results/nextclade_{serotype}.tsv", serotype=SUPPORTED_NEXTCLADE_SEROTYPES), + nextclade_files=expand("results/v-gen-lab/{serotype}/nextclade.tsv", serotype=SUPPORTED_NEXTCLADE_SEROTYPES), output: - genotype_nextclade="results/nextclade_genotypes.tsv", + genotype_nextclade=temp("results/v-gen-lab/nextclade_metadata.tsv"), params: input_nextclade_fields=",".join([f'{key}' for key, value in config["nextclade"]["field_map"].items()]), output_nextclade_fields=",".join([f'{value}' for key, value in config["nextclade"]["field_map"].items()]), + log: + "logs/v-gen-lab/concat_genotype_nextclade_results.txt", + benchmark: + "benchmarks/v-gen-lab/concat_genotype_nextclade_results.txt", shell: """ echo "{params.output_nextclade_fields}" \ | tr ',' '\t' \ > {output.genotype_nextclade} - tsv-select -H -f "{params.input_nextclade_fields}" {input.nextclade_results_files} \ + tsv-select -H -f "{params.input_nextclade_fields}" {input.nextclade_files} \ | awk 'NR>1 {{print}}' \ >> {output.genotype_nextclade} """ -rule append_nextclade_columns: - """ - Append the nextclade results to the metadata - """ - input: - metadata="data/metadata_all.tsv", - genotype_nextclade="results/nextclade_genotypes.tsv", - output: - metadata_all="data/metadata_nextclade.tsv", - params: - id_field=list(config["nextclade"]["field_map"].values())[0], - output_nextclade_fields=",".join([f'{value}' for key, value in config["nextclade"]["field_map"].items()][1:]), - shell: - """ - tsv-join -H \ - --filter-file {input.genotype_nextclade} \ - --key-fields {params.id_field} \ - --append-fields {params.output_nextclade_fields} \ - --write-all ? \ - {input.metadata} \ - > {output.metadata_all} - """ - rule calculate_gene_coverage: """ Calculate the coverage of the gene of interest """ input: - nextclade_translation="data/translations/{serotype}/{gene}/seqs.gene.fasta", + nextclade_translation="data/v-gen-lab/{serotype}/translations/{gene}/seqs.gene.fasta", output: - gene_coverage="data/translations/{serotype}/{gene}/gene_coverage.tsv", + gene_coverage="data/v-gen-lab/{serotype}/translations/{gene}/gene_coverage.tsv", wildcard_constraints: serotype=SEROTYPE_CONSTRAINTS, params: id_field=config["curate"]["output_id_field"], + log: + "logs/v-gen-lab/{serotype}/{gene}/calculate_gene_coverage.txt", + benchmark: + "benchmarks/v-gen-lab/{serotype}/{gene}/calculate_gene_coverage.txt", shell: """ python scripts/calculate-gene-converage-from-nextclade-translation.py \ @@ -123,40 +129,119 @@ rule aggregate_gene_coverage_by_gene: Aggregate the gene coverage results by gene """ input: - gene_coverage=expand("data/translations/{serotype}/{{gene}}/gene_coverage.tsv", serotype=SUPPORTED_NEXTCLADE_SEROTYPES), + gene_coverage=expand("data/v-gen-lab/{serotype}/translations/{{gene}}/gene_coverage.tsv", serotype=SUPPORTED_NEXTCLADE_SEROTYPES), output: gene_coverage_all="results/{gene}/gene_coverage_all.tsv", + log: + "logs/v-gen-lab/{gene}/aggregate_gene_coverage_by_gene.txt", + benchmark: + "benchmarks/v-gen-lab/{gene}/aggregate_gene_coverage_by_gene.txt", shell: """ tsv-append -H {input.gene_coverage} > {output.gene_coverage_all} """ -rule append_gene_coverage_columns: +rule combine_gene_coverage_columns: """ Append the gene coverage results to the metadata + Since gene coverage values should be a value between 0 and 1, empty fields should be filled with 0's """ input: - metadata="data/metadata_nextclade.tsv", + metadata="data/metadata_all.tsv", gene_coverage=expand("results/{gene}/gene_coverage_all.tsv", gene=config["nextclade"]["gene"]) output: - metadata_all="results/metadata_all.tsv", + gene_coverage_combined="results/gene_coverage_combined.tsv", params: id_field=config["curate"]["output_id_field"], + log: + "logs/v-gen-lab/combine_gene_coverage_columns.txt", + benchmark: + "benchmarks/v-gen-lab/combine_gene_coverage_columns.txt", shell: """ - cp {input.metadata} {output.metadata_all} + tsv-select -H -f "{params.id_field}" {input.metadata} > {output.gene_coverage_combined} for FILE in {input.gene_coverage}; do tsv-join -H \ --filter-file $FILE \ --key-fields {params.id_field} \ --append-fields '*_coverage' \ --write-all 0 \ - {output.metadata_all} \ + {output.gene_coverage_combined} \ > results/temp_aggregate_gene_coverage.tsv - mv results/temp_aggregate_gene_coverage.tsv {output.metadata_all} + mv results/temp_aggregate_gene_coverage.tsv {output.gene_coverage_combined} done """ +rule append_nextclade_and_gene_coverage_columns: + """ + Append the nextclade results to the metadata + """ + input: + metadata="data/metadata_all.tsv", + genotype_nextclade="results/v-gen-lab/nextclade_metadata.tsv", + gene_coverage="results/gene_coverage_combined.tsv", + output: + metadata="data/metadata_nextclade.tsv", + params: + metadata_id_field=config["curate"]["output_id_field"], + nextclade_id_field=config["nextclade"]["id_field"], + log: + "logs/v-gen-lab/append_nextclade_and_gene_coverage_columns.txt", + benchmark: + "benchmarks/v-gen-lab/append_nextclade_and_gene_coverage_columns.txt", + shell: + """ + augur merge \ + --metadata \ + metadata={input.metadata:q} \ + nextclade={input.genotype_nextclade:q} \ + gene_coverage={input.gene_coverage:q} \ + --metadata-id-columns \ + metadata={params.metadata_id_field:q} \ + nextclade={params.nextclade_id_field:q} \ + gene_coverage={params.metadata_id_field:q} \ + --output-metadata {output.metadata:q} \ + --no-source-columns \ + &> {log:q} + """ + +rule infer_major_lineage: + """ + Infer Major dengue lineages + Reference: https://dengue-lineages.org/assets/img/homepage-img-01.png + For example: + Minor lineage -> Major lineage + 1I_A.2.3 -> 1I_A + 2II_B -> 2II_B + 4III -> 4III + """ + input: + metadata="data/metadata_nextclade.tsv", + output: + metadata="results/metadata_all.tsv", + params: + nextclade_field="genotype_nextclade", + log: + "logs/v-gen-lab/infer_major_lineage.txt", + benchmark: + "benchmarks/v-gen-lab/infer_major_lineage.txt", + shell: + """ + cat {input.metadata:q} \ + | csvtk -tl mutate \ + -f {params.nextclade_field} \ + -n genotype \ + -p "^([0-9][A-Z]+)" \ + | csvtk -tl mutate \ + -f {params.nextclade_field} \ + -n major_lineage \ + -p "^([0-9][A-Z]+(?:_[A-Z])?)" \ + | csvtk -tl mutate \ + -f {params.nextclade_field} \ + -n minor_lineage \ + > {output.metadata:q} + """ + rule split_metadata_by_serotype: """ Split the metadata by serotype @@ -169,7 +254,11 @@ rule split_metadata_by_serotype: serotype=SEROTYPE_CONSTRAINTS params: serotype_field=config["curate"]["serotype_field"], + log: + "logs/split_metadata_by_serotype_{serotype}.txt", + benchmark: + "benchmarks/split_metadata_by_serotype_{serotype}.txt", shell: """ tsv-filter -H --str-eq {params.serotype_field}:{wildcards.serotype} {input.metadata} > {output.serotype_metadata} - """ + """ \ No newline at end of file diff --git a/phylogenetic/defaults/color_orderings.tsv b/phylogenetic/defaults/color_orderings.tsv index b60ed60..6ce3fb5 100644 --- a/phylogenetic/defaults/color_orderings.tsv +++ b/phylogenetic/defaults/color_orderings.tsv @@ -240,23 +240,288 @@ serotype_genbank denv2 serotype_genbank denv3 serotype_genbank denv4 -genotype_nextclade DENV1/I -genotype_nextclade DENV1/II -genotype_nextclade DENV1/III -genotype_nextclade DENV1/IV -genotype_nextclade DENV1/V -genotype_nextclade DENV2/AA -genotype_nextclade DENV2/AI -genotype_nextclade DENV2/AII -genotype_nextclade DENV2/AM -genotype_nextclade DENV2/C -genotype_nextclade DENV2/S -genotype_nextclade DENV3/I -genotype_nextclade DENV3/II -genotype_nextclade DENV3/III -genotype_nextclade DENV3/IV -genotype_nextclade DENV4/I -genotype_nextclade DENV4/II -genotype_nextclade DENV4/S - ################ +# Hill 2024 dataset +genotype 1I +genotype 1II +genotype 1III +genotype 1IV +genotype 1V +genotype 1VII +genotype 2I +genotype 2II +genotype 2III +genotype 2IV +genotype 2V +genotype 2VI +genotype 3I +genotype 3II +genotype 3III +genotype 3V +genotype 4I +genotype 4II +genotype 4III +genotype 4IV + +major_lineage 1I +major_lineage 1I_A +major_lineage 1I_B +major_lineage 1I_C +major_lineage 1I_D +major_lineage 1I_E +major_lineage 1I_F +major_lineage 1I_G +major_lineage 1I_H +major_lineage 1I_J +major_lineage 1I_K +major_lineage 1II +major_lineage 1III +major_lineage 1III_A +major_lineage 1III_B +major_lineage 1IV +major_lineage 1IV_A +major_lineage 1IV_B +major_lineage 1IV_C +major_lineage 1V +major_lineage 1V_A +major_lineage 1V_B +major_lineage 1V_C +major_lineage 1V_D +major_lineage 1V_E +major_lineage 1V_F +major_lineage 1V_G +major_lineage 1V_H +major_lineage 1V_J +major_lineage 1VII +major_lineage 1VII_A +major_lineage 1VII_B +major_lineage 2I +major_lineage 2II +major_lineage 2II_A +major_lineage 2II_B +major_lineage 2II_C +major_lineage 2II_D +major_lineage 2II_E +major_lineage 2II_F +major_lineage 2III +major_lineage 2III_A +major_lineage 2III_B +major_lineage 2III_C +major_lineage 2III_D +major_lineage 2III_E +major_lineage 2IV +major_lineage 2V +major_lineage 2V_A +major_lineage 2V_B +major_lineage 2V_C +major_lineage 2V_D +major_lineage 2V_E +major_lineage 2VI +major_lineage 3I +major_lineage 3I_A +major_lineage 3I_B +major_lineage 3I_C +major_lineage 3II +major_lineage 3II_A +major_lineage 3II_B +major_lineage 3III +major_lineage 3III_A +major_lineage 3III_B +major_lineage 3III_C +major_lineage 3V +major_lineage 4I +major_lineage 4I_A +major_lineage 4I_B +major_lineage 4II +major_lineage 4II_A +major_lineage 4II_B +major_lineage 4III +major_lineage 4IV + +minor_lineage 1I +minor_lineage 1I_A +minor_lineage 1I_B +minor_lineage 1I_C +minor_lineage 1I_D +minor_lineage 1I_E +minor_lineage 1I_E.1 +minor_lineage 1I_E.1.1 +minor_lineage 1I_E.1.2 +minor_lineage 1I_E.2 +minor_lineage 1I_E.3 +minor_lineage 1I_E.4 +minor_lineage 1I_F +minor_lineage 1I_G +minor_lineage 1I_H +minor_lineage 1I_H.1 +minor_lineage 1I_H.2 +minor_lineage 1I_H.3 +minor_lineage 1I_J +minor_lineage 1I_K +minor_lineage 1I_K.1 +minor_lineage 1I_K.1.1 +minor_lineage 1I_K.1.2 +minor_lineage 1I_K.2 +minor_lineage 1I_K.3 +minor_lineage 1I_K.4 +minor_lineage 1I_K.5 +minor_lineage 1I_K.6 +minor_lineage 1I_K.7 +minor_lineage 1II +minor_lineage 1III +minor_lineage 1III_A +minor_lineage 1III_A.1 +minor_lineage 1III_A.2 +minor_lineage 1III_A.3 +minor_lineage 1III_A.4 +minor_lineage 1III_B +minor_lineage 1IV +minor_lineage 1IV_A +minor_lineage 1IV_B +minor_lineage 1IV_B.1 +minor_lineage 1IV_B.2 +minor_lineage 1IV_C +minor_lineage 1V +minor_lineage 1V_A +minor_lineage 1V_B +minor_lineage 1V_C +minor_lineage 1V_D +minor_lineage 1V_D.1 +minor_lineage 1V_D.1.1 +minor_lineage 1V_D.1.2 +minor_lineage 1V_D.2 +minor_lineage 1V_E +minor_lineage 1V_E.1 +minor_lineage 1V_E.2 +minor_lineage 1V_E.3 +minor_lineage 1V_E.4 +minor_lineage 1V_F +minor_lineage 1V_G +minor_lineage 1V_H +minor_lineage 1V_J +minor_lineage 1VII +minor_lineage 1VII_A +minor_lineage 1VII_B +minor_lineage 2I +minor_lineage 2II +minor_lineage 2II_A +minor_lineage 2II_A.1 +minor_lineage 2II_A.1.1 +minor_lineage 2II_A.1.1.1 +minor_lineage 2II_A.1.1.2 +minor_lineage 2II_A.1.2 +minor_lineage 2II_A.2.1 +minor_lineage 2II_A.2.2 +minor_lineage 2II_B +minor_lineage 2II_C +minor_lineage 2II_C.1 +minor_lineage 2II_C.2 +minor_lineage 2II_D +minor_lineage 2II_D.1 +minor_lineage 2II_D.1.1 +minor_lineage 2II_D.1.2 +minor_lineage 2II_D.2 +minor_lineage 2II_D.3 +minor_lineage 2II_E +minor_lineage 2II_E.1 +minor_lineage 2II_E.2 +minor_lineage 2II_F +minor_lineage 2II_F.1 +minor_lineage 2II_F.1.1 +minor_lineage 2II_F.1.1.1 +minor_lineage 2II_F.1.1.2 +minor_lineage 2II_F.1.1.3 +minor_lineage 2II_F.1.1.4 +minor_lineage 2II_F.1.1.5 +minor_lineage 2II_F.1.1.6 +minor_lineage 2II_F.1.2 +minor_lineage 2II_F.1.3 +minor_lineage 2II_F.2 +minor_lineage 2II_F.2.1 +minor_lineage 2II_F.2.2 +minor_lineage 2III +minor_lineage 2III_A +minor_lineage 2III_A.1 +minor_lineage 2III_A.2 +minor_lineage 2III_B +minor_lineage 2III_C +minor_lineage 2III_C.1 +minor_lineage 2III_C.1.1 +minor_lineage 2III_C.1.2 +minor_lineage 2III_C.2 +minor_lineage 2III_D +minor_lineage 2III_D.1 +minor_lineage 2III_D.1.1 +minor_lineage 2III_D.1.2 +minor_lineage 2III_D.1.3 +minor_lineage 2III_D.2 +minor_lineage 2III_D.3 +minor_lineage 2III_E +minor_lineage 2IV +minor_lineage 2V +minor_lineage 2V_A +minor_lineage 2V_A.1 +minor_lineage 2V_A.1.1 +minor_lineage 2V_A.1.2 +minor_lineage 2V_A.1.3 +minor_lineage 2V_A.2 +minor_lineage 2V_A.3 +minor_lineage 2V_A.4 +minor_lineage 2V_B +minor_lineage 2V_C +minor_lineage 2V_D +minor_lineage 2V_E +minor_lineage 2VI +minor_lineage 3I +minor_lineage 3I_A +minor_lineage 3I_A.1 +minor_lineage 3I_A.1.1 +minor_lineage 3I_A.1.2 +minor_lineage 3I_A.2 +minor_lineage 3I_B +minor_lineage 3I_C +minor_lineage 3II +minor_lineage 3II_A +minor_lineage 3II_A.1 +minor_lineage 3II_A.2 +minor_lineage 3II_A.3 +minor_lineage 3II_A.4 +minor_lineage 3II_A.5 +minor_lineage 3II_B +minor_lineage 3III +minor_lineage 3III_A +minor_lineage 3III_A.1 +minor_lineage 3III_A.2 +minor_lineage 3III_B +minor_lineage 3III_B.1 +minor_lineage 3III_B.2 +minor_lineage 3III_B.3 +minor_lineage 3III_B.3.1 +minor_lineage 3III_B.3.2 +minor_lineage 3III_C +minor_lineage 3III_C.1 +minor_lineage 3III_C.2 +minor_lineage 3III_C.2.1 +minor_lineage 3III_C.2.2 +minor_lineage 3V +minor_lineage 4I +minor_lineage 4I_A +minor_lineage 4I_A.1 +minor_lineage 4I_A.1.1 +minor_lineage 4I_A.1.2 +minor_lineage 4I_A.2 +minor_lineage 4I_A.3 +minor_lineage 4I_B +minor_lineage 4I_B.1 +minor_lineage 4I_B.2 +minor_lineage 4II +minor_lineage 4II_A +minor_lineage 4II_A.1 +minor_lineage 4II_A.2 +minor_lineage 4II_B +minor_lineage 4II_B.1 +minor_lineage 4II_B.1.1 +minor_lineage 4II_B.1.2 +minor_lineage 4II_B.2 +minor_lineage 4III +minor_lineage 4IV diff --git a/phylogenetic/defaults/config_dengue.yaml b/phylogenetic/defaults/config_dengue.yaml index 6fdffe0..f41dd9d 100644 --- a/phylogenetic/defaults/config_dengue.yaml +++ b/phylogenetic/defaults/config_dengue.yaml @@ -19,11 +19,11 @@ filter: traits: sampling_bias_correction: '3' traits_columns: - all: 'region serotype_genbank genotype_nextclade' - denv1: 'country region serotype_genbank genotype_nextclade' - denv2: 'country region serotype_genbank genotype_nextclade' - denv3: 'country region serotype_genbank genotype_nextclade' - denv4: 'country region serotype_genbank genotype_nextclade' + all: 'region serotype_genbank' + denv1: 'country region serotype_genbank' + denv2: 'country region serotype_genbank' + denv3: 'country region serotype_genbank' + denv4: 'country region serotype_genbank' clades: clade_definitions: diff --git a/phylogenetic/defaults/description.md b/phylogenetic/defaults/description.md index bc09c38..c363889 100644 --- a/phylogenetic/defaults/description.md +++ b/phylogenetic/defaults/description.md @@ -12,3 +12,27 @@ We curate sequence data and metadata from NCBI as starting point for our analyse * [data.nextstrain.org/files/workflows/dengue/metadata_denv3.tsv.zst](https://data.nextstrain.org/files/workflows/dengue/metadata_denv3.tsv.zst) * [data.nextstrain.org/files/workflows/dengue/sequences_denv4.fasta.zst](https://data.nextstrain.org/files/workflows/dengue/sequences_denv4.fasta.zst) * [data.nextstrain.org/files/workflows/dengue/metadata_denv4.tsv.zst](https://data.nextstrain.org/files/workflows/dengue/metadata_denv4.tsv.zst) + + +### Lineage coloring + +There are up to six different lineage coloring options available depending on the source of data: + +**Based on a Nextclade call against [community/v-gen-lab/dengue datasets](https://github.com/nextstrain/nextclade_data/tree/master/data/community/v-gen-lab/dengue).** + +The lineage color options are split into levels of detail, with an example shown beside each level: + +* **Genotype (Nextclade):** 3III +* **Major Lineage (Nextclade):** 3III_B +* **Minor Lineage (Nextclade):** 3III_B.3.2 + +For more information about the dengue lineage system please visit [dengue-lineages.org](https://dengue-lineages.org/). + +**Based on NCBI GenBank metadata** + +* **Serotype (GenBank metadata):** denv1 to denv4 + +**Based on Augur clade assignment (`clade_membership`)** +_(Only available for genome trees, not gene trees)_ +* **Serotype (Nextstrain):** DENV1 – DENV4, annotated only on the "all" tree +* **Genotype (Nextstrain):** such as DENV3/III _(equivalent to 3III in "Genotype (Nextclade)")_, annotated only on the serotype-specific trees. diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index cf1a1fd..7862c9f 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -42,8 +42,8 @@ rule prepare_auspice_config: output: auspice_config="results/defaults/{gene}/auspice_config_{serotype}.json", params: - replace_clade_key=lambda wildcard: r"clade_membership" if wildcard.gene in ['genome'] else r"genotype_nextclade", - replace_clade_title=lambda wildcard: r"Serotype" if wildcard.serotype in ['all'] else r"Dengue Genotype (Nextclade)", + replace_clade_key=lambda wildcard: r"clade_membership" if wildcard.gene in ['genome'] else r"major_lineage", + replace_clade_title=lambda wildcard: r"Serotype" if wildcard.serotype in ['all'] else r"Genotype (Nextclade)", run: data = { "title": "Real-time tracking of dengue virus evolution", @@ -79,13 +79,23 @@ rule prepare_auspice_config: "type": "categorical" }, { - "key": "genotype_nextclade", - "title": "Dengue Genotype (Nextclade)", + "key": "serotype_genbank", + "title": "Serotype (Genbank metadata)", "type": "categorical" }, { - "key": "serotype_genbank", - "title": "Serotype (Genbank metadata)", + "key": "genotype", + "title": "Genotype (Nextclade)", + "type": "categorical" + }, + { + "key": "major_lineage", + "title": "Major lineage (Nextclade)", + "type": "categorical" + }, + { + "key": "minor_lineage", + "title": "Minor lineage (Nextclade)", "type": "categorical" } ], @@ -122,7 +132,7 @@ rule prepare_auspice_config: if wildcards.gene in ['genome'] and wildcards.serotype in ['all']: clade_membership_title="Serotype (Nextstrain)" else: - clade_membership_title="Dengue Genotype (Nextstrain)" + clade_membership_title="Genotype (Nextstrain)" data["colorings"].append({ "key": "clade_membership",