Skip to content

Commit

Permalink
ingest: Remove use of ncov-ingest geolocation rules
Browse files Browse the repository at this point in the history
Remove the use of the ncov-ingest geolocation rules since Augur
now uses the built-in geolocation rules by default.

Depends on the release of
<nextstrain/augur#1745>
  • Loading branch information
joverlee521 committed Feb 12, 2025
1 parent b400173 commit dcfa475
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 34 deletions.
4 changes: 0 additions & 4 deletions ingest/defaults/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,6 @@ ncbi_datasets_fields:

# Config parameters related to the curate pipeline
curate:
# URL pointing to the public generalized geolocation rules
# For the Nextstrain team, this is currently
# "https://raw.githubusercontent.com/nextstrain/ncov-ingest/master/source-data/gisaid_geoLocationRules.tsv"
geolocation_rules_url: "https://raw.githubusercontent.com/nextstrain/ncov-ingest/master/source-data/gisaid_geoLocationRules.tsv"
# The path to the local geolocation rules within the pathogen repo
# The path should be relative to the ingest directory.
local_geolocation_rules: "defaults/geolocation_rules.tsv"
Expand Down
32 changes: 2 additions & 30 deletions ingest/rules/curate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,6 @@ OUTPUTS:
"""


# The following two rules can be ignored if you choose not to use the
# generalized geolocation rules that are shared across pathogens.
# The Nextstrain team will try to maintain a generalized set of geolocation
# rules that can then be overridden by local geolocation rules per pathogen repo.
# Download the generalized geolocation rules from the URL configured at
# config["curate"]["geolocation_rules_url"] into the data/ directory.
rule fetch_general_geolocation_rules:
    output:
        general_geolocation_rules="data/general-geolocation-rules.tsv",
    params:
        geolocation_rules_url=config["curate"]["geolocation_rules_url"],
    shell:
        # --fail: exit non-zero on HTTP errors so an error page is never
        #         saved as if it were the rules TSV.
        # --location: follow redirects to the final resource.
        # --silent/--show-error: no progress meter, but still report errors.
        """
        curl --fail --silent --show-error --location {params.geolocation_rules_url} > {output.general_geolocation_rules}
        """


# Concatenate the general geolocation rules with the pathogen-specific local
# rules into a single TSV. The general rules come first and the local rules
# are written after them.
rule concat_geolocation_rules:
    input:
        general_geolocation_rules="data/general-geolocation-rules.tsv",
        local_geolocation_rules=config["curate"]["local_geolocation_rules"],
    output:
        all_geolocation_rules="data/all-geolocation-rules.tsv",
    shell:
        # Truncate (`>`) rather than append (`>>`): the output is built from
        # scratch, so content already at the output path must not be kept.
        """
        cat {input.general_geolocation_rules} {input.local_geolocation_rules} > {output.all_geolocation_rules}
        """


def format_field_map(field_map: dict[str, str]) -> str:
"""
Format dict to `"key1"="value1" "key2"="value2"...` for use in shell commands.
Expand All @@ -57,8 +30,7 @@ def format_field_map(field_map: dict[str, str]) -> str:
rule curate:
input:
sequences_ndjson="data/ncbi.ndjson",
# Change the geolocation_rules input path if you are removing the above two rules
all_geolocation_rules="data/all-geolocation-rules.tsv",
geolocation_rules=config["curate"]["local_geolocation_rules"],
annotations=config["curate"]["annotations"],
output:
metadata="data/all_metadata.tsv",
Expand Down Expand Up @@ -106,7 +78,7 @@ rule curate:
--default-value {params.authors_default_value} \
--abbr-authors-field {params.abbr_authors_field} \
| augur curate apply-geolocation-rules \
--geolocation-rules {input.all_geolocation_rules} \
--geolocation-rules {input.geolocation_rules} \
| augur curate apply-record-annotations \
--annotations {input.annotations} \
--id-field {params.annotations_id} \
Expand Down

0 comments on commit dcfa475

Please sign in to comment.