Skip to content

Commit

Permalink
filter: Add support for relative dates in --min-date and --max-date
Browse files Browse the repository at this point in the history
Relative dates are parsed by `numeric_date`, which uses `datetime.date.today()` to translate the relative date into an absolute date.

Relative dates are positive duration values following the ISO 8601 duration syntax,
e.g. `--min-date 1Y2W5D` for 1 year, 2 weeks and 5 days ago, or `--max-date 1D` for yesterday.

This also adds a package dependency `isodate` to parse the duration string.
  • Loading branch information
benjaminotter authored and victorlin committed Apr 5, 2022
1 parent fbb1303 commit 99bc2a5
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 4 deletions.
41 changes: 37 additions & 4 deletions augur/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import csv
import datetime
import heapq
import isodate
import itertools
import json
import numpy as np
Expand Down Expand Up @@ -1108,8 +1109,16 @@ def register_arguments(parser):
Uses Pandas Dataframe querying, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-query for syntax.
(e.g., --query "country == 'Colombia'" or --query "(country == 'USA' & (division == 'Washington'))")"""
)
metadata_filter_group.add_argument('--min-date', type=numeric_date, help="minimal cutoff for date, the cutoff date is inclusive; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD")
metadata_filter_group.add_argument('--max-date', type=numeric_date, help="maximal cutoff for date, the cutoff date is inclusive; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD")
metadata_filter_group.add_argument('--min-date', type=numeric_date,
help="""minimal cutoff for date, the cutoff date is inclusive; may be specified as:
1. an Augur-style numeric date with the year as the integer part (e.g. 2020.42)
2. a date in ISO 8601 date format (i.e. YYYY-MM-DD) (e.g. '2020-06-04')
3. a backwards-looking relative date in ISO 8601 duration format with optional P prefix (e.g. '1W', 'P1W')""")
metadata_filter_group.add_argument('--max-date', type=numeric_date,
help="""maximal cutoff for date, the cutoff date is inclusive; may be specified as:
1. an Augur-style numeric date with the year as the integer part (e.g. 2020.42)
2. a date in ISO 8601 date format (i.e. YYYY-MM-DD) (e.g. '2020-06-04')
3. a backwards-looking relative date in ISO 8601 duration format with optional P prefix (e.g. '1W', 'P1W')""")
metadata_filter_group.add_argument('--exclude-ambiguous-dates-by', choices=['any', 'day', 'month', 'year'],
help='Exclude ambiguous dates by day (e.g., 2020-09-XX), month (e.g., 2020-XX-XX), year (e.g., 200X-10-01), or any date fields. An ambiguous year makes the corresponding month and day ambiguous, too, even if those fields have unambiguous values (e.g., "201X-10-01"). Similarly, an ambiguous month makes the corresponding day ambiguous (e.g., "2010-XX-01").')
metadata_filter_group.add_argument('--exclude', type=str, nargs="+", help="file(s) with list of strains to exclude")
Expand Down Expand Up @@ -1688,18 +1697,42 @@ def numeric_date(date):
def numeric_date(date):
    """
    Converts the given *date* string to a :py:class:`float`.

    *date* may be given as:

    1. A string or float (number) with year as the integer part
    2. A string in the YYYY-MM-DD (ISO 8601) syntax
    3. A string representing a relative date (duration before datetime.date.today()),
       in ISO 8601 duration syntax with an optional leading ``P``

    Raises :py:class:`ValueError` if *date* matches none of the supported
    formats, instead of silently returning ``None``.  When used as an argparse
    ``type=`` callable, this makes a mistyped ``--min-date``/``--max-date``
    a reported usage error rather than a silently disabled filter.

    >>> numeric_date("2020.42")
    2020.42
    >>> numeric_date("2020-06-04")
    2020.42486...
    >>> import datetime, isodate, treetime
    >>> numeric_date("1W") == treetime.utils.numeric_date(datetime.date.today() - isodate.parse_duration("P1W"))
    True
    """
    # date is numeric
    try:
        return float(date)
    except ValueError:
        pass

    # date is in YYYY-MM-DD form
    try:
        return treetime.utils.numeric_date(datetime.date(*map(int, date.split("-", 2))))
    except (TypeError, ValueError):
        # ValueError: a part is not an integer or is out of range for a date.
        # TypeError: fewer than three parts were given (e.g. "2020-06"), so
        # datetime.date() is missing arguments; fall through to the next form.
        pass

    # date is a duration treated as a backwards-looking relative date
    try:
        duration_str = str(date)
        # The leading "P" of an ISO 8601 duration is optional on input.
        if not duration_str.startswith('P'):
            duration_str = 'P' + duration_str
        return treetime.utils.numeric_date(datetime.date.today() - isodate.parse_duration(duration_str))
    except (ValueError, OverflowError, isodate.ISO8601Error):
        # OverflowError: the duration reaches before the earliest
        # representable date.
        pass

    raise ValueError(
        "Unable to determine date from {!r}. Expected a numeric date (e.g. 2020.42), "
        "a date in YYYY-MM-DD format (e.g. '2020-06-04'), or a relative date in "
        "ISO 8601 duration format with optional P prefix (e.g. '1W', 'P1W').".format(date)
    )


def calculate_sequences_per_group(target_max_value, counts_per_group, allow_probabilistic=True):
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
install_requires = [
"bcbio-gff >=0.6.0, ==0.6.*",
"biopython >=1.67, !=1.77, !=1.78",
"isodate ==0.6.*",
"jsonschema >=3.0.0, ==3.*",
"networkx >= 2.5, ==2.*",
"packaging >=19.2",
Expand Down

0 comments on commit 99bc2a5

Please sign in to comment.