From 99bc2a5a1eba49eaf6109b88afc79cf7d076cf74 Mon Sep 17 00:00:00 2001
From: Benjamin Otter
Date: Thu, 24 Jun 2021 12:52:40 +0200
Subject: [PATCH] filter: Add support for relative dates in --min-date and --max-date

The relative dates are parsed by `numeric_date` which uses
datetime.date.today() to translate the relative date to an absolute
date.

Relative dates are positive duration values following the ISO 8601
duration syntax e.g. `--min-date 1Y2W5D` for 1 year, 2 weeks and 5 days
ago or `--max-date 1D` for yesterday

This also adds a package dependency `isodate` to parse the duration
string.
---
 augur/filter.py | 46 ++++++++++++++++++++++++++++++++++++++++++----
 setup.py        |  1 +
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/augur/filter.py b/augur/filter.py
index 58850b2ef..c850f3ed8 100644
--- a/augur/filter.py
+++ b/augur/filter.py
@@ -6,6 +6,7 @@
 import csv
 import datetime
 import heapq
+import isodate
 import itertools
 import json
 import numpy as np
@@ -1108,8 +1109,16 @@ def register_arguments(parser):
         Uses Pandas Dataframe querying, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-query for syntax.
         (e.g., --query "country == 'Colombia'" or --query "(country == 'USA' & (division == 'Washington'))")"""
     )
-    metadata_filter_group.add_argument('--min-date', type=numeric_date, help="minimal cutoff for date, the cutoff date is inclusive; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD")
-    metadata_filter_group.add_argument('--max-date', type=numeric_date, help="maximal cutoff for date, the cutoff date is inclusive; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD")
+    metadata_filter_group.add_argument('--min-date', type=numeric_date,
+        help="""minimal cutoff for date, the cutoff date is inclusive; may be specified as:
+                1. an Augur-style numeric date with the year as the integer part (e.g. 2020.42)
+                2. a date in ISO 8601 date format (i.e. YYYY-MM-DD) (e.g. '2020-06-04')
+                3. a backwards-looking relative date in ISO 8601 duration format with optional P prefix (e.g. '1W', 'P1W')""")
+    metadata_filter_group.add_argument('--max-date', type=numeric_date,
+        help="""maximal cutoff for date, the cutoff date is inclusive; may be specified as:
+                1. an Augur-style numeric date with the year as the integer part (e.g. 2020.42)
+                2. a date in ISO 8601 date format (i.e. YYYY-MM-DD) (e.g. '2020-06-04')
+                3. a backwards-looking relative date in ISO 8601 duration format with optional P prefix (e.g. '1W', 'P1W')""")
     metadata_filter_group.add_argument('--exclude-ambiguous-dates-by', choices=['any', 'day', 'month', 'year'],
                                 help='Exclude ambiguous dates by day (e.g., 2020-09-XX), month (e.g., 2020-XX-XX), year (e.g., 200X-10-01), or any date fields. An ambiguous year makes the corresponding month and day ambiguous, too, even if those fields have unambiguous values (e.g., "201X-10-01"). Similarly, an ambiguous month makes the corresponding day ambiguous (e.g., "2010-XX-01").')
     metadata_filter_group.add_argument('--exclude', type=str, nargs="+", help="file(s) with list of strains to exclude")
@@ -1688,18 +1697,47 @@ def numeric_date(date):
     """
     Converts the given *date* string to a :py:class:`float`.
 
-    *date* may be given as a number (a float) with year as the integer part, or
-    in the YYYY-MM-DD (ISO 8601) syntax.
+    *date* may be given as:
+    1. A string or float (number) with year as the integer part
+    2. A string in the YYYY-MM-DD (ISO 8601) syntax
+    3. A string representing a relative date (duration before datetime.date.today())
+
+    Raises a :py:class:`ValueError` if *date* matches none of these forms.
 
     >>> numeric_date("2020.42")
     2020.42
     >>> numeric_date("2020-06-04")
     2020.42486...
+    >>> import datetime, isodate, treetime
+    >>> numeric_date("1W") == treetime.utils.numeric_date(datetime.date.today() - isodate.parse_duration("P1W"))
+    True
     """
+    # date is numeric
     try:
         return float(date)
     except ValueError:
+        pass
+
+    # date is in YYYY-MM-DD form
+    try:
         return treetime.utils.numeric_date(datetime.date(*map(int, date.split("-", 2))))
+    except ValueError:
+        pass
+
+    # date is a duration treated as a backwards-looking relative date
+    try:
+        # ISO 8601 durations start with 'P'; the prefix is optional here, so add it when missing
+        duration_str = str(date)
+        if not duration_str.startswith('P'):
+            duration_str = 'P' + duration_str
+        return treetime.utils.numeric_date(datetime.date.today() - isodate.parse_duration(duration_str))
+    except (ValueError, isodate.ISO8601Error):
+        pass
+
+    # Fail loudly instead of implicitly returning None: argparse reports a
+    # ValueError raised by a `type=` callable as an invalid argument value,
+    # whereas None is indistinguishable from the option not being given.
+    raise ValueError("Unable to determine date from %r: expected a numeric date, YYYY-MM-DD, or an ISO 8601 duration" % (date,))
 
 
 def calculate_sequences_per_group(target_max_value, counts_per_group, allow_probabilistic=True):
diff --git a/setup.py b/setup.py
index 907168ae7..f5eda48a4 100644
--- a/setup.py
+++ b/setup.py
@@ -53,6 +53,7 @@
     install_requires = [
         "bcbio-gff >=0.6.0, ==0.6.*",
         "biopython >=1.67, !=1.77, !=1.78",
+        "isodate ==0.6.*",
         "jsonschema >=3.0.0, ==3.*",
         "networkx >= 2.5, ==2.*",
         "packaging >=19.2",