Skip to content

Commit c0bccdf

Browse files
authored
Merge pull request #1656: Centralize --validation-mode and --skip-validation
2 parents 37958a6 + a6e045d commit c0bccdf

File tree

8 files changed

+406
-39
lines changed

8 files changed

+406
-39
lines changed

CHANGES.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,17 @@
22

33
## __NEXT__
44

5+
### Features
6+
7+
* ancestral, translate: Add `--skip-validation` as an alias for `--validation-mode=skip`. [#1656][] (@victorlin)
8+
* clades: Allow customizing the validation of input node data JSON files with `--validation-mode` and `--skip-validation`. [#1656][] (@victorlin)
9+
510
### Bug Fixes
611

712
* index: Previously specifying a directory that does not exist in the path to `--output` would result in an incorrect error stating that the input file does not exist. It now shows the correct path responsible for the error. [#1644][] (@victorlin)
813

914
[#1644]: https://github.com/nextstrain/augur/issues/1644
15+
[#1656]: https://github.com/nextstrain/augur/pull/1656
1016

1117
## 26.0.0 (17 September 2024)
1218

augur/ancestral.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,8 @@
3434
from .io.vcf import is_vcf as is_filename_vcf
3535
from treetime.vcf_utils import read_vcf, write_vcf
3636
from collections import defaultdict
37-
from .types import ValidationMode
37+
from .argparse_ import add_validation_arguments
3838
from .util_support.node_data_file import NodeDataObject
39-
from .export_v2 import validation_mode_help_message
4039

4140
def ancestral_sequence_inference(tree=None, aln=None, ref=None, infer_gtr=True,
4241
marginal=False, fill_overhangs=True, infer_tips=False,
@@ -335,8 +334,7 @@ def register_parser(parent_subparsers):
335334
general_group = parser.add_argument_group(
336335
"general",
337336
)
338-
general_group.add_argument('--validation-mode', type=ValidationMode, choices=[mode for mode in ValidationMode], default=ValidationMode.ERROR,
339-
help=validation_mode_help_message)
337+
add_validation_arguments(general_group)
340338

341339
return parser
342340

augur/argparse_.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
"""
22
Custom helpers for the argparse standard library.
33
"""
4-
from argparse import Action, ArgumentDefaultsHelpFormatter
4+
from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser, _ArgumentGroup
5+
from typing import Union
6+
from .types import ValidationMode
57

68

79
# Include this in an argument help string to suppress the automatic appending
@@ -93,3 +95,34 @@ def __call__(self, parser, namespace, value, option_string = None):
9395
current = []
9496

9597
setattr(namespace, self.dest, [*current, *value])
98+
99+
# Shared helper that registers the two validation-related command line options
# on `parser`, which may be a whole ArgumentParser or a single argument group.
# Both options write to the same namespace attribute, `validation_mode`.
100+
def add_validation_arguments(parser: Union[ArgumentParser, _ArgumentGroup]):
101+
"""
102+
Add arguments to configure validation mode of node data JSON files.
103+
"""
104+
# Primary option: the value is converted with ValidationMode, restricted to
# the members of that enum, and defaults to ValidationMode.ERROR.
parser.add_argument(
105+
'--validation-mode',
106+
dest="validation_mode",
107+
type=ValidationMode,
108+
choices=[mode for mode in ValidationMode],
109+
default=ValidationMode.ERROR,
110+
help="""
111+
Control if optional validation checks are performed and what
112+
happens if they fail.
113+
114+
'error' and 'warn' modes perform validation and emit messages about
115+
failed validation checks. 'error' mode causes a non-zero exit
116+
status if any validation checks failed, while 'warn' does not.
117+
118+
'skip' mode performs no validation.
119+
120+
Note that some validation checks are non-optional and as such are
121+
not affected by this setting.
122+
""")
123+
# Shorthand flag: stores the constant ValidationMode.SKIP into the same
# `validation_mode` dest and declares no default of its own.
# NOTE(review): this presumably relies on argparse keeping the ERROR default
# registered by --validation-mode above when the flag is absent, and on the
# option given last winning when both are passed -- confirm against the
# argparse documentation before reordering these two calls.
parser.add_argument(
124+
'--skip-validation',
125+
dest="validation_mode",
126+
action="store_const",
127+
const=ValidationMode.SKIP,
128+
help="Skip validation of input/output files, equivalent to --validation-mode=skip. Use at your own risk!")

augur/clades.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from .io.file import PANDAS_READ_CSV_OPTIONS
2323
from argparse import SUPPRESS
2424
from .utils import get_parent_name_by_child_name_for_tree, read_node_data, write_json, get_json_name
25+
from .argparse_ import add_validation_arguments
2526

2627
UNASSIGNED = 'unassigned'
2728

@@ -324,10 +325,10 @@ def get_reference_sequence_from_root_node(all_muts, root_name):
324325

325326
return ref
326327

327-
def parse_nodes(tree_file, node_data_files):
328+
def parse_nodes(tree_file, node_data_files, validation_mode):
328329
tree = Phylo.read(tree_file, 'newick')
329330
# don't supply tree to read_node_data as we don't want to require that every node is present in the node_data JSONs
330-
node_data = read_node_data(node_data_files)
331+
node_data = read_node_data(node_data_files, validation_mode=validation_mode)
331332
# node_data files can be parsed without 'nodes' (if they have 'branches')
332333
if "nodes" not in node_data or len(node_data['nodes'].keys())==0:
333334
raise AugurError(f"No nodes found in the supplied node data files. Please check {', '.join(node_data_files)}")
@@ -347,11 +348,12 @@ def register_parser(parent_subparsers):
347348
parser.add_argument('--output-node-data', type=str, metavar="NODE_DATA_JSON", help='name of JSON file to save clade assignments to')
348349
parser.add_argument('--membership-name', type=str, default="clade_membership", help='Key to store clade membership under; use "None" to not export this')
349350
parser.add_argument('--label-name', type=str, default="clade", help='Key to store clade labels under; use "None" to not export this')
351+
add_validation_arguments(parser)
350352
return parser
351353

352354

353355
def run(args):
354-
(tree, all_muts) = parse_nodes(args.tree, args.mutations)
356+
(tree, all_muts) = parse_nodes(args.tree, args.mutations, args.validation_mode)
355357

356358
if args.reference:
357359
# PLACE HOLDER FOR vcf WORKFLOW.

augur/export_v2.py

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from Bio import Phylo
1414
from typing import Dict, Union, TypedDict, Any, Tuple
1515

16-
from .argparse_ import ExtendOverwriteDefault
16+
from .argparse_ import ExtendOverwriteDefault, add_validation_arguments
1717
from .errors import AugurError
1818
from .io.file import open_file
1919
from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, InvalidDelimiter, read_metadata
@@ -926,20 +926,6 @@ def node_data_prop_is_normal_trait(name):
926926

927927
return True
928928

929-
validation_mode_help_message = """
930-
Control if optional validation checks are performed and what
931-
happens if they fail.
932-
933-
'error' and 'warn' modes perform validation and emit messages about
934-
failed validation checks. 'error' mode causes a non-zero exit
935-
status if any validation checks failed, while 'warn' does not.
936-
937-
'skip' mode performs no validation.
938-
939-
Note that some validation checks are non-optional and as such are
940-
not affected by this setting.
941-
"""
942-
943929

944930
def register_parser(parent_subparsers):
945931
parser = parent_subparsers.add_parser("v2", help=__doc__)
@@ -1007,19 +993,7 @@ def register_parser(parent_subparsers):
1007993
optional_settings = parser.add_argument_group(
1008994
title="OTHER OPTIONAL SETTINGS"
1009995
)
1010-
optional_settings.add_argument(
1011-
'--validation-mode',
1012-
dest="validation_mode",
1013-
type=ValidationMode,
1014-
choices=[mode for mode in ValidationMode],
1015-
default=ValidationMode.ERROR,
1016-
help=validation_mode_help_message)
1017-
optional_settings.add_argument(
1018-
'--skip-validation',
1019-
dest="validation_mode",
1020-
action="store_const",
1021-
const=ValidationMode.SKIP,
1022-
help="Skip validation of input/output files, equivalent to --validation-mode=skip. Use at your own risk!")
996+
add_validation_arguments(optional_settings)
1023997

1024998
return parser
1025999

augur/translate.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,8 @@
2222
from treetime.vcf_utils import read_vcf
2323
from augur.errors import AugurError
2424
from textwrap import dedent
25-
from .types import ValidationMode
25+
from .argparse_ import add_validation_arguments
2626
from .util_support.node_data_file import NodeDataObject
27-
from .export_v2 import validation_mode_help_message
2827

2928
class MissingNodeError(Exception):
3029
pass
@@ -373,7 +372,7 @@ def register_parser(parent_subparsers):
373372
parser.add_argument('--alignment-output', type=str, help="write out translated gene alignments. "
374373
"If a VCF-input, a .vcf or .vcf.gz will be output here (depending on file ending). If fasta-input, specify the file name "
375374
"like so: 'my_alignment_%%GENE.fasta', where '%%GENE' will be replaced by the name of the gene")
376-
parser.add_argument('--validation-mode', type=ValidationMode, choices=[mode for mode in ValidationMode], default=ValidationMode.ERROR, help=validation_mode_help_message)
375+
add_validation_arguments(parser)
377376

378377
vcf_only = parser.add_argument_group(
379378
title="VCF specific",
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
Integration tests for augur clades.
2+
3+
$ source "$TESTDIR"/_setup.sh
4+
5+
Node-data JSONs produced from a different major version of augur
6+
are not allowed.
7+
8+
$ ${AUGUR} clades \
9+
> --tree "$TESTDIR/../data/tree.nwk" \
10+
> --mutations "$TESTDIR/../data/aa_muts_generated_by.json" \
11+
> --clades "$TESTDIR/../data/clades.tsv" \
12+
> --output-node-data clades.json
13+
ERROR: Augur version incompatibility detected: the JSON .*aa_muts_generated_by\.json.* was generated by \{'program': 'augur', 'version': '21.1.0'\}, which is incompatible with the current augur version \([.0-9]+\). We suggest you rerun the pipeline using the current version of augur. (re)
14+
[2]
15+
16+
Skipping validation allows mismatched augur versions to be used without error.
17+
18+
$ ${AUGUR} clades \
19+
> --tree "$TESTDIR/../data/tree.nwk" \
20+
> --mutations "$TESTDIR/../data/aa_muts_generated_by.json" \
21+
> --clades "$TESTDIR/../data/clades.tsv" \
22+
> --output-node-data clades.json \
23+
> --skip-validation &>/dev/null

0 commit comments

Comments
 (0)