Skip to content

Commit

Permalink
🚧 export v2: Automatically minify output
Browse files Browse the repository at this point in the history
Automatically choose whether to minify the output JSON file based on the
number of nodes in the tree (minified when there are more than 1000 nodes),
unless either --minify-json or --no-minify-json (new option) is specified.
  • Loading branch information
victorlin committed Dec 8, 2023
1 parent 7cb3848 commit fb82a79
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 2 deletions.
25 changes: 23 additions & 2 deletions augur/export_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
from .utils import read_node_data, write_json, read_config, read_lat_longs, read_colors
from .validate import export_v2 as validate_v2, auspice_config_v2 as validate_auspice_config_v2, ValidateError


MINIFY_THRESHOLD = 1000 # number of nodes


# Set up warnings & exceptions
warn = warnings.warn
deprecationWarningsEmitted = False
Expand Down Expand Up @@ -878,7 +882,10 @@ def register_parser(parent_subparsers):
optional_settings = parser.add_argument_group(
title="OPTIONAL SETTINGS"
)
optional_settings.add_argument('--minify-json', action="store_true", help="export JSONs without indentation or line returns")
minify = optional_settings.add_mutually_exclusive_group()
minify.add_argument('--minify-json', action="store_true", help="export JSONs without indentation or line returns, regardless of how many nodes are in the tree.")
minify.add_argument('--no-minify-json', action="store_true", help="export JSONs to be human readable, regardless of how many nodes are in the tree.")

root_sequence = optional_settings.add_mutually_exclusive_group()
root_sequence.add_argument('--include-root-sequence', action="store_true", help="Export an additional JSON containing the root sequence (reference sequence for vcf) used to identify mutations. The filename will follow the pattern of <OUTPUT>_root-sequence.json for a main auspice JSON of <OUTPUT>.json")
root_sequence.add_argument('--include-root-sequence-inline', action="store_true", help="Export the root sequence (reference sequence for vcf) used to identify mutations as part of the main dataset JSON. This should only be used for small genomes for file size reasons.")
Expand Down Expand Up @@ -1165,8 +1172,22 @@ def run(args):
if config.get("extensions"):
data_json["meta"]["extensions"] = config["extensions"]

# Should output be minified?
# User-specified arguments take precedence before determining behavior based
# on the size of the tree.
if args.minify_json:
minify = True
elif args.no_minify_json:
minify = False
else:
num_nodes = len(node_data["nodes"])
if num_nodes > MINIFY_THRESHOLD:
minify = True
else:
minify = False

# Write outputs - the (unified) dataset JSON intended for auspice & perhaps the ref root-sequence JSON
indent = {"indent": None} if args.minify_json else {}
indent = {"indent": None} if minify else {}
if args.include_root_sequence or args.include_root_sequence_inline:
# Note - argparse enforces that only one of these args will be true
if 'reference' in node_data:
Expand Down
31 changes: 31 additions & 0 deletions tests/functional/export_v2/cram/minify-output.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Setup

$ source "$TESTDIR"/_setup.sh

Tree with a few nodes is not automatically minified.

$ ${AUGUR} export v2 \
> --tree "$TESTDIR/../data/tree.nwk" \
> --output output.json &>/dev/null

$ head -c 20 output.json
{
"version": "v2", (no-eol)

It can be forcefully minified with an argument.

$ ${AUGUR} export v2 \
> --tree "$TESTDIR/../data/tree.nwk" \
> --minify-json \
> --output output.json &>/dev/null

$ head -c 20 output.json
{"version": "v2", "m (no-eol)

Tree with many nodes is automatically minified.

FIXME: find a tree with >1000 nodes for testing here.

It can be forcefully not minified with an argument.

FIXME: add test for --no-minify-json.

0 comments on commit fb82a79

Please sign in to comment.