|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
import argparse
import glob
import json
import os
import shlex
import shutil
import subprocess
import sys
| 7 | + |
def parse_args():
    """Parse the command line and return the resulting namespace.

    Every option is required.  The FASTA, metadata and variants options name
    the input files; ``--recipes`` names the JSON file of recipes.
    """
    # (flag, help text) in the order they should appear in --help.
    required_options = (
        ("--unaligned_fasta", "Raw FASTA"),
        ("--aligned_fasta", "Aligned, masked, untrimmed FASTA"),
        ("--trimmed_fasta", "Aligned, masked, trimmed and filtered FASTA"),
        ("--cog_global_fasta", "COG GISAID aligned FASTA"),
        ("--cog_metadata", "MASSIVE CSV"),
        ("--cog_global_metadata", "MASSIVE CSV"),
        ("--cog_variants", "Mutations CSV"),
        ("--cog_global_variants", "Mutations CSV"),
        ("--recipes", "JSON of recipes"),
    )

    cli = argparse.ArgumentParser(
        description="""Create published files from config file""",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, dest=flag.lstrip("-"), required=True, help=help_text)

    return cli.parse_args()
| 26 | + |
# Keys recognised in each recipe:
# "data": "cog" or "cog_global"
# "fasta": "unaligned", "aligned", "trimmed" or "cog_global"
# "metadata_fields": list of metadata fields, or omitted for a plain FASTA copy
# "variants": True or False to add columns from variants
# "where": free text to be passed to fastafunk fetch --where-column
# "suffix": something to append to file names
# "date": date string used in the published file names
| 33 | + |
def _output_prefix(outdir, info_dict):
    """Return "<outdir>/<data>_<date>[_<suffix>]", the stem of every published file name."""
    date = info_dict.get("date")
    if date is None:
        sys.exit("Config entries need to specify a date to name the published files")
    prefix = "%s/%s_%s" % (outdir, info_dict["data"], date)
    if info_dict["suffix"] is not None:
        prefix = "%s_%s" % (prefix, info_dict["suffix"])
    return prefix


def get_info_from_config(config_dict, outdir, fasta_dict, csv_dict, var_dict):
    """Expand one recipe into the input and output paths needed to publish it.

    Args:
        config_dict: one recipe. Recognised keys are "data", "fasta",
            "metadata_fields", "variants", "where", "suffix" and "date";
            any missing key other than "date" defaults to None
            (False for "variants").
        outdir: directory the published file names are placed under.
        fasta_dict: maps "unaligned", "aligned", "trimmed" and "cog_global"
            to input FASTA paths.
        csv_dict: maps "cog" and "cog_global" to input metadata CSV paths.
        var_dict: maps "cog" and "cog_global" to input variants CSV paths.

    Returns:
        A dict holding the recipe values plus "in_fa", "in_csv", "in_var",
        "out_fa", "out_csv" and "out_var".  Outputs that are not published
        keep the placeholders "tmp.fa" / "tmp.csv" (None for "out_var").

    Exits via ``sys.exit`` when neither a known "fasta" nor a known "data"
    value is given, or when a published file name is needed but the recipe
    has no "date".
    """
    info_dict = {"suffix": None, "data": None, "fasta": None, "metadata_fields": None,
                 "where": None, "variants": False,
                 "in_fa": None, "in_csv": None, "in_var": None,
                 "out_fa": "tmp.fa", "out_csv": "tmp.csv", "out_var": None}
    info_dict.update(config_dict)

    # Input FASTA: an explicit "fasta" choice wins, otherwise fall back on "data".
    if info_dict["fasta"] in fasta_dict:
        info_dict["in_fa"] = fasta_dict[info_dict["fasta"]]
    elif info_dict["data"] == "cog_global":
        info_dict["in_fa"] = fasta_dict["cog_global"]
    elif info_dict["data"] == "cog":
        info_dict["in_fa"] = fasta_dict["trimmed"]
    else:
        sys.exit("Config entries need to specify either fasta in "
                 "['unaligned', 'aligned', 'trimmed', 'cog_global'] or data "
                 "in ['cog', 'cog_global']")

    # Infer "data" from the FASTA choice BEFORE selecting the metadata and
    # variants inputs, so recipes that only give "fasta" still get them.
    if info_dict["data"] is None:
        if info_dict["fasta"] == "cog_global":
            info_dict["data"] = "cog_global"
        else:
            info_dict["data"] = "cog"

    if info_dict["data"] in ("cog", "cog_global"):
        info_dict["in_csv"] = csv_dict[info_dict["data"]]
        info_dict["in_var"] = var_dict[info_dict["data"]]

    # Only build (and validate) the file-name stem when something is published.
    if info_dict["variants"] or info_dict["metadata_fields"] or info_dict["fasta"]:
        prefix = _output_prefix(outdir, info_dict)

    # With variants the final metadata is the variants-annotated table and the
    # plain metadata stays a temporary file; otherwise the metadata is final.
    if info_dict["variants"]:
        info_dict["out_var"] = prefix + "_metadata.csv"
    elif info_dict["metadata_fields"]:
        info_dict["out_csv"] = prefix + "_metadata.csv"

    # These outputs are FASTA files, so they get a FASTA extension
    # (they were previously named "*.csv").
    if info_dict["fasta"]:
        if info_dict["metadata_fields"]:
            info_dict["out_fa"] = prefix + "_alignment.fasta"
        else:
            info_dict["out_fa"] = prefix + ".fasta"

    return info_dict
| 87 | + |
| 88 | + |
def publish_file(outdir, info_dict):
    """Create the published files described by ``info_dict`` under ``outdir``.

    Args:
        outdir: output directory; created if it does not exist yet.
        info_dict: reads the keys "metadata_fields", "where", "variants",
            "in_fa", "in_csv", "in_var", "out_fa", "out_csv" and "out_var".

    When "metadata_fields" is None the input FASTA is simply copied to
    "out_fa".  Otherwise ``fastafunk fetch`` writes the FASTA and metadata
    outputs and, if "variants" is truthy, ``fastafunk add_columns`` joins the
    variants table onto that metadata.

    Raises:
        subprocess.CalledProcessError: if a fastafunk command exits non-zero.
        OSError: if the plain copy fails.
    """
    # Several recipes can share one output directory, so it may already exist.
    os.makedirs(outdir, exist_ok=True)

    if info_dict["metadata_fields"] is None:
        shutil.copy(info_dict["in_fa"], info_dict["out_fa"])
        return

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact.
    # NOTE(review): the values in "metadata_fields" are never passed on to
    # fastafunk; only their presence is used here -- confirm that is intended.
    fetch_cmd = ["fastafunk", "fetch",
                 "--in-fasta", info_dict["in_fa"],
                 "--in-metadata", info_dict["in_csv"],
                 "--index-column", "sequence_name",
                 "--out-fasta", info_dict["out_fa"],
                 "--out-metadata", info_dict["out_csv"],
                 "--restrict"]
    if info_dict["where"]:
        # "where" is free text that may hold several whitespace-separated
        # values; split it the way the shell previously did.
        fetch_cmd.append("--where-column")
        fetch_cmd.extend(shlex.split(info_dict["where"]))
    # check=True: stop rather than carry on with missing or partial outputs.
    subprocess.run(fetch_cmd, check=True)

    if info_dict["variants"]:
        add_columns_cmd = ["fastafunk", "add_columns",
                           "--in-metadata", info_dict["out_csv"],
                           "--in-data", info_dict["in_var"],
                           "--index-column", "sequence_name",
                           "--join-on", "query",
                           "--out-metadata", info_dict["out_var"]]
        subprocess.run(add_columns_cmd, check=True)

    # Remove the tmp.fa / tmp.csv placeholders from the working directory;
    # it is fine if there are none.
    for leftover in glob.glob("tmp.*"):
        if os.path.isfile(leftover):
            os.remove(leftover)
| 111 | + |
def main():
    """Read the recipes file and publish every recipe it contains.

    The recipes JSON maps each output directory to a list of recipes; each
    recipe is expanded with ``get_info_from_config`` and then handed to
    ``publish_file``.
    """
    args = parse_args()

    # Lookup tables from recipe keywords to the files given on the command line.
    fasta_dict = {
        "unaligned": args.unaligned_fasta,
        "aligned": args.aligned_fasta,
        "trimmed": args.trimmed_fasta,
        "cog_global": args.cog_global_fasta,
    }
    csv_dict = {"cog": args.cog_metadata, "cog_global": args.cog_global_metadata}
    var_dict = {"cog": args.cog_variants, "cog_global": args.cog_global_variants}

    with open(args.recipes, 'r') as handle:
        recipes_by_outdir = json.load(handle)

    for outdir, recipe_list in recipes_by_outdir.items():
        for recipe in recipe_list:
            publish_file(
                outdir,
                get_info_from_config(recipe, outdir, fasta_dict, csv_dict, var_dict),
            )
| 127 | + |
# Run the publishing pipeline only when executed as a script, not on import.
if __name__ == '__main__':
    main()
0 commit comments