From 25f0491f3e88d6b564e4a8d5c3377a4a730078df Mon Sep 17 00:00:00 2001 From: Michael Cormier <35238534+mikecormier@users.noreply.github.com> Date: Fri, 1 Jan 2021 15:16:39 -0700 Subject: [PATCH] Minor meta-recipe features (#49) * Add prediction to dir-path for meta-recipes using an ID * New unit tests for predict-path with 'dir-path' and 'id' arguments for meta-recipes * Update search results for meta-recipes * Unit test for searching for meta-recipes * Update version to 1.1.1 --- ggd/__init__.py | 2 +- ggd/predict_path.py | 123 ++++++++++++++++++++++++------------- ggd/search.py | 5 +- tests/test_info_scripts.py | 90 ++++++++++++++++++++++++--- tests/test_search.py | 30 +++++++++ 5 files changed, 195 insertions(+), 55 deletions(-) diff --git a/ggd/__init__.py b/ggd/__init__.py index aebf1b8..c5762ad 100644 --- a/ggd/__init__.py +++ b/ggd/__init__.py @@ -1,2 +1,2 @@ -__version__ = "1.1.0" +__version__ = "1.1.1" diff --git a/ggd/predict_path.py b/ggd/predict_path.py index b051ae7..5025a4e 100644 --- a/ggd/predict_path.py +++ b/ggd/predict_path.py @@ -18,31 +18,53 @@ def add_predict_path(p): help="Predict the install file path of a data package that hasn't been installed yet. (Use for workflows, such as Snakemake)", description="Get a predicted install file path for a data package before it is installed. (Use for workflows, such as Snakemake)", ) + c.add_argument( "-c", "--channel", default="genomics", choices=[str(x) for x in get_ggd_channels()], help="The ggd channel of the recipe to find. (Default = genomics)", + ) c.add_argument( "--prefix", default=None, help="(Optional) The name or the full directory path to an conda environment. The predicted path will be based on this conda environment. When installing, the data package should also be installed in this environment. 
(Only needed if not predicting for a path in the current conda environment)", ) - c2 = c.add_argument_group("required arguments") + + c.add_argument( + "--id", + metavar="meta-recipe ID", + default = None, + help = "(Optional) The ID to predict the path for if the package is a meta-recipe. If it is not a meta-recipe it will be ignored" + ) + + c2 = c.add_argument_group("One Argument Required") + c2.add_argument( - "-pn", - "--package-name", - required=True, - help="(Required) The name of the data package to predict a file path for", + + "--dir-path", + action="store_true", + help = "(Required if '--file-name' not used) Whether or not to get the predicted directory path rather then the predicted file path. If both --file-name and --dir-path are provided the --file-name will be used and --dir-path will be ignored", ) + c2.add_argument( "-fn", "--file-name", + default = None, + help="(Required if '--dir-path' not used) The name of the file to predict that path for. It is best if you give the full and correct name of the file to predict the path for. If not, ggd will try to identify the right file, but won't guarantee that it is the right file", + ) + + c3 = c.add_argument_group("Required Arguments") + + c3.add_argument( + "-pn", + "--package-name", required=True, - help="(Required) The name of the file to predict that path for. It is best if you give the full and correct name of the file to predict the path for. 
If not, ggd will try to identify the right file, but won't guarantee that it is the right file", + help="(Required) The name of the data package to predict a file path for", ) + c.set_defaults(func=predict_path) @@ -89,7 +111,12 @@ def predict_path(parser, args): import os import re - from .utils import conda_root, get_conda_prefix_path, prefix_in_conda + from .utils import check_for_meta_recipes, conda_root, get_conda_prefix_path, prefix_in_conda + from .install import get_idname_from_metarecipe + + if not args.dir_path and args.file_name is None: + print(":ggd:predict-path: !!ERROR!! Either the '--file-name' or the '--dir-path' argument is required. Neither was given") + sys.exit() ## get prefix CONDA_ROOT = ( @@ -109,52 +136,60 @@ def predict_path(parser, args): ) ) - ## Check there is a "final-files" in the metadata for the package - if ( - "final-files" not in metadata_dict["packages"][args.package_name]["tags"] - or len( - metadata_dict["packages"][args.package_name]["tags"].get("final-files", []) - ) - == 0 - ): - sys.exit( - "\n:ggd:predict-path: The {p} data package does not have the final data files listed. This packages needs to be updated. 
To update, contact the GoGetData team at https://github.com/gogetdata/ggd-recipes\n".format( - p=args.package_name - ) - ) + if args.file_name is not None: - ## Check that the file is one of the final-files listed in the metadata - if ( - args.file_name - not in metadata_dict["packages"][args.package_name]["tags"]["final-files"] - ): - matching_files = [ - x - for x in metadata_dict["packages"][args.package_name]["tags"]["final-files"] - if re.search(args.file_name, x) - ] - if len(matching_files) > 0: - ## Chose the first file that matched - file_name = matching_files[0] - else: + ## Check there is a "final-files" in the metadata for the package + if ( + "final-files" not in metadata_dict["packages"][args.package_name]["tags"] + or len( + metadata_dict["packages"][args.package_name]["tags"].get("final-files", []) + ) + == 0 + ): sys.exit( - "\n:ggd:predict-path: The {f} file is not one of the files listed for this package. The files installed by this package are: \n\t\t{fo}".format( - f=args.file_name, - fo="\n\t\t".join( - metadata_dict["packages"][args.package_name]["tags"][ - "final-files" - ] - ), + "\n:ggd:predict-path: The {p} data package does not have the final data files listed. This packages needs to be updated. To update, contact the GoGetData team at https://github.com/gogetdata/ggd-recipes\n".format( + p=args.package_name ) ) - else: - file_name = args.file_name + + ## Check that the file is one of the final-files listed in the metadata + if ( + args.file_name + not in metadata_dict["packages"][args.package_name]["tags"]["final-files"] + ): + matching_files = [ + x + for x in metadata_dict["packages"][args.package_name]["tags"]["final-files"] + if re.search(args.file_name, x) + ] + if len(matching_files) > 0: + ## Chose the first file that matched + file_name = matching_files[0] + else: + sys.exit( + "\n:ggd:predict-path: The {f} file is not one of the files listed for this package. 
The files installed by this package are: \n\t\t{fo}".format( + f=args.file_name, + fo="\n\t\t".join( + metadata_dict["packages"][args.package_name]["tags"][ + "final-files" + ] + ), + ) + ) + else: + file_name = args.file_name + + elif args.dir_path: + file_name = "" + ## Get path information species = metadata_dict["packages"][args.package_name]["identifiers"]["species"] build = metadata_dict["packages"][args.package_name]["identifiers"]["genome-build"] version = metadata_dict["packages"][args.package_name]["version"] + name = args.package_name if not check_for_meta_recipes(args.package_name, metadata_dict) else get_idname_from_metarecipe(args.id.lower(), args.package_name, metadata_dict) if args.id is not None else args.package_name + ## Print the path path = os.path.join( CONDA_ROOT, @@ -162,7 +197,7 @@ def predict_path(parser, args): "ggd", species, build, - args.package_name, + name, version, file_name, ) diff --git a/ggd/search.py b/ggd/search.py index 91129a3..6b22611 100644 --- a/ggd/search.py +++ b/ggd/search.py @@ -461,7 +461,10 @@ def print_summary(search_terms, json_dict, match_list, installed_pkgs, installed % installed_paths[pkg] ) else: - results.append("\n\tTo install run:\n\t\tggd install %s" % pkg) + from .utils import check_for_meta_recipes + + results.append("\n\tTo install run:\n\t\tggd install %s %s" %(pkg, "--id " if check_for_meta_recipes(pkg,json_dict) else "" )) + print("\n\n".join(results)) print("\n", dash) diff --git a/tests/test_info_scripts.py b/tests/test_info_scripts.py index 5240f95..f8a67b9 100644 --- a/tests/test_info_scripts.py +++ b/tests/test_info_scripts.py @@ -1505,7 +1505,7 @@ def test_predict_path(): ## Testing with grch37-autosomal-dominant-genes-berg-v1 data package ## Test bad package name - args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz', package_name='bad_package_name-grch37-autosomal-dominant-genes-berg-v1', prefix=None) + args = 
Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz', package_name='bad_package_name-grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None) with pytest.raises(SystemExit) as pytest_wrapped_e: predict_path.predict_path((), args) @@ -1514,7 +1514,7 @@ def test_predict_path(): ## Test bad file name - args = Namespace(channel='genomics', command='predict-path', file_name='autodom-genes-berg', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None) + args = Namespace(channel='genomics', command='predict-path', file_name='autodom-genes-berg', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None) with pytest.raises(SystemExit) as pytest_wrapped_e: predict_path.predict_path((), args) @@ -1523,7 +1523,7 @@ def test_predict_path(): ## Test closest file name - args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None) + args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None) temp_stdout = StringIO() with redirect_stdout(temp_stdout): @@ -1533,7 +1533,7 @@ def test_predict_path(): ## Test closest file name - args = Namespace(channel='genomics', command='predict-path', file_name='berg-v1.compliment', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None) + args = Namespace(channel='genomics', command='predict-path', file_name='berg-v1.compliment', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None) temp_stdout = StringIO() with redirect_stdout(temp_stdout): @@ -1543,7 +1543,7 @@ def test_predict_path(): ## Test full name file name - args = Namespace(channel='genomics', command='predict-path', 
file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None) + args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None) temp_stdout = StringIO() with redirect_stdout(temp_stdout): @@ -1552,6 +1552,17 @@ def test_predict_path(): assert os.path.join(utils.conda_root(),"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") in str(output) + ## Test no file-name or dir-path + args = Namespace(channel='genomics', command='predict-path', file_name=None, package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None) + + temp_stdout = StringIO() + with pytest.raises(SystemExit) as pytest_wrapped_e, redirect_stdout(temp_stdout): + predict_path.predict_path((), args) + assert "SystemExit" in str(pytest_wrapped_e.exconly()) ## test that SystemExit was raised by sys.exit() + output = temp_stdout.getvalue().strip() + assert ":ggd:predict-path: !!ERROR!! Either the '--file-name' or the '--dir-path' argument is required. 
Neither was given" in output + + ## Test prdiction in different environmnet ### Temp conda environment temp_env = os.path.join(utils.conda_root(), "envs", "predict-path") @@ -1565,7 +1576,17 @@ def test_predict_path(): sp.check_output(["conda", "create", "--name", "predict-path"]) ## Test full name file name - args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env) + args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env, dir_path = False, id = None) + + temp_stdout = StringIO() + with redirect_stdout(temp_stdout): + predict_path.predict_path((), args) + output = temp_stdout.getvalue().strip() + assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") in str(output) + + + ## Test full name file name and that the ID is ignored for a non meta-recipe + args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env, dir_path = False, id = "SOME ID") temp_stdout = StringIO() with redirect_stdout(temp_stdout): @@ -1573,6 +1594,58 @@ def test_predict_path(): output = temp_stdout.getvalue().strip() assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") in str(output) + + ## Test full name file name and dir-path. 
(File name should be used over dir path) + args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env, dir_path = True, id = None) + + temp_stdout = StringIO() + with redirect_stdout(temp_stdout): + predict_path.predict_path((), args) + output = temp_stdout.getvalue().strip() + assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") in str(output) + + ## Test dir path + args = Namespace(channel='genomics', command='predict-path', file_name=None, package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env, dir_path = True, id = None) + + temp_stdout = StringIO() + with redirect_stdout(temp_stdout): + predict_path.predict_path((), args) + output = temp_stdout.getvalue().strip() + assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1") in str(output) + assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") not in str(output) + + + ## Test dir path and that the ID is ignored for a non meta-recipe + args = Namespace(channel='genomics', command='predict-path', file_name=None, package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env, dir_path = True, id = "SOME_ID") + + temp_stdout = StringIO() + with redirect_stdout(temp_stdout): + predict_path.predict_path((), args) + output = temp_stdout.getvalue().strip() + assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1") in str(output) + assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") not in str(output) + + + 
## Test meta-recipe without an ID + args = Namespace(channel='genomics', command='predict-path', file_name=None, package_name='meta-recipe-geo-accession-geo-v1', prefix=temp_env, dir_path = True, id = None) + + temp_stdout = StringIO() + with redirect_stdout(temp_stdout): + predict_path.predict_path((), args) + output = temp_stdout.getvalue().strip() + assert os.path.join(temp_env,"share","ggd", "meta-recipe","meta-recipe","meta-recipe-geo-accession-geo-v1","1") in str(output) + + + ## Test meta-recipe with an ID and that the id is set to lower case + args = Namespace(channel='genomics', command='predict-path', file_name=None, package_name='meta-recipe-geo-accession-geo-v1', prefix=temp_env, dir_path = True, id = "GSE123") + + temp_stdout = StringIO() + with redirect_stdout(temp_stdout): + predict_path.predict_path((), args) + output = temp_stdout.getvalue().strip() + assert os.path.join(temp_env,"share","ggd", "meta-recipe","meta-recipe","gse123-geo-v1","1") in str(output) + + ## Remove temp env created in test_get_environment_variables() sp.check_output(["conda", "env", "remove", "--name", "predict-path"]) try: @@ -1594,7 +1667,7 @@ def test_predict_path(): output = str(temp_stdout.getvalue().strip()) assert os.path.exists(str(output)) - args2 = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None) + args2 = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None) temp_stdout = StringIO() with redirect_stdout(temp_stdout): predict_path.predict_path((), args2) @@ -1602,8 +1675,7 @@ def test_predict_path(): assert str(output2) == str(output) - args = Namespace(channel='genomics', command='uninstall', names=["grch37-autosomal-dominant-genes-berg-v1"]) - uninstall.uninstall((),args) + 
sp.check_call(["ggd","uninstall","grch37-autosomal-dominant-genes-berg-v1"]) #-------------------------------------------------------- diff --git a/tests/test_search.py b/tests/test_search.py index 8fbe450..8d3f3bc 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -512,6 +512,7 @@ def test_print_summary(): installed_paths = [] assert search.print_summary(search_term,json_dict,matches,installed_pkgs,installed_paths) == True + ## Test that a match that does not exists in the json_dict is handeled correctly search_term = ["gaps"] matches = ["hg19-gaps", "bad-package"] @@ -520,6 +521,28 @@ def test_print_summary(): assert search.print_summary(search_term,json_dict,matches,installed_pkgs,installed_paths) == True + ## Test a meta-recipe + ggd_jdict = {u'channeldata_version': 1, u'subdirs': [u'noarch'], u'packages': {u'meta-recipe-geo-accession-geo-v1': {u'activate.d': + False, u'version': u'1', u'tags': {u'cached': [], u'ggd-channel': u'genomics', u'data-version': + u'', u'data-provider': u'GEO'}, u'post_link': True, u'binary_prefix': False, u'run_exports': {}, u'pre_unlink': + False, u'subdirs': [u'noarch'], u'deactivate.d': False, u'reference_package': + u'noarch/meta-recipe-geo-accession-geo-v1-1-0.tar.bz2', u'pre_link': False, u'keywords': [u'GEO', u'Gene Expression Omnibus'], + u'summary': u'GEO Meta-Recipe', u'text_prefix': False, u'identifiers': {u'genome-build': + u'meta-recipe', u'species': u'meta-recipe'}}}} + + search_term = ["GEO"] + matches = ["meta-recipe-geo-accession-geo-v1"] + installed_pkgs = set() + installed_paths = [] + + temp_stdout = StringIO() + args = Namespace(channel='genomics', command='search', display_number=100, genome_build=[], match_score='75', search_type = "both", search_term=['reference','grch37'], species=[]) + with redirect_stdout(temp_stdout): + assert search.print_summary(search_term,ggd_jdict,matches,installed_pkgs,installed_paths) == True + output = temp_stdout.getvalue().strip() + assert "ggd install 
meta-recipe-geo-accession-geo-v1 --id " in output + + def test_main_search(): """ Test the main search method with different argument parameters @@ -693,5 +716,12 @@ def test_main_search(): assert "SystemExit" in str(pytest_wrapped_e.exconly()) ## test that SystemExit was raised by sys.exit() assert pytest_wrapped_e.match("") ## Check that the exit code is 1 + ## test meta-recipe + temp_stdout = StringIO() + args = Namespace(channel='genomics', command='search', display_number=1, genome_build=[], match_score='75', search_type = "both", search_term=['GEO'], species=[]) + with redirect_stdout(temp_stdout): + search.search(parser,args) + output = temp_stdout.getvalue().strip() + assert "ggd install meta-recipe-geo-accession-geo-v1 --id " in output