From b61906b41ccc06e5493f008164c1f65dd658c7d3 Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Tue, 7 May 2024 09:49:28 -0400 Subject: [PATCH 1/9] current progress on loading specified input file and glob all files if no input file provided --- src/diffpy/labpdfproc/labpdfprocapp.py | 79 +++++++++++++---------- src/diffpy/labpdfproc/tests/test_tools.py | 45 ++++++++++++- src/diffpy/labpdfproc/tools.py | 25 +++++++ 3 files changed, 115 insertions(+), 34 deletions(-) diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py index 72ca6b7..d367ce6 100644 --- a/src/diffpy/labpdfproc/labpdfprocapp.py +++ b/src/diffpy/labpdfproc/labpdfprocapp.py @@ -3,7 +3,13 @@ from pathlib import Path from diffpy.labpdfproc.functions import apply_corr, compute_cve -from diffpy.labpdfproc.tools import known_sources, load_user_metadata, set_output_directory, set_wavelength +from diffpy.labpdfproc.tools import ( + known_sources, + load_user_metadata, + set_input_files, + set_output_directory, + set_wavelength, +) from diffpy.utils.parsers.loaddata import loadData from diffpy.utils.scattering_objects.diffraction_objects import XQUANTITIES, Diffraction_object @@ -76,45 +82,52 @@ def get_args(override_cli_inputs=None): def main(): args = get_args() - args = load_user_metadata(args) + args = set_input_files(args) args.output_directory = set_output_directory(args) args.wavelength = set_wavelength(args) + args = load_user_metadata(args) - filepath = Path(args.input_file) - outfilestem = filepath.stem + "_corrected" - corrfilestem = filepath.stem + "_cve" - outfile = args.output_directory / (outfilestem + ".chi") - corrfile = args.output_directory / (corrfilestem + ".chi") + for input_file in args.input_file: + filepath = Path(args.input_file) + outfilestem = filepath.stem + "_corrected" + corrfilestem = filepath.stem + "_cve" + outfile = args.output_directory / (outfilestem + ".chi") + corrfile = args.output_directory / (corrfilestem + ".chi") - if 
outfile.exists() and not args.force_overwrite: - sys.exit( - f"Output file {str(outfile)} already exists. Please rerun " - f"specifying -f if you want to overwrite it." - ) - if corrfile.exists() and args.output_correction and not args.force_overwrite: - sys.exit( - f"Corrections file {str(corrfile)} was requested and already " - f"exists. Please rerun specifying -f if you want to overwrite it." - ) + if outfile.exists() and not args.force_overwrite: + sys.exit( + f"Output file {str(outfile)} already exists. Please rerun " + f"specifying -f if you want to overwrite it." + ) + if corrfile.exists() and args.output_correction and not args.force_overwrite: + sys.exit( + f"Corrections file {str(corrfile)} was requested and already " + f"exists. Please rerun specifying -f if you want to overwrite it." + ) - input_pattern = Diffraction_object(wavelength=args.wavelength) - xarray, yarray = loadData(args.input_file, unpack=True) - input_pattern.insert_scattering_quantity( - xarray, - yarray, - "tth", - scat_quantity="x-ray", - name=str(args.input_file), - metadata={"muD": args.mud, "anode_type": args.anode_type}, - ) + input_pattern = Diffraction_object(wavelength=args.wavelength) + + try: + xarray, yarray = loadData(args.input_file, unpack=True) + except Exception as e: + raise ValueError(f"Failed to load data from {filepath}: {e}.") + + input_pattern.insert_scattering_quantity( + xarray, + yarray, + "tth", + scat_quantity="x-ray", + name=str(args.input_file), + metadata={"muD": args.mud, "anode_type": args.anode_type}, + ) - absorption_correction = compute_cve(input_pattern, args.mud, args.wavelength) - corrected_data = apply_corr(input_pattern, absorption_correction) - corrected_data.name = f"Absorption corrected input_data: {input_pattern.name}" - corrected_data.dump(f"{outfile}", xtype="tth") + absorption_correction = compute_cve(input_pattern, args.mud, args.wavelength) + corrected_data = apply_corr(input_pattern, absorption_correction) + corrected_data.name = 
f"Absorption corrected input_data: {input_pattern.name}" + corrected_data.dump(f"{outfile}", xtype="tth") - if args.output_correction: - absorption_correction.dump(f"{corrfile}", xtype="tth") + if args.output_correction: + absorption_correction.dump(f"{corrfile}", xtype="tth") if __name__ == "__main__": diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index 5121e9f..6fa7aea 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ b/src/diffpy/labpdfproc/tests/test_tools.py @@ -6,7 +6,50 @@ import pytest from diffpy.labpdfproc.labpdfprocapp import get_args -from diffpy.labpdfproc.tools import known_sources, load_user_metadata, set_output_directory, set_wavelength +from diffpy.labpdfproc.tools import ( + known_sources, + load_user_metadata, + set_input_files, + set_output_directory, + set_wavelength, +) +from diffpy.utils.parsers.loaddata import loadData + +params1 = [ + ( + [], + [ + ".", + ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl", "input_dir"], + ], + ), + (["--input-file", "good_data.chi"], [".", "good_data.chi"]), + (["--input-file", "input_dir/unreadable_file.txt"], ["input_dir", "input_dir/unreadable_file.txt"]), + # ([Path.cwd()], [Path.cwd()]), +] + + +@pytest.mark.parametrize("inputs, expected", params1) +def test_set_input_files(inputs, expected, user_filesystem): + expected_input_directory = Path(user_filesystem) / expected[0] + expected_input_files = expected[1] + + cli_inputs = ["2.5"] + inputs + actual_args = get_args(cli_inputs) + actual_args = set_input_files(actual_args) + assert actual_args.input_directory == expected_input_directory + assert set(actual_args.input_file) == set(expected_input_files) + + +def test_loadData_with_input_files(user_filesystem): + xarray_chi, yarray_chi = loadData("good_data.chi", unpack=True) + xarray_xy, yarray_xy = loadData("good_data.xy", unpack=True) + xarray_txt, yarray_txt = loadData("good_data.txt", unpack=True) + with 
pytest.raises(ValueError): + xarray_txt, yarray_txt = loadData("unreadable_file.txt", unpack=True) + with pytest.raises(ValueError): + xarray_pkl, yarray_pkl = loadData("binary.pkl", unpack=True) + params1 = [ ([None], ["."]), diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py index caa012d..f05819a 100644 --- a/src/diffpy/labpdfproc/tools.py +++ b/src/diffpy/labpdfproc/tools.py @@ -1,9 +1,34 @@ +import glob +import os from pathlib import Path WAVELENGTHS = {"Mo": 0.71, "Ag": 0.59, "Cu": 1.54} known_sources = [key for key in WAVELENGTHS.keys()] +def set_input_files(args): + """ + Set input directory and files, default is current working directory and all files in it + + Parameters + ---------- + args argparse.Namespace + the arguments from the parser + + Returns + ------- + args argparse.Namespace + + """ + input_dir = Path.cwd() / Path(args.input_file).parent if args.input_file else Path.cwd() + setattr(args, "input_directory", input_dir) + if not args.input_file: + input_files = glob.glob(str(input_dir) + "/*", recursive=True) + input_file_names = [os.path.basename(input_file_path) for input_file_path in input_files] + args.input_file = input_file_names + return args + + def set_output_directory(args): """ set the output directory based on the given input arguments From d164dbaf57d099a48cda1eccc8c43dddf6c98495 Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Tue, 7 May 2024 10:48:29 -0400 Subject: [PATCH 2/9] modified input file function to accept either one file or one directory --- src/diffpy/labpdfproc/tests/test_tools.py | 6 +++--- src/diffpy/labpdfproc/tools.py | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index 64c4144..9a17981 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ b/src/diffpy/labpdfproc/tests/test_tools.py @@ -15,14 +15,14 @@ params1 = [ ( - [], + ["--input-file", "."], [ ".", - 
["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl", "input_dir"], + ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], ], ), (["--input-file", "good_data.chi"], [".", "good_data.chi"]), - (["--input-file", "input_dir/unreadable_file.txt"], ["input_dir", "input_dir/unreadable_file.txt"]), + (["--input-file", "input_dir/unreadable_file.txt"], ["input_dir", "unreadable_file.txt"]), # ([Path.cwd()], [Path.cwd()]), ] diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py index f05819a..a93bbf3 100644 --- a/src/diffpy/labpdfproc/tools.py +++ b/src/diffpy/labpdfproc/tools.py @@ -8,7 +8,7 @@ def set_input_files(args): """ - Set input directory and files, default is current working directory and all files in it + Set input directory and files Parameters ---------- @@ -20,12 +20,19 @@ def set_input_files(args): args argparse.Namespace """ - input_dir = Path.cwd() / Path(args.input_file).parent if args.input_file else Path.cwd() - setattr(args, "input_directory", input_dir) - if not args.input_file: - input_files = glob.glob(str(input_dir) + "/*", recursive=True) + if not args.input_file or not Path(args.input_file).exists(): + raise ValueError("Please specify valid input file or directory.") + + if not Path(args.input_file).is_dir(): + input_dir = Path.cwd() / Path(args.input_file).parent + input_file_name = Path(args.input_file).name + args.input_file = input_file_name + else: + input_dir = Path(args.input_file).resolve() + input_files = [file for file in glob.glob(str(input_dir) + "/*", recursive=True) if os.path.isfile(file)] input_file_names = [os.path.basename(input_file_path) for input_file_path in input_files] args.input_file = input_file_names + setattr(args, "input_directory", input_dir) return args From 3c49a18316abebba908977aa5001d76c917f2fec Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Tue, 7 May 2024 12:21:00 -0400 Subject: [PATCH 3/9] added test cases for 
UC1-4 and made input a required argument --- src/diffpy/labpdfproc/labpdfprocapp.py | 2 +- src/diffpy/labpdfproc/tests/test_tools.py | 56 +++++++++++++++++------ src/diffpy/labpdfproc/tools.py | 15 +++--- 3 files changed, 51 insertions(+), 22 deletions(-) diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py index d367ce6..bc09b9d 100644 --- a/src/diffpy/labpdfproc/labpdfprocapp.py +++ b/src/diffpy/labpdfproc/labpdfprocapp.py @@ -17,7 +17,7 @@ def get_args(override_cli_inputs=None): p = ArgumentParser() p.add_argument("mud", help="Value of mu*D for your " "sample. Required.", type=float) - p.add_argument("-i", "--input-file", help="The filename of the " "datafile to load.") + p.add_argument("input", help="The filename or directory of the " "datafile to load.") p.add_argument( "-a", "--anode-type", diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index 9a17981..f1f3f52 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ b/src/diffpy/labpdfproc/tests/test_tools.py @@ -13,21 +13,36 @@ ) from diffpy.utils.parsers.loaddata import loadData -params1 = [ +# Use cases can be found here: https://github.com/diffpy/diffpy.labpdfproc/issues/48 +params_input = [ + (["good_data.chi"], [".", "good_data.chi"]), + (["input_dir/good_data.chi"], ["input_dir", "good_data.chi"]), + (["./input_dir/good_data.chi"], ["input_dir", "good_data.chi"]), ( - ["--input-file", "."], + ["."], [ ".", ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], ], ), - (["--input-file", "good_data.chi"], [".", "good_data.chi"]), - (["--input-file", "input_dir/unreadable_file.txt"], ["input_dir", "unreadable_file.txt"]), - # ([Path.cwd()], [Path.cwd()]), + ( + ["./input_dir"], + [ + "input_dir", + ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], + ], + ), + ( + ["input_dir"], + [ + "input_dir", + ["good_data.chi", "good_data.xy", 
"good_data.txt", "unreadable_file.txt", "binary.pkl"], + ], + ), ] -@pytest.mark.parametrize("inputs, expected", params1) +@pytest.mark.parametrize("inputs, expected", params_input) def test_set_input_files(inputs, expected, user_filesystem): expected_input_directory = Path(user_filesystem) / expected[0] expected_input_files = expected[1] @@ -39,6 +54,21 @@ def test_set_input_files(inputs, expected, user_filesystem): assert set(actual_args.input_file) == set(expected_input_files) +params_input_bad = [ + (["new_file.xy"]), + (["./input_dir/new_file.xy"]), + (["./new_dir"]), +] + + +@pytest.mark.parametrize("inputs", params_input_bad) +def test_set_input_files_bad(inputs, user_filesystem): + cli_inputs = ["2.5"] + inputs + actual_args = get_args(cli_inputs) + with pytest.raises(ValueError): + actual_args = set_input_files(actual_args) + + def test_loadData_with_input_files(user_filesystem): xarray_chi, yarray_chi = loadData("good_data.chi", unpack=True) xarray_xy, yarray_xy = loadData("good_data.xy", unpack=True) @@ -60,7 +90,7 @@ def test_loadData_with_input_files(user_filesystem): @pytest.mark.parametrize("inputs, expected", params1) def test_set_output_directory(inputs, expected, user_filesystem): expected_output_directory = Path(user_filesystem) / expected[0] - cli_inputs = ["2.5"] + inputs + cli_inputs = ["2.5", "data.xy"] + inputs actual_args = get_args(cli_inputs) actual_args.output_directory = set_output_directory(actual_args) assert actual_args.output_directory == expected_output_directory @@ -69,7 +99,7 @@ def test_set_output_directory(inputs, expected, user_filesystem): def test_set_output_directory_bad(user_filesystem): - cli_inputs = ["2.5", "--output-directory", "good_data.chi"] + cli_inputs = ["2.5", "data.xy", "--output-directory", "good_data.chi"] actual_args = get_args(cli_inputs) with pytest.raises(FileExistsError): actual_args.output_directory = set_output_directory(actual_args) @@ -88,7 +118,7 @@ def 
test_set_output_directory_bad(user_filesystem): @pytest.mark.parametrize("inputs, expected", params2) def test_set_wavelength(inputs, expected): expected_wavelength = expected[0] - cli_inputs = ["2.5"] + inputs + cli_inputs = ["2.5", "data.xy"] + inputs actual_args = get_args(cli_inputs) actual_args.wavelength = set_wavelength(actual_args) assert actual_args.wavelength == expected_wavelength @@ -112,7 +142,7 @@ def test_set_wavelength(inputs, expected): @pytest.mark.parametrize("inputs, msg", params3) def test_set_wavelength_bad(inputs, msg): - cli_inputs = ["2.5"] + inputs + cli_inputs = ["2.5", "data.xy"] + inputs actual_args = get_args(cli_inputs) with pytest.raises(ValueError, match=re.escape(msg[0])): actual_args.wavelength = set_wavelength(actual_args) @@ -130,12 +160,12 @@ def test_set_wavelength_bad(inputs, msg): @pytest.mark.parametrize("inputs, expected", params5) def test_load_user_metadata(inputs, expected): - expected_args = get_args(["2.5"]) + expected_args = get_args(["2.5", "data.xy"]) for expected_pair in expected: setattr(expected_args, expected_pair[0], expected_pair[1]) delattr(expected_args, "user_metadata") - cli_inputs = ["2.5"] + inputs + cli_inputs = ["2.5", "data.xy"] + inputs actual_args = get_args(cli_inputs) actual_args = load_user_metadata(actual_args) assert actual_args == expected_args @@ -172,7 +202,7 @@ def test_load_user_metadata(inputs, expected): @pytest.mark.parametrize("inputs, msg", params6) def test_load_user_metadata_bad(inputs, msg): - cli_inputs = ["2.5"] + inputs + cli_inputs = ["2.5", "data.xy"] + inputs actual_args = get_args(cli_inputs) with pytest.raises(ValueError, match=msg[0]): actual_args = load_user_metadata(actual_args) diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py index a93bbf3..70bf72a 100644 --- a/src/diffpy/labpdfproc/tools.py +++ b/src/diffpy/labpdfproc/tools.py @@ -20,19 +20,18 @@ def set_input_files(args): args argparse.Namespace """ - if not args.input_file or not 
Path(args.input_file).exists(): + if not Path(args.input).exists(): raise ValueError("Please specify valid input file or directory.") - if not Path(args.input_file).is_dir(): - input_dir = Path.cwd() / Path(args.input_file).parent - input_file_name = Path(args.input_file).name - args.input_file = input_file_name + if not Path(args.input).is_dir(): + input_dir = Path.cwd() / Path(args.input).parent + input_file_name = Path(args.input).name else: - input_dir = Path(args.input_file).resolve() + input_dir = Path(args.input).resolve() input_files = [file for file in glob.glob(str(input_dir) + "/*", recursive=True) if os.path.isfile(file)] - input_file_names = [os.path.basename(input_file_path) for input_file_path in input_files] - args.input_file = input_file_names + input_file_name = [os.path.basename(input_file_path) for input_file_path in input_files] setattr(args, "input_directory", input_dir) + setattr(args, "input_file", input_file_name) return args From f8b7203bcec4be870e06c05b8b525223da81e1ca Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Tue, 7 May 2024 17:11:12 -0400 Subject: [PATCH 4/9] added more test cases --- src/diffpy/labpdfproc/tests/test_tools.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index f1f3f52..927c5cb 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ b/src/diffpy/labpdfproc/tests/test_tools.py @@ -39,6 +39,11 @@ ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], ], ), + # ([".chi"], [".", ["file1.chi", "file2.chi", "file10.chi"]]), + # (["input_dir/.chi"], ["input_dir", ["file1.chi", "file2.chi", "file10.chi"]]), + # (["file1.chi", "file10.chi"], [".", ["file1.chi", "file10.chi"]]), + # (["file1.chi", "file10.chi"], [".", ["file1.chi", "file10.chi"]]), + # (["input_file_list.txt"], [".", ["file1.chi", "file10.chi"]]), ] From 3d5c5ee1d6d0ff94093e54dc8085432d01402324 Mon Sep 17 00:00:00 2001 
From: yucongalicechen Date: Wed, 8 May 2024 12:39:57 -0400 Subject: [PATCH 5/9] included comments for tests --- src/diffpy/labpdfproc/labpdfprocapp.py | 2 +- src/diffpy/labpdfproc/tests/test_tools.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py index bc09b9d..6666ccf 100644 --- a/src/diffpy/labpdfproc/labpdfprocapp.py +++ b/src/diffpy/labpdfproc/labpdfprocapp.py @@ -17,7 +17,7 @@ def get_args(override_cli_inputs=None): p = ArgumentParser() p.add_argument("mud", help="Value of mu*D for your " "sample. Required.", type=float) - p.add_argument("input", help="The filename or directory of the " "datafile to load.") + p.add_argument("input", help="The filename or directory of the datafile to load. Required.") p.add_argument( "-a", "--anode-type", diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index 927c5cb..cf6dc38 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ b/src/diffpy/labpdfproc/tests/test_tools.py @@ -14,6 +14,9 @@ from diffpy.utils.parsers.loaddata import loadData # Use cases can be found here: https://github.com/diffpy/diffpy.labpdfproc/issues/48 + +# This test covers existing single input file or directory +# We store absolute path into input_directory and file names into input_file params_input = [ (["good_data.chi"], [".", "good_data.chi"]), (["input_dir/good_data.chi"], ["input_dir", "good_data.chi"]), @@ -39,11 +42,6 @@ ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], ], ), - # ([".chi"], [".", ["file1.chi", "file2.chi", "file10.chi"]]), - # (["input_dir/.chi"], ["input_dir", ["file1.chi", "file2.chi", "file10.chi"]]), - # (["file1.chi", "file10.chi"], [".", ["file1.chi", "file10.chi"]]), - # (["file1.chi", "file10.chi"], [".", ["file1.chi", "file10.chi"]]), - # (["input_file_list.txt"], [".", ["file1.chi", "file10.chi"]]), ] @@ 
-59,21 +57,23 @@ def test_set_input_files(inputs, expected, user_filesystem): assert set(actual_args.input_file) == set(expected_input_files) +# This test covers non-existing single input file or directory, in this case we raise an error with message params_input_bad = [ - (["new_file.xy"]), - (["./input_dir/new_file.xy"]), - (["./new_dir"]), + (["non_existing_file.xy"], "Please specify valid input file or directory."), + (["./input_dir/non_existing_file.xy"], "Please specify valid input file or directory."), + (["./non_existing_dir"], "Please specify valid input file or directory."), ] -@pytest.mark.parametrize("inputs", params_input_bad) -def test_set_input_files_bad(inputs, user_filesystem): +@pytest.mark.parametrize("inputs, msg", params_input_bad) +def test_set_input_files_bad(inputs, msg, user_filesystem): cli_inputs = ["2.5"] + inputs actual_args = get_args(cli_inputs) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=msg[0]): actual_args = set_input_files(actual_args) +# Pass files to loadData and use it to check if file is valid or not def test_loadData_with_input_files(user_filesystem): xarray_chi, yarray_chi = loadData("good_data.chi", unpack=True) xarray_xy, yarray_xy = loadData("good_data.xy", unpack=True) From 06ae14b64ac2c3add660852cc7361ccc57ed8166 Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Wed, 8 May 2024 23:27:17 -0400 Subject: [PATCH 6/9] added tests for a file list and edited help message addressing the rules for inputing a file list --- src/diffpy/labpdfproc/labpdfprocapp.py | 8 ++++++- src/diffpy/labpdfproc/tests/conftest.py | 9 ++++++++ src/diffpy/labpdfproc/tests/test_tools.py | 26 ++++++++++++++++++++++- src/diffpy/labpdfproc/tools.py | 23 +++++++++++++++++++- 4 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py index 6666ccf..adaf0aa 100644 --- a/src/diffpy/labpdfproc/labpdfprocapp.py +++ 
b/src/diffpy/labpdfproc/labpdfprocapp.py @@ -17,7 +17,13 @@ def get_args(override_cli_inputs=None): p = ArgumentParser() p.add_argument("mud", help="Value of mu*D for your " "sample. Required.", type=float) - p.add_argument("input", help="The filename or directory of the datafile to load. Required.") + p.add_argument( + "input", + help="The filename or directory of the datafile to load. Required. " + "Supports either a single input file or directory, or a file containing a list of files. " + "If providing a file list, please ensure all files are in the same directory as the file list, " + "and each filename is written line by line in the file list. ", + ) p.add_argument( "-a", "--anode-type", diff --git a/src/diffpy/labpdfproc/tests/conftest.py b/src/diffpy/labpdfproc/tests/conftest.py index 1e4ab40..9296425 100644 --- a/src/diffpy/labpdfproc/tests/conftest.py +++ b/src/diffpy/labpdfproc/tests/conftest.py @@ -39,4 +39,13 @@ def user_filesystem(tmp_path): with open(os.path.join(input_dir, "binary.pkl"), "wb") as f: f.write(binary_data) + file_list_dir = Path(tmp_path).resolve() / "file_list_dir" + file_list_dir.mkdir(parents=True, exist_ok=True) + with open(os.path.join(file_list_dir, "file_list.txt"), "w") as f: + f.write("good_data.chi \n good_data.xy \n good_data.txt") + with open(os.path.join(file_list_dir, "invalid_file_list.txt"), "w") as f: + f.write("good_data.chi \n non_existing_file.xy \n non_existing_file.txt") + with open(os.path.join(file_list_dir, "invalid_format_file_list.txt"), "w") as f: + f.write("good_data.chi good_data.xy \n non_existing_file.txt") + yield tmp_path diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index cf6dc38..7d475d7 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ b/src/diffpy/labpdfproc/tests/test_tools.py @@ -1,3 +1,4 @@ +import os import re from pathlib import Path @@ -15,7 +16,7 @@ # Use cases can be found here: 
https://github.com/diffpy/diffpy.labpdfproc/issues/48 -# This test covers existing single input file or directory +# This test covers existing single input file, directory, or a file list # We store absolute path into input_directory and file names into input_file params_input = [ (["good_data.chi"], [".", "good_data.chi"]), @@ -42,6 +43,7 @@ ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], ], ), + (["file_list_dir/file_list.txt"], ["file_list_dir", ["good_data.chi", "good_data.xy", "good_data.txt"]]), ] @@ -57,6 +59,28 @@ def test_set_input_files(inputs, expected, user_filesystem): assert set(actual_args.input_file) == set(expected_input_files) +# This test is for existing single input file or directory absolute path not in cwd +# Here we are in user_filesystem/input_dir, testing for a file or directory in user_filesystem +params_input_not_cwd = [ + (["good_data.chi"], [".", "good_data.chi"]), + (["."], [".", ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"]]), +] + + +@pytest.mark.parametrize("inputs, expected", params_input_not_cwd) +def test_set_input_files_not_cwd(inputs, expected, user_filesystem): + expected_input_directory = Path(user_filesystem) / expected[0] + expected_input_files = expected[1] + actual_input = [str(Path(user_filesystem) / inputs[0])] + os.chdir("input_dir") + + cli_inputs = ["2.5"] + actual_input + actual_args = get_args(cli_inputs) + actual_args = set_input_files(actual_args) + assert actual_args.input_directory == expected_input_directory + assert set(actual_args.input_file) == set(expected_input_files) + + # This test covers non-existing single input file or directory, in this case we raise an error with message params_input_bad = [ (["non_existing_file.xy"], "Please specify valid input file or directory."), diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py index 70bf72a..f13543c 100644 --- a/src/diffpy/labpdfproc/tools.py +++ 
b/src/diffpy/labpdfproc/tools.py @@ -15,21 +15,42 @@ def set_input_files(args): args argparse.Namespace the arguments from the parser + It is implemented as this: + If input is a file, we first try to read it as a file list and store all listed file names. + If any filename is invalid, then proceed to treat it as a data file. + Otherwise if we have a directory, glob all files within it. + Returns ------- args argparse.Namespace """ + if not Path(args.input).exists(): raise ValueError("Please specify valid input file or directory.") if not Path(args.input).is_dir(): input_dir = Path.cwd() / Path(args.input).parent - input_file_name = Path(args.input).name + file_names = [] + with open(args.input, "r") as f: + for line in f: + if not os.path.isfile(line.strip()): + file_names = [] + break + else: + file_name = line.strip() + file_names.append(file_name) + + if len(file_names) > 0: + input_file_name = file_names + else: + input_file_name = Path(args.input).name + else: input_dir = Path(args.input).resolve() input_files = [file for file in glob.glob(str(input_dir) + "/*", recursive=True) if os.path.isfile(file)] input_file_name = [os.path.basename(input_file_path) for input_file_path in input_files] + setattr(args, "input_directory", input_dir) setattr(args, "input_file", input_file_name) return args From a02b573996117daf4166c84439f9a7b811c8a7a3 Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Thu, 9 May 2024 18:38:51 -0400 Subject: [PATCH 7/9] added tests for file list and multiple files --- src/diffpy/labpdfproc/labpdfprocapp.py | 5 +- src/diffpy/labpdfproc/tests/conftest.py | 10 ++-- src/diffpy/labpdfproc/tests/test_tools.py | 37 ++++++++++----- src/diffpy/labpdfproc/tools.py | 56 +++++++++++++++-------- 4 files changed, 68 insertions(+), 40 deletions(-) diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py index adaf0aa..1d08cb9 100644 --- a/src/diffpy/labpdfproc/labpdfprocapp.py +++ 
b/src/diffpy/labpdfproc/labpdfprocapp.py @@ -19,10 +19,9 @@ def get_args(override_cli_inputs=None): p.add_argument("mud", help="Value of mu*D for your " "sample. Required.", type=float) p.add_argument( "input", + nargs="+", help="The filename or directory of the datafile to load. Required. " - "Supports either a single input file or directory, or a file containing a list of files. " - "If providing a file list, please ensure all files are in the same directory as the file list, " - "and each filename is written line by line in the file list. ", + "Supports either a single input file, a directory, a file containing a list of files, or multiple files. ", ) p.add_argument( "-a", diff --git a/src/diffpy/labpdfproc/tests/conftest.py b/src/diffpy/labpdfproc/tests/conftest.py index 9296425..075428c 100644 --- a/src/diffpy/labpdfproc/tests/conftest.py +++ b/src/diffpy/labpdfproc/tests/conftest.py @@ -42,10 +42,10 @@ def user_filesystem(tmp_path): file_list_dir = Path(tmp_path).resolve() / "file_list_dir" file_list_dir.mkdir(parents=True, exist_ok=True) with open(os.path.join(file_list_dir, "file_list.txt"), "w") as f: - f.write("good_data.chi \n good_data.xy \n good_data.txt") - with open(os.path.join(file_list_dir, "invalid_file_list.txt"), "w") as f: - f.write("good_data.chi \n non_existing_file.xy \n non_existing_file.txt") - with open(os.path.join(file_list_dir, "invalid_format_file_list.txt"), "w") as f: - f.write("good_data.chi good_data.xy \n non_existing_file.txt") + f.write("good_data.chi \n good_data.xy \n good_data.txt \n missing_file.txt") + with open(os.path.join(file_list_dir, "file_list_example2.txt"), "w") as f: + f.write("input_dir/good_data.chi \n") + f.write("good_data.xy \n") + f.write(str(os.path.abspath(os.path.join(input_dir, "good_data.txt"))) + "\n") yield tmp_path diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index 7d475d7..0cbc41e 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ 
b/src/diffpy/labpdfproc/tests/test_tools.py @@ -16,34 +16,47 @@ # Use cases can be found here: https://github.com/diffpy/diffpy.labpdfproc/issues/48 -# This test covers existing single input file, directory, or a file list -# We store absolute path into input_directory and file names into input_file +# This test covers existing single input file, directory, a file list, and multiple files +# We store absolute paths into input_directory and file names into input_file params_input = [ - (["good_data.chi"], [".", "good_data.chi"]), - (["input_dir/good_data.chi"], ["input_dir", "good_data.chi"]), - (["./input_dir/good_data.chi"], ["input_dir", "good_data.chi"]), - ( + (["good_data.chi"], [".", "good_data.chi"]), # single good file, same directory + (["input_dir/good_data.chi"], ["input_dir", "good_data.chi"]), # single good file, input directory + ( # glob current directory ["."], [ ".", ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], ], ), - ( + ( # glob input directory ["./input_dir"], [ "input_dir", ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], ], ), - ( - ["input_dir"], + ( # list of files provided (we skip if encountering an invalid files) + ["good_data.chi", "good_data.xy", "unreadable_file.txt", "missing_file.txt"], [ - "input_dir", - ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], + ".", + ["good_data.chi", "good_data.xy", "unreadable_file.txt"], ], ), - (["file_list_dir/file_list.txt"], ["file_list_dir", ["good_data.chi", "good_data.xy", "good_data.txt"]]), + ( # list of files provided (with invalid files and files in different directory) + ["input_dir/good_data.chi", "good_data.xy", "missing_file.txt"], + [ + ".", + ["input_dir/good_data.chi", "good_data.xy"], + ], + ), + ( # file_list.txt list of files provided + ["file_list_dir/file_list.txt"], + [".", ["good_data.chi", "good_data.xy", "good_data.txt"]], + ), + ( # 
file_list_example2.txt list of files provided with different paths + ["file_list_dir/file_list_example2.txt"], + [".", ["input_dir/good_data.chi", "good_data.xy", "input_dir/good_data.txt"]], + ), ] diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py index f13543c..0c5b753 100644 --- a/src/diffpy/labpdfproc/tools.py +++ b/src/diffpy/labpdfproc/tools.py @@ -15,10 +15,12 @@ def set_input_files(args): args argparse.Namespace the arguments from the parser - It is implemented as this: + It is implemented as the following: + If user input multiple files, we store their common directory as input directory and all of their names. If input is a file, we first try to read it as a file list and store all listed file names. - If any filename is invalid, then proceed to treat it as a data file. + If the first filename is invalid, then we proceed to treat it as a data file. Otherwise if we have a directory, glob all files within it. + If there are any invalid filenames (for the cases of multiple files, file list, or directory), we skip them. 
Returns ------- @@ -26,28 +28,42 @@ def set_input_files(args): """ - if not Path(args.input).exists(): + if len(args.input) > 1: + input_paths = [] + input_paths_parent = [] + for input in args.input: + if Path(input).is_file(): + input_paths.append(Path(input).resolve()) + input_paths_parent.append(Path(input).resolve().parent) + input_dir = Path(os.path.commonprefix([str(path) for path in input_paths_parent])) + input_file_name = [str(path.relative_to(input_dir)) for path in input_paths] + setattr(args, "input_directory", input_dir) + setattr(args, "input_file", input_file_name) + return args + + if not Path(args.input[0]).exists(): raise ValueError("Please specify valid input file or directory.") - if not Path(args.input).is_dir(): - input_dir = Path.cwd() / Path(args.input).parent - file_names = [] - with open(args.input, "r") as f: - for line in f: - if not os.path.isfile(line.strip()): - file_names = [] - break - else: - file_name = line.strip() - file_names.append(file_name) - - if len(file_names) > 0: - input_file_name = file_names - else: - input_file_name = Path(args.input).name + if not Path(args.input[0]).is_dir(): + input_paths = [] + input_paths_parent = [] + with open(args.input[0], "r") as f: + lines = [line.strip() for line in f] + if not os.path.isfile(lines[0]): + input_dir = Path.cwd() / Path(args.input[0]).parent + input_file_name = Path(args.input[0]).name + else: + for line in lines: + if not os.path.isfile(line): + continue + else: + input_paths.append(Path(line).resolve()) + input_paths_parent.append(Path(line).resolve().parent) + input_dir = Path(os.path.commonprefix([str(path) for path in input_paths_parent])) + input_file_name = [str(path.relative_to(input_dir)) for path in input_paths] else: - input_dir = Path(args.input).resolve() + input_dir = Path(args.input[0]).resolve() input_files = [file for file in glob.glob(str(input_dir) + "/*", recursive=True) if os.path.isfile(file)] input_file_name = [os.path.basename(input_file_path) for 
input_file_path in input_files] From 7d39a74f08a1443a9c7cd14c0780348c468cda3f Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Thu, 9 May 2024 18:55:46 -0400 Subject: [PATCH 8/9] fix grammar --- src/diffpy/labpdfproc/tests/test_tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index 0cbc41e..ed02838 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ b/src/diffpy/labpdfproc/tests/test_tools.py @@ -17,7 +17,7 @@ # Use cases can be found here: https://github.com/diffpy/diffpy.labpdfproc/issues/48 # This test covers existing single input file, directory, a file list, and multiple files -# We store absolute paths into input_directory and file names into input_file +# We store absolute path into input_directory and file names into input_file params_input = [ (["good_data.chi"], [".", "good_data.chi"]), # single good file, same directory (["input_dir/good_data.chi"], ["input_dir", "good_data.chi"]), # single good file, input directory @@ -35,14 +35,14 @@ ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], ], ), - ( # list of files provided (we skip if encountering an invalid files) + ( # list of files provided (we skip if encountering invalid files) ["good_data.chi", "good_data.xy", "unreadable_file.txt", "missing_file.txt"], [ ".", ["good_data.chi", "good_data.xy", "unreadable_file.txt"], ], ), - ( # list of files provided (with invalid files and files in different directory) + ( # list of files provided (with invalid files and files in different directories) ["input_dir/good_data.chi", "good_data.xy", "missing_file.txt"], [ ".", @@ -53,7 +53,7 @@ ["file_list_dir/file_list.txt"], [".", ["good_data.chi", "good_data.xy", "good_data.txt"]], ), - ( # file_list_example2.txt list of files provided with different paths + ( # file_list_example2.txt list of files provided in different directories 
["file_list_dir/file_list_example2.txt"], [".", ["input_dir/good_data.chi", "good_data.xy", "input_dir/good_data.txt"]], ), From ead58300b3853001117a6f44b0707fa3f7d79221 Mon Sep 17 00:00:00 2001 From: yucongalicechen Date: Fri, 10 May 2024 00:29:44 -0400 Subject: [PATCH 9/9] intermediate process (more tests need to be added): using input_directory only for simplification --- src/diffpy/labpdfproc/labpdfprocapp.py | 11 ++-- src/diffpy/labpdfproc/tests/test_tools.py | 58 ++++++++--------- src/diffpy/labpdfproc/tools.py | 79 +++++++++++------------ 3 files changed, 68 insertions(+), 80 deletions(-) diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py index 1d08cb9..ef3f253 100644 --- a/src/diffpy/labpdfproc/labpdfprocapp.py +++ b/src/diffpy/labpdfproc/labpdfprocapp.py @@ -1,6 +1,5 @@ import sys from argparse import ArgumentParser -from pathlib import Path from diffpy.labpdfproc.functions import apply_corr, compute_cve from diffpy.labpdfproc.tools import ( @@ -20,8 +19,11 @@ def get_args(override_cli_inputs=None): p.add_argument( "input", nargs="+", - help="The filename or directory of the datafile to load. Required. " - "Supports either a single input file, a directory, a file containing a list of files, or multiple files. ", + help="The filename(s) or folder(s) of the datafile(s) to load. Required. " + "Supports multiple arguments of input file or directory. " + "The file can be either a data file or a file containing a list of files. " + "If a directory is provided, we will load all data files in it. " + "For example, file.xy, data/file.xy, file_list.txt, ./data/file.xy, ./data are all valid inputs. 
", ) p.add_argument( "-a", @@ -92,8 +94,7 @@ def main(): args.wavelength = set_wavelength(args) args = load_user_metadata(args) - for input_file in args.input_file: - filepath = Path(args.input_file) + for filepath in args.input_directory: outfilestem = filepath.stem + "_corrected" corrfilestem = filepath.stem + "_cve" outfile = args.output_directory / (outfilestem + ".chi") diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py index ed02838..717e5eb 100644 --- a/src/diffpy/labpdfproc/tests/test_tools.py +++ b/src/diffpy/labpdfproc/tests/test_tools.py @@ -19,86 +19,80 @@ # This test covers existing single input file, directory, a file list, and multiple files # We store absolute path into input_directory and file names into input_file params_input = [ - (["good_data.chi"], [".", "good_data.chi"]), # single good file, same directory - (["input_dir/good_data.chi"], ["input_dir", "good_data.chi"]), # single good file, input directory + (["good_data.chi"], ["good_data.chi"]), # single good file, same directory + (["input_dir/good_data.chi"], ["input_dir/good_data.chi"]), # single good file, input directory ( # glob current directory ["."], - [ - ".", - ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], - ], + ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], ), ( # glob input directory ["./input_dir"], [ - "input_dir", - ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], + "input_dir/good_data.chi", + "input_dir/good_data.xy", + "input_dir/good_data.txt", + "input_dir/unreadable_file.txt", + "input_dir/binary.pkl", ], ), ( # list of files provided (we skip if encountering invalid files) ["good_data.chi", "good_data.xy", "unreadable_file.txt", "missing_file.txt"], - [ - ".", - ["good_data.chi", "good_data.xy", "unreadable_file.txt"], - ], + ["good_data.chi", "good_data.xy", "unreadable_file.txt"], ), ( # list of 
files provided (with invalid files and files in different directories) - ["input_dir/good_data.chi", "good_data.xy", "missing_file.txt"], - [ - ".", - ["input_dir/good_data.chi", "good_data.xy"], - ], + ["input_dir/good_data.chi", "good_data.chi", "missing_file.txt"], + ["input_dir/good_data.chi", "good_data.chi"], ), ( # file_list.txt list of files provided ["file_list_dir/file_list.txt"], - [".", ["good_data.chi", "good_data.xy", "good_data.txt"]], + ["good_data.chi", "good_data.xy", "good_data.txt"], ), ( # file_list_example2.txt list of files provided in different directories ["file_list_dir/file_list_example2.txt"], - [".", ["input_dir/good_data.chi", "good_data.xy", "input_dir/good_data.txt"]], + ["input_dir/good_data.chi", "good_data.xy", "input_dir/good_data.txt"], ), ] @pytest.mark.parametrize("inputs, expected", params_input) def test_set_input_files(inputs, expected, user_filesystem): - expected_input_directory = Path(user_filesystem) / expected[0] - expected_input_files = expected[1] + expected_input_directory = [] + for expected_path in expected: + expected_input_directory.append(Path(user_filesystem) / expected_path) cli_inputs = ["2.5"] + inputs actual_args = get_args(cli_inputs) actual_args = set_input_files(actual_args) - assert actual_args.input_directory == expected_input_directory - assert set(actual_args.input_file) == set(expected_input_files) + assert set(actual_args.input_directory) == set(expected_input_directory) # This test is for existing single input file or directory absolute path not in cwd # Here we are in user_filesystem/input_dir, testing for a file or directory in user_filesystem params_input_not_cwd = [ - (["good_data.chi"], [".", "good_data.chi"]), - (["."], [".", ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"]]), + (["good_data.chi"], ["good_data.chi"]), + (["."], ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"]), ] @pytest.mark.parametrize("inputs, 
expected", params_input_not_cwd) def test_set_input_files_not_cwd(inputs, expected, user_filesystem): - expected_input_directory = Path(user_filesystem) / expected[0] - expected_input_files = expected[1] + expected_input_directory = [] + for expected_path in expected: + expected_input_directory.append(Path(user_filesystem) / expected_path) actual_input = [str(Path(user_filesystem) / inputs[0])] os.chdir("input_dir") cli_inputs = ["2.5"] + actual_input actual_args = get_args(cli_inputs) actual_args = set_input_files(actual_args) - assert actual_args.input_directory == expected_input_directory - assert set(actual_args.input_file) == set(expected_input_files) + assert set(actual_args.input_directory) == set(expected_input_directory) # This test covers non-existing single input file or directory, in this case we raise an error with message params_input_bad = [ - (["non_existing_file.xy"], "Please specify valid input file or directory."), - (["./input_dir/non_existing_file.xy"], "Please specify valid input file or directory."), - (["./non_existing_dir"], "Please specify valid input file or directory."), + (["non_existing_file.xy"], "Please specify at least one valid input file or directory."), + (["./input_dir/non_existing_file.xy"], "Please specify at least one valid input file or directory."), + (["./non_existing_dir"], "Please specify at least one valid input file or directory."), ] diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py index 0c5b753..df3139d 100644 --- a/src/diffpy/labpdfproc/tools.py +++ b/src/diffpy/labpdfproc/tools.py @@ -16,11 +16,12 @@ def set_input_files(args): the arguments from the parser It is implemented as the following: - If user input multiple files, we store their common directory as input directory and all of their names. + For each input, we try to read it as a file or a directory. If input is a file, we first try to read it as a file list and store all listed file names. 
If the first filename is invalid, then we proceed to treat it as a data file. Otherwise if we have a directory, glob all files within it. - If there are any invalid filenames (for the cases of multiple files, file list, or directory), we skip them. + If any file does not exist, we raise a ValueError telling which file(s) does not exist. + If all files are invalid, we raise an Error telling user to specify at least one valid file or directory. Returns ------- @@ -28,47 +29,39 @@ def set_input_files(args): """ - if len(args.input) > 1: - input_paths = [] - input_paths_parent = [] - for input in args.input: - if Path(input).is_file(): - input_paths.append(Path(input).resolve()) - input_paths_parent.append(Path(input).resolve().parent) - input_dir = Path(os.path.commonprefix([str(path) for path in input_paths_parent])) - input_file_name = [str(path.relative_to(input_dir)) for path in input_paths] - setattr(args, "input_directory", input_dir) - setattr(args, "input_file", input_file_name) - return args - - if not Path(args.input[0]).exists(): - raise ValueError("Please specify valid input file or directory.") - - if not Path(args.input[0]).is_dir(): - input_paths = [] - input_paths_parent = [] - with open(args.input[0], "r") as f: - lines = [line.strip() for line in f] - if not os.path.isfile(lines[0]): - input_dir = Path.cwd() / Path(args.input[0]).parent - input_file_name = Path(args.input[0]).name - else: - for line in lines: - if not os.path.isfile(line): - continue - else: - input_paths.append(Path(line).resolve()) - input_paths_parent.append(Path(line).resolve().parent) - input_dir = Path(os.path.commonprefix([str(path) for path in input_paths_parent])) - input_file_name = [str(path.relative_to(input_dir)) for path in input_paths] - - else: - input_dir = Path(args.input[0]).resolve() - input_files = [file for file in glob.glob(str(input_dir) + "/*", recursive=True) if os.path.isfile(file)] - input_file_name = [os.path.basename(input_file_path) for input_file_path 
in input_files] - - setattr(args, "input_directory", input_dir) - setattr(args, "input_file", input_file_name) + input_paths = [] + for input in args.input: + try: + if Path(input).exists(): + if not Path(input).is_dir(): + with open(args.input[0], "r") as f: + lines = [line.strip() for line in f] + if not os.path.isfile(lines[0]): + input_paths.append(Path(input).resolve()) + else: + for line in lines: + try: + if os.path.isfile(line): + input_paths.append(Path(line).resolve()) + except Exception as e: + raise ValueError(f"{line} does not exist. {e}.") + + else: + input_dir = Path(input).resolve() + input_files = [ + Path(file).resolve() + for file in glob.glob(str(input_dir) + "/*", recursive=True) + if os.path.isfile(file) + ] + input_paths.extend(input_files) + + except Exception as e: + raise ValueError(f"{input} does not exist. {e}.") + + if len(input_paths) == 0: + raise ValueError("Please specify at least one valid input file or directory.") + + setattr(args, "input_directory", input_paths) return args