diffpy · yucongalicechen · May 7, 2024 · May 7, 2024 · May 7, 2024 · May 7, 2024
diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py
@@ -3,15 +3,27 @@
 from pathlib import Path
 
 from diffpy.labpdfproc.functions import apply_corr, compute_cve
-from diffpy.labpdfproc.tools import known_sources, load_user_metadata, set_output_directory, set_wavelength
+from diffpy.labpdfproc.tools import (
+    known_sources,
+    load_user_metadata,
+    set_input_files,
+    set_output_directory,
+    set_wavelength,
+)
 from diffpy.utils.parsers.loaddata import loadData
 from diffpy.utils.scattering_objects.diffraction_objects import XQUANTITIES, Diffraction_object
 
 
 def get_args(override_cli_inputs=None):
     p = ArgumentParser()
     p.add_argument("mud", help="Value of mu*D for your " "sample. Required.", type=float)
-    p.add_argument("-i", "--input-file", help="The filename of the " "datafile to load.")
+    p.add_argument(
+        "input",
+        help="The filename or directory of the datafile to load. Required. "
+        "Supports either a single input file or directory, or a file containing a list of files. "
+        "If providing a file list, please ensure all files are in the same directory as the file list, "
+        "and each filename is written line by line in the file list. ",
+    )
     p.add_argument(
         "-a",
         "--anode-type",
@@ -76,45 +88,69 @@ def get_args(override_cli_inputs=None):
 
 def main():
+    """
+    Apply the absorption correction to every requested input file
+
+    The arguments come from get_args and are completed by set_input_files, set_output_directory,
+    set_wavelength and load_user_metadata. Each input file is then loaded, corrected, and dumped
+    to <stem>_corrected.chi in the output directory; the correction itself is also dumped to
+    <stem>_cve.chi when args.output_correction is set.
+
+    The program stops through sys.exit as soon as an output file already exists and
+    args.force_overwrite is not set, and raises ValueError for a file that cannot be loaded.
+    """
     args = get_args()
+    args = set_input_files(args)
     args.output_directory = set_output_directory(args)
     args.wavelength = set_wavelength(args)
     args = load_user_metadata(args)
 
-    filepath = Path(args.input_file)
-    outfilestem = filepath.stem + "_corrected"
-    corrfilestem = filepath.stem + "_cve"
-    outfile = args.output_directory / (outfilestem + ".chi")
-    corrfile = args.output_directory / (corrfilestem + ".chi")
+    # set_input_files stores a bare str for a single data file and a list of names otherwise;
+    # wrap the str so the loop below does not iterate over its characters
+    input_files = [args.input_file] if isinstance(args.input_file, str) else args.input_file
+
+    for input_file in input_files:
+        # the stored names are relative to args.input_directory, not necessarily to the cwd
+        filepath = Path(args.input_directory) / input_file
+        outfilestem = filepath.stem + "_corrected"
+        corrfilestem = filepath.stem + "_cve"
+        outfile = args.output_directory / (outfilestem + ".chi")
+        corrfile = args.output_directory / (corrfilestem + ".chi")
 
-    if outfile.exists() and not args.force_overwrite:
-        sys.exit(
-            f"Output file {str(outfile)} already exists. Please rerun "
-            f"specifying -f if you want to overwrite it."
-        )
-    if corrfile.exists() and args.output_correction and not args.force_overwrite:
-        sys.exit(
-            f"Corrections file {str(corrfile)} was requested and already "
-            f"exists. Please rerun specifying -f if you want to overwrite it."
-        )
+        if outfile.exists() and not args.force_overwrite:
+            sys.exit(
+                f"Output file {str(outfile)} already exists. Please rerun "
+                f"specifying -f if you want to overwrite it."
+            )
+        if corrfile.exists() and args.output_correction and not args.force_overwrite:
+            sys.exit(
+                f"Corrections file {str(corrfile)} was requested and already "
+                f"exists. Please rerun specifying -f if you want to overwrite it."
+            )
 
-    input_pattern = Diffraction_object(wavelength=args.wavelength)
-    xarray, yarray = loadData(args.input_file, unpack=True)
-    input_pattern.insert_scattering_quantity(
-        xarray,
-        yarray,
-        "tth",
-        scat_quantity="x-ray",
-        name=str(args.input_file),
-        metadata={"muD": args.mud, "anode_type": args.anode_type},
-    )
+        input_pattern = Diffraction_object(wavelength=args.wavelength)
+
+        try:
+            xarray, yarray = loadData(str(filepath), unpack=True)
+        except Exception as e:
+            # chain the original error so its traceback is not lost
+            raise ValueError(f"Failed to load data from {filepath}: {e}.") from e
+
+        input_pattern.insert_scattering_quantity(
+            xarray,
+            yarray,
+            "tth",
+            scat_quantity="x-ray",
+            name=str(input_file),
+            metadata={"muD": args.mud, "anode_type": args.anode_type},
+        )
 
-    absorption_correction = compute_cve(input_pattern, args.mud, args.wavelength)
-    corrected_data = apply_corr(input_pattern, absorption_correction)
-    corrected_data.name = f"Absorption corrected input_data: {input_pattern.name}"
-    corrected_data.dump(f"{outfile}", xtype="tth")
+        absorption_correction = compute_cve(input_pattern, args.mud, args.wavelength)
+        corrected_data = apply_corr(input_pattern, absorption_correction)
+        corrected_data.name = f"Absorption corrected input_data: {input_pattern.name}"
+        corrected_data.dump(f"{outfile}", xtype="tth")
 
-    if args.output_correction:
-        absorption_correction.dump(f"{corrfile}", xtype="tth")
+        if args.output_correction:
+            absorption_correction.dump(f"{corrfile}", xtype="tth")
 
 
 if __name__ == "__main__":

diff --git a/src/diffpy/labpdfproc/tests/conftest.py b/src/diffpy/labpdfproc/tests/conftest.py
@@ -39,4 +39,13 @@ def user_filesystem(tmp_path):
     with open(os.path.join(input_dir, "binary.pkl"), "wb") as f:
         f.write(binary_data)
 
+    file_list_dir = Path(tmp_path).resolve() / "file_list_dir"
+    file_list_dir.mkdir(parents=True, exist_ok=True)
+    with open(os.path.join(file_list_dir, "file_list.txt"), "w") as f:
+        f.write("good_data.chi \n good_data.xy \n good_data.txt")
+    with open(os.path.join(file_list_dir, "invalid_file_list.txt"), "w") as f:
+        f.write("good_data.chi \n non_existing_file.xy \n non_existing_file.txt")
+    with open(os.path.join(file_list_dir, "invalid_format_file_list.txt"), "w") as f:
+        f.write("good_data.chi good_data.xy \n non_existing_file.txt")
+
     yield tmp_path
diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py
@@ -1,10 +1,112 @@
+import os
 import re
 from pathlib import Path
 
 import pytest
 
 from diffpy.labpdfproc.labpdfprocapp import get_args
-from diffpy.labpdfproc.tools import known_sources, load_user_metadata, set_output_directory, set_wavelength
+from diffpy.labpdfproc.tools import (
+    known_sources,
+    load_user_metadata,
+    set_input_files,
+    set_output_directory,
+    set_wavelength,
+)
+from diffpy.utils.parsers.loaddata import loadData
+
+# Use cases can be found here: https://github.com/diffpy/diffpy.labpdfproc/issues/48
+
+# Cases where the input exists: a single data file, a directory, or a file list.
+# Each entry is (cli input, [expected input_directory relative to user_filesystem, expected input_file]).
+params_input = [
+    (["good_data.chi"], [".", "good_data.chi"]),
+    (["input_dir/good_data.chi"], ["input_dir", "good_data.chi"]),
+    (["./input_dir/good_data.chi"], ["input_dir", "good_data.chi"]),
+    (
+        ["."],
+        [
+            ".",
+            ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"],
+        ],
+    ),
+    (
+        ["./input_dir"],
+        [
+            "input_dir",
+            ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"],
+        ],
+    ),
+    (
+        ["input_dir"],
+        [
+            "input_dir",
+            ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"],
+        ],
+    ),
+    (["file_list_dir/file_list.txt"], ["file_list_dir", ["good_data.chi", "good_data.xy", "good_data.txt"]]),
+]
+
+
+@pytest.mark.parametrize("inputs, expected", params_input)
+def test_set_input_files(inputs, expected, user_filesystem):
+    # expected holds [directory relative to user_filesystem, file name(s)]
+    expected_directory, expected_files = expected
+    expected_directory = Path(user_filesystem) / expected_directory
+
+    parsed_args = set_input_files(get_args(["2.5"] + inputs))
+    # NOTE(review): for a single file both sides are plain strings, so set() compares characters
+    assert parsed_args.input_directory == expected_directory
+    assert set(parsed_args.input_file) == set(expected_files)
+
+
+# Cases where an existing file or directory is given by absolute path and lies outside the cwd:
+# the test runs from user_filesystem/input_dir while the input lives in user_filesystem itself.
+params_input_not_cwd = [
+    (["good_data.chi"], [".", "good_data.chi"]),
+    (["."], [".", ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"]]),
+]
+
+
+@pytest.mark.parametrize("inputs, expected", params_input_not_cwd)
+def test_set_input_files_not_cwd(inputs, expected, user_filesystem):
+    # the input is passed as an absolute path located under user_filesystem
+    base_dir = Path(user_filesystem)
+    absolute_input = str(base_dir / inputs[0])
+    expected_directory, expected_files = base_dir / expected[0], expected[1]
+    # run from input_dir so that the input lies outside the current working directory
+    os.chdir("input_dir")
+
+    parsed_args = set_input_files(get_args(["2.5", absolute_input]))
+    assert parsed_args.input_directory == expected_directory
+    assert set(parsed_args.input_file) == set(expected_files)
+
+
+# Cases where the input file or directory does not exist; each one must raise a ValueError with this message.
+params_input_bad = [
+    (["non_existing_file.xy"], "Please specify valid input file or directory."),
+    (["./input_dir/non_existing_file.xy"], "Please specify valid input file or directory."),
+    (["./non_existing_dir"], "Please specify valid input file or directory."),
+]
+
+
+@pytest.mark.parametrize("inputs, msg", params_input_bad)
+def test_set_input_files_bad(inputs, msg, user_filesystem):
+    actual_args = get_args(["2.5"] + inputs)
+    # msg is a plain string here, so match it whole (escaped) rather than only its first character
+    with pytest.raises(ValueError, match=re.escape(msg)):
+        set_input_files(actual_args)
+
+
+# loadData is used as the check for whether an input file is valid or not
+def test_loadData_with_input_files(user_filesystem):
+    # these fixture files are expected to load and unpack into two arrays
+    for good_file in ("good_data.chi", "good_data.xy", "good_data.txt"):
+        _xarray, _yarray = loadData(good_file, unpack=True)
+    # these fixture files are expected to be rejected with a ValueError
+    for bad_file in ("unreadable_file.txt", "binary.pkl"):
+        with pytest.raises(ValueError):
+            _xarray, _yarray = loadData(bad_file, unpack=True)
+
 
 params1 = [
     ([], ["."]),
@@ -17,7 +119,7 @@
 @pytest.mark.parametrize("inputs, expected", params1)
 def test_set_output_directory(inputs, expected, user_filesystem):
     expected_output_directory = Path(user_filesystem) / expected[0]
-    cli_inputs = ["2.5"] + inputs
+    cli_inputs = ["2.5", "data.xy"] + inputs
     actual_args = get_args(cli_inputs)
     actual_args.output_directory = set_output_directory(actual_args)
     assert actual_args.output_directory == expected_output_directory
@@ -26,7 +128,7 @@ def test_set_output_directory(inputs, expected, user_filesystem):
 
 
 def test_set_output_directory_bad(user_filesystem):
-    cli_inputs = ["2.5", "--output-directory", "good_data.chi"]
+    cli_inputs = ["2.5", "data.xy", "--output-directory", "good_data.chi"]
     actual_args = get_args(cli_inputs)
     with pytest.raises(FileExistsError):
         actual_args.output_directory = set_output_directory(actual_args)
@@ -45,7 +147,7 @@ def test_set_output_directory_bad(user_filesystem):
 @pytest.mark.parametrize("inputs, expected", params2)
 def test_set_wavelength(inputs, expected):
     expected_wavelength = expected[0]
-    cli_inputs = ["2.5"] + inputs
+    cli_inputs = ["2.5", "data.xy"] + inputs
     actual_args = get_args(cli_inputs)
     actual_args.wavelength = set_wavelength(actual_args)
     assert actual_args.wavelength == expected_wavelength
@@ -69,7 +171,7 @@ def test_set_wavelength(inputs, expected):
 
 @pytest.mark.parametrize("inputs, msg", params3)
 def test_set_wavelength_bad(inputs, msg):
-    cli_inputs = ["2.5"] + inputs
+    cli_inputs = ["2.5", "data.xy"] + inputs
     actual_args = get_args(cli_inputs)
     with pytest.raises(ValueError, match=re.escape(msg[0])):
         actual_args.wavelength = set_wavelength(actual_args)
@@ -87,12 +189,12 @@ def test_set_wavelength_bad(inputs, msg):
 
 @pytest.mark.parametrize("inputs, expected", params5)
 def test_load_user_metadata(inputs, expected):
-    expected_args = get_args(["2.5"])
+    expected_args = get_args(["2.5", "data.xy"])
     for expected_pair in expected:
         setattr(expected_args, expected_pair[0], expected_pair[1])
     delattr(expected_args, "user_metadata")
 
-    cli_inputs = ["2.5"] + inputs
+    cli_inputs = ["2.5", "data.xy"] + inputs
     actual_args = get_args(cli_inputs)
     actual_args = load_user_metadata(actual_args)
     assert actual_args == expected_args
@@ -129,7 +231,7 @@ def test_load_user_metadata(inputs, expected):
 
 @pytest.mark.parametrize("inputs, msg", params6)
 def test_load_user_metadata_bad(inputs, msg):
-    cli_inputs = ["2.5"] + inputs
+    cli_inputs = ["2.5", "data.xy"] + inputs
     actual_args = get_args(cli_inputs)
     with pytest.raises(ValueError, match=msg[0]):
         actual_args = load_user_metadata(actual_args)
diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py
@@ -1,9 +1,61 @@
+import glob
+import os
 from pathlib import Path
 
 WAVELENGTHS = {"Mo": 0.71, "Ag": 0.59, "Cu": 1.54}
 known_sources = [key for key in WAVELENGTHS.keys()]
 
 
+def set_input_files(args):
+    """
+    Set input directory and files
+
+    Parameters
+    ----------
+    args argparse.Namespace
+        the arguments from the parser
+
+    A file is first read as a file list (one name per line, blank lines skipped). If every name is an
+    existing file, the names are stored as a list; otherwise, or if the file is not text, the file
+    itself is stored as a single data file name. A directory stores the names of the files inside it.
+
+    Returns
+    -------
+    args argparse.Namespace
+        the same namespace with input_directory (Path) and input_file (str or list of str) set
+
+    Raises
+    ------
+    ValueError
+        if args.input does not exist
+    """
+    input_path = Path(args.input)
+    if not input_path.exists():
+        raise ValueError("Please specify valid input file or directory.")
+
+    if input_path.is_dir():
+        input_dir = input_path.resolve()
+        # escape the directory part so glob metacharacters in its name are matched literally
+        pattern = os.path.join(glob.escape(str(input_dir)), "*")
+        input_file_name = [os.path.basename(path) for path in glob.glob(pattern) if os.path.isfile(path)]
+    else:
+        input_dir = Path.cwd() / input_path.parent
+        try:
+            with open(input_path, "r") as f:
+                file_names = [line.strip() for line in f if line.strip()]
+        except UnicodeDecodeError:  # binary content cannot be a file list, so treat it as a data file
+            file_names = []
+        # NOTE(review): listed names are checked relative to the cwd, not to input_dir -- confirm intent
+        if file_names and all(os.path.isfile(name) for name in file_names):
+            input_file_name = file_names
+        else:
+            input_file_name = input_path.name
+
+    args.input_directory = input_dir
+    args.input_file = input_file_name
+    return args
+
+
 def set_output_directory(args):
     """
     set the output directory based on the given input arguments