Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement wildcard pattern for input #59

Merged
merged 7 commits into from
May 16, 2024
12 changes: 6 additions & 6 deletions src/diffpy/labpdfproc/labpdfprocapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from diffpy.labpdfproc.functions import apply_corr, compute_cve
from diffpy.labpdfproc.tools import (
expand_list_file,
known_sources,
load_user_metadata,
set_input_lists,
Expand All @@ -29,7 +28,9 @@ def get_args(override_cli_inputs=None):
"'.' (load everything in the current directory), 'data' (load"
"everything in the folder ./data), 'data/file_list.txt' (load"
" the list of files contained in the text-file called "
"file_list.txt that can be found in the folder ./data).",
"file_list.txt that can be found in the folder ./data), "
"'./*.chi', 'data/*.chi' (load all files with extension .chi in the "
"folder ./data).",
)
p.add_argument(
"-a",
Expand Down Expand Up @@ -95,13 +96,12 @@ def get_args(override_cli_inputs=None):

def main():
args = get_args()
args = expand_list_file(args)
args = set_input_lists(args)
args.output_directory = set_output_directory(args)
args.wavelength = set_wavelength(args)
args = load_user_metadata(args)

for filepath in args.input_directory:
for filepath in args.input_paths:
outfilestem = filepath.stem + "_corrected"
corrfilestem = filepath.stem + "_cve"
outfile = args.output_directory / (outfilestem + ".chi")
Expand All @@ -119,13 +119,13 @@ def main():
)

input_pattern = Diffraction_object(wavelength=args.wavelength)
xarray, yarray = loadData(args.input_file, unpack=True)
xarray, yarray = loadData(filepath, unpack=True)
input_pattern.insert_scattering_quantity(
xarray,
yarray,
"tth",
scat_quantity="x-ray",
name=str(args.input_file),
name=filepath.stem,
metadata={"muD": args.mud, "anode_type": args.anode_type},
)

Expand Down
1 change: 1 addition & 0 deletions src/diffpy/labpdfproc/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def user_filesystem(tmp_path):
with open(input_dir / "file_list.txt", "w") as f:
f.write("good_data.chi \n good_data.xy \n good_data.txt \n missing_file.txt")
with open(input_dir / "file_list_example2.txt", "w") as f:
f.write("input_dir/*.txt \n")
f.write("input_dir/good_data.chi \n")
f.write("good_data.xy \n")
f.write(f"{str(input_dir.resolve() / 'good_data.txt')}\n")
Expand Down
19 changes: 14 additions & 5 deletions src/diffpy/labpdfproc/tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from diffpy.labpdfproc.labpdfprocapp import get_args
from diffpy.labpdfproc.tools import (
expand_list_file,
known_sources,
load_user_metadata,
set_input_lists,
Expand Down Expand Up @@ -50,9 +49,21 @@
"input_dir/binary.pkl",
],
),
( # file_list_example2.txt list of files provided in different directories
( # file_list_example2.txt list of files provided in different directories with wildcard
["input_dir/file_list_example2.txt"],
["input_dir/good_data.chi", "good_data.xy", "input_dir/good_data.txt"],
["input_dir/good_data.chi", "good_data.xy", "input_dir/good_data.txt", "input_dir/unreadable_file.txt"],
),
( # wildcard pattern, matching files with .chi extension in the same directory
["./*.chi"],
["good_data.chi"],
),
( # wildcard pattern, matching files with .chi extension in the input directory
["input_dir/*.chi"],
["input_dir/good_data.chi"],
),
( # wildcard pattern, matching files starting with good_data
["good_data*"],
["good_data.chi", "good_data.xy", "good_data.txt"],
),
]

Expand All @@ -65,7 +76,6 @@ def test_set_input_lists(inputs, expected, user_filesystem):

cli_inputs = ["2.5"] + inputs
actual_args = get_args(cli_inputs)
actual_args = expand_list_file(actual_args)
actual_args = set_input_lists(actual_args)
assert sorted(actual_args.input_paths) == sorted(expected_paths)

Expand Down Expand Up @@ -98,7 +108,6 @@ def test_set_input_files_bad(inputs, msg, user_filesystem):
os.chdir(base_dir)
cli_inputs = ["2.5"] + inputs
actual_args = get_args(cli_inputs)
actual_args = expand_list_file(actual_args)
with pytest.raises(FileNotFoundError, match=msg[0]):
actual_args = set_input_lists(actual_args)

Expand Down
12 changes: 9 additions & 3 deletions src/diffpy/labpdfproc/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ def set_output_directory(args):
return output_dir


def expand_list_file(args):
def _expand_user_input(args):
"""
Expands the list of inputs by adding files from file lists and removing the file list.
Expands the list of inputs by adding files from file lists and wildcards.

Parameters
----------
Expand All @@ -48,6 +48,11 @@ def expand_list_file(args):
file_inputs = [input_name.strip() for input_name in f.readlines()]
args.input.extend(file_inputs)
args.input.remove(file_list_input)
wildcard_inputs = [input_name for input_name in args.input if "*" in input_name]
for wildcard_input in wildcard_inputs:
input_files = [str(file) for file in Path(".").glob(wildcard_input) if "file_list" not in file.name]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we may be able to remove the `if "file_list" not in file.name` condition, because the file_list files have already been removed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is for files in the glob directory (not in args.input), so if we have a file list in the same directory as the wildcard, then it'll be loaded if we don't skip it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Yes, it is better to keep it, to be on the safe side. It might be better to make the check stricter (e.g. `file.name == "file_list.txt"`), but it is probably OK as it is.

args.input.extend(input_files)
args.input.remove(wildcard_input)
return args


Expand All @@ -70,6 +75,7 @@ def set_input_lists(args):
"""

input_paths = []
args = _expand_user_input(args)
for input_name in args.input:
input_path = Path(input_name).resolve()
if input_path.exists():
Expand All @@ -86,7 +92,7 @@ def set_input_lists(args):
f"Cannot find {input_name}. Please specify valid input file(s) or directories."
)
else:
raise FileNotFoundError(f"Cannot find {input_name}")
raise FileNotFoundError(f"Cannot find {input_name}.")
setattr(args, "input_paths", list(set(input_paths)))
return args

Expand Down
Loading