Skip to content

implement wildcard pattern for input #59

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions src/diffpy/labpdfproc/labpdfprocapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,14 @@ def get_args(override_cli_inputs=None):
"'.' (load everything in the current directory), 'data' (load"
"everything in the folder ./data), 'data/file_list.txt' (load"
" the list of files contained in the text-file called "
"file_list.txt that can be found in the folder ./data).",
"file_list.txt that can be found in the folder ./data). "
"\nWildcard character (*) is accepted. Examples include './*.chi'"
" (load all files with .chi extension), 'data/*.chi' (load all "
"files in 'data' file with .chi extension), 'file*.chi' (load all "
"files starting with 'file' and ending with .chi extension), 'test*' "
"(load all files and directories starting with 'test'), 'test*/*.chi' "
"(load all directories starting with 'test' and all files under "
"with .chi extension). ",
)
p.add_argument(
"-a",
Expand Down Expand Up @@ -101,7 +108,7 @@ def main():
args.wavelength = set_wavelength(args)
args = load_user_metadata(args)

for filepath in args.input_directory:
for filepath in args.input_paths:
outfilestem = filepath.stem + "_corrected"
corrfilestem = filepath.stem + "_cve"
outfile = args.output_directory / (outfilestem + ".chi")
Expand All @@ -125,7 +132,7 @@ def main():
yarray,
"tth",
scat_quantity="x-ray",
name=str(args.input_file),
name=filepath.stem,
metadata={"muD": args.mud, "anode_type": args.anode_type},
)

Expand Down
45 changes: 44 additions & 1 deletion src/diffpy/labpdfproc/tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from diffpy.labpdfproc.labpdfprocapp import get_args
from diffpy.labpdfproc.tools import (
expand_list_file,
expand_wildcard_file,
known_sources,
load_user_metadata,
set_input_lists,
Expand Down Expand Up @@ -54,6 +55,36 @@
["input_dir/file_list_example2.txt"],
["input_dir/good_data.chi", "good_data.xy", "input_dir/good_data.txt"],
),
( # wildcard pattern, matching files with .chi extension in the same directory
["./*.chi"],
["good_data.chi"],
),
( # wildcard pattern, matching files with .chi extension in the input directory
["input_dir/*.chi"],
["input_dir/good_data.chi"],
),
( # wildcard pattern, matching files starting with good_data
["good_data*"],
["good_data.chi", "good_data.xy", "good_data.txt"],
),
( # wildcard pattern, matching files or directories starting with input
["input*"],
[
"input_dir/good_data.chi",
"input_dir/good_data.xy",
"input_dir/good_data.txt",
"input_dir/unreadable_file.txt",
"input_dir/binary.pkl",
],
),
( # wildcard pattern, matching files or directories starting with unreadable and ending with .txt extension
["unreadable*.txt"],
["unreadable_file.txt"],
),
( # wildcard pattern, matching directories starting with input and all files under with .chi extension
["input*/*.chi"],
["input_dir/good_data.chi"],
),
]


Expand All @@ -65,6 +96,7 @@ def test_set_input_lists(inputs, expected, user_filesystem):

cli_inputs = ["2.5"] + inputs
actual_args = get_args(cli_inputs)
actual_args = expand_wildcard_file(actual_args)
actual_args = expand_list_file(actual_args)
actual_args = set_input_lists(actual_args)
assert sorted(actual_args.input_paths) == sorted(expected_paths)
Expand All @@ -89,6 +121,16 @@ def test_set_input_lists(inputs, expected, user_filesystem):
["input_dir/file_list.txt"],
"Cannot find missing_file.txt. Please specify valid input file(s) or directories.",
),
( # valid wildcard pattern, but does not match any files or directories
["non_existing_dir*"],
"Invalid wildcard input non_existing_dir*. "
"Please ensure the wildcard pattern matches at least one file or directory.",
),
( # invalid wildcard pattern
["invalid_dir**"],
"Invalid wildcard input invalid_dir**. "
"Please ensure the wildcard pattern matches at least one file or directory.",
),
]


Expand All @@ -98,8 +140,9 @@ def test_set_input_files_bad(inputs, msg, user_filesystem):
os.chdir(base_dir)
cli_inputs = ["2.5"] + inputs
actual_args = get_args(cli_inputs)
actual_args = expand_list_file(actual_args)
with pytest.raises(FileNotFoundError, match=msg[0]):
actual_args = expand_wildcard_file(actual_args)
actual_args = expand_list_file(actual_args)
actual_args = set_input_lists(actual_args)


Expand Down
38 changes: 37 additions & 1 deletion src/diffpy/labpdfproc/tools.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import glob
from pathlib import Path

WAVELENGTHS = {"Mo": 0.71, "Ag": 0.59, "Cu": 1.54}
Expand Down Expand Up @@ -28,6 +29,41 @@ def set_output_directory(args):
return output_dir


def expand_wildcard_file(args):
    """
    Expands wildcard inputs by adding all files or directories within directories matching the pattern.

    Every entry of args.input that contains "*" is removed and replaced by what it matches:
    a matched file is added directly, a matched directory contributes the files directly
    inside it (files whose name contains "file_list" are left out).
    Both relative and absolute wildcard patterns are accepted.

    Parameters
    ----------
    args argparse.Namespace
        the arguments from the parser

    Returns
    -------
    the arguments with the wildcard inputs expanded

    Raises
    ------
    FileNotFoundError
        if a wildcard pattern matches nothing, is rejected by pathlib as an invalid pattern,
        or matches something that is neither a file nor a directory

    """
    wildcard_inputs = [input_name for input_name in args.input if "*" in input_name]
    for wildcard_input in wildcard_inputs:
        if not glob.glob(wildcard_input):
            raise FileNotFoundError(
                f"Invalid wildcard input {wildcard_input}. "
                f"Please ensure the wildcard pattern matches at least one file or directory."
            )
        # Path.glob only accepts relative patterns, so an absolute pattern is split into
        # its anchor (used as the search root) and the remaining relative pattern.
        pattern = Path(wildcard_input)
        if pattern.is_absolute():
            search_root = Path(pattern.anchor)
            relative_pattern = str(pattern.relative_to(search_root))
        else:
            search_root = Path(".")
            relative_pattern = wildcard_input
        try:
            # Path.glob is lazy; materialize it here so an invalid pattern is reported
            # at this point with the same exception type as the other failure modes.
            matches = list(search_root.glob(relative_pattern))
        except ValueError as err:
            raise FileNotFoundError(f"Invalid wildcard input {wildcard_input}.") from err
        for input_file in matches:
            if input_file.is_file():
                args.input.append(str(input_file))
            elif input_file.is_dir():
                # only the files directly inside a matched directory are added (no recursion)
                files = input_file.glob("*")
                inputs = [str(file) for file in files if file.is_file() and "file_list" not in file.name]
                args.input.extend(inputs)
            else:
                raise FileNotFoundError(f"Invalid wildcard input {wildcard_input}.")
        args.input.remove(wildcard_input)
    return args


def expand_list_file(args):
"""
Expands the list of inputs by adding files from file lists and removing the file list.
Expand Down Expand Up @@ -86,7 +122,7 @@ def set_input_lists(args):
f"Cannot find {input_name}. Please specify valid input file(s) or directories."
)
else:
raise FileNotFoundError(f"Cannot find {input_name}")
raise FileNotFoundError(f"Cannot find {input_name}.")
setattr(args, "input_paths", list(set(input_paths)))
return args

Expand Down
Loading