-
Notifications
You must be signed in to change notification settings - Fork 11
load input directory and files #50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
b61906b
f7d1174
d164dba
3c49a18
f8b7203
3d5c5ee
06ae14b
a02b573
7d39a74
ead5830
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,112 @@ | ||
import os | ||
import re | ||
from pathlib import Path | ||
|
||
import pytest | ||
|
||
from diffpy.labpdfproc.labpdfprocapp import get_args | ||
from diffpy.labpdfproc.tools import known_sources, load_user_metadata, set_output_directory, set_wavelength | ||
from diffpy.labpdfproc.tools import ( | ||
known_sources, | ||
load_user_metadata, | ||
set_input_files, | ||
set_output_directory, | ||
set_wavelength, | ||
) | ||
from diffpy.utils.parsers.loaddata import loadData | ||
|
||
# Use cases can be found here: https://github.com/diffpy/diffpy.labpdfproc/issues/48 | ||
|
||
# This test covers existing single input file, directory, or a file list | ||
# We store absolute path into input_directory and file names into input_file | ||
params_input = [ | ||
(["good_data.chi"], [".", "good_data.chi"]), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need to have a discussion, by comment thread of zoom if it is easier, about what we want the program to do given the inputs and therefore what we want to test. It seems to me that these tests are just testing something to do with metadata, but this PR is about much more than that. Here is a start: single-file case:
We want to make sure all these things are tested. Some will be tested by other functions. But we need tests here for all the things that won't be covered by other functions. Then we would like a similar list for teh other cases (a list of files, a glob....) and tests for those too. Please can you think about this and have a crack at it. |
||
(["input_dir/good_data.chi"], ["input_dir", "good_data.chi"]), | ||
(["./input_dir/good_data.chi"], ["input_dir", "good_data.chi"]), | ||
( | ||
["."], | ||
[ | ||
".", | ||
["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], | ||
], | ||
), | ||
( | ||
["./input_dir"], | ||
[ | ||
"input_dir", | ||
["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], | ||
], | ||
), | ||
( | ||
["input_dir"], | ||
[ | ||
"input_dir", | ||
["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"], | ||
], | ||
), | ||
(["file_list_dir/file_list.txt"], ["file_list_dir", ["good_data.chi", "good_data.xy", "good_data.txt"]]), | ||
] | ||
|
||
|
||
@pytest.mark.parametrize("inputs, expected", params_input) | ||
def test_set_input_files(inputs, expected, user_filesystem): | ||
expected_input_directory = Path(user_filesystem) / expected[0] | ||
expected_input_files = expected[1] | ||
|
||
cli_inputs = ["2.5"] + inputs | ||
actual_args = get_args(cli_inputs) | ||
actual_args = set_input_files(actual_args) | ||
assert actual_args.input_directory == expected_input_directory | ||
assert set(actual_args.input_file) == set(expected_input_files) | ||
|
||
sbillinge marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# This test is for existing single input file or directory absolute path not in cwd | ||
# Here we are in user_filesystem/input_dir, testing for a file or directory in user_filesystem | ||
params_input_not_cwd = [ | ||
(["good_data.chi"], [".", "good_data.chi"]), | ||
(["."], [".", ["good_data.chi", "good_data.xy", "good_data.txt", "unreadable_file.txt", "binary.pkl"]]), | ||
] | ||
|
||
|
||
@pytest.mark.parametrize("inputs, expected", params_input_not_cwd) | ||
def test_set_input_files_not_cwd(inputs, expected, user_filesystem): | ||
expected_input_directory = Path(user_filesystem) / expected[0] | ||
expected_input_files = expected[1] | ||
actual_input = [str(Path(user_filesystem) / inputs[0])] | ||
os.chdir("input_dir") | ||
|
||
cli_inputs = ["2.5"] + actual_input | ||
actual_args = get_args(cli_inputs) | ||
actual_args = set_input_files(actual_args) | ||
assert actual_args.input_directory == expected_input_directory | ||
assert set(actual_args.input_file) == set(expected_input_files) | ||
|
||
|
||
# This test covers non-existing single input file or directory, in this case we raise an error with message | ||
params_input_bad = [ | ||
(["non_existing_file.xy"], "Please specify valid input file or directory."), | ||
(["./input_dir/non_existing_file.xy"], "Please specify valid input file or directory."), | ||
(["./non_existing_dir"], "Please specify valid input file or directory."), | ||
] | ||
|
||
|
||
@pytest.mark.parametrize("inputs, msg", params_input_bad) | ||
def test_set_input_files_bad(inputs, msg, user_filesystem): | ||
cli_inputs = ["2.5"] + inputs | ||
actual_args = get_args(cli_inputs) | ||
with pytest.raises(ValueError, match=msg[0]): | ||
actual_args = set_input_files(actual_args) | ||
|
||
|
||
# Pass files to loadData and use it to check if file is valid or not | ||
def test_loadData_with_input_files(user_filesystem): | ||
sbillinge marked this conversation as resolved.
Show resolved
Hide resolved
|
||
xarray_chi, yarray_chi = loadData("good_data.chi", unpack=True) | ||
xarray_xy, yarray_xy = loadData("good_data.xy", unpack=True) | ||
xarray_txt, yarray_txt = loadData("good_data.txt", unpack=True) | ||
with pytest.raises(ValueError): | ||
xarray_txt, yarray_txt = loadData("unreadable_file.txt", unpack=True) | ||
with pytest.raises(ValueError): | ||
xarray_pkl, yarray_pkl = loadData("binary.pkl", unpack=True) | ||
|
||
|
||
params1 = [ | ||
([], ["."]), | ||
|
@@ -17,7 +119,7 @@ | |
@pytest.mark.parametrize("inputs, expected", params1) | ||
def test_set_output_directory(inputs, expected, user_filesystem): | ||
expected_output_directory = Path(user_filesystem) / expected[0] | ||
cli_inputs = ["2.5"] + inputs | ||
cli_inputs = ["2.5", "data.xy"] + inputs | ||
actual_args = get_args(cli_inputs) | ||
actual_args.output_directory = set_output_directory(actual_args) | ||
assert actual_args.output_directory == expected_output_directory | ||
|
@@ -26,7 +128,7 @@ def test_set_output_directory(inputs, expected, user_filesystem): | |
|
||
|
||
def test_set_output_directory_bad(user_filesystem): | ||
cli_inputs = ["2.5", "--output-directory", "good_data.chi"] | ||
cli_inputs = ["2.5", "data.xy", "--output-directory", "good_data.chi"] | ||
actual_args = get_args(cli_inputs) | ||
with pytest.raises(FileExistsError): | ||
actual_args.output_directory = set_output_directory(actual_args) | ||
|
@@ -45,7 +147,7 @@ def test_set_output_directory_bad(user_filesystem): | |
@pytest.mark.parametrize("inputs, expected", params2) | ||
def test_set_wavelength(inputs, expected): | ||
expected_wavelength = expected[0] | ||
cli_inputs = ["2.5"] + inputs | ||
cli_inputs = ["2.5", "data.xy"] + inputs | ||
actual_args = get_args(cli_inputs) | ||
actual_args.wavelength = set_wavelength(actual_args) | ||
assert actual_args.wavelength == expected_wavelength | ||
|
@@ -69,7 +171,7 @@ def test_set_wavelength(inputs, expected): | |
|
||
@pytest.mark.parametrize("inputs, msg", params3) | ||
def test_set_wavelength_bad(inputs, msg): | ||
cli_inputs = ["2.5"] + inputs | ||
cli_inputs = ["2.5", "data.xy"] + inputs | ||
actual_args = get_args(cli_inputs) | ||
with pytest.raises(ValueError, match=re.escape(msg[0])): | ||
actual_args.wavelength = set_wavelength(actual_args) | ||
|
@@ -87,12 +189,12 @@ def test_set_wavelength_bad(inputs, msg): | |
|
||
@pytest.mark.parametrize("inputs, expected", params5) | ||
def test_load_user_metadata(inputs, expected): | ||
expected_args = get_args(["2.5"]) | ||
expected_args = get_args(["2.5", "data.xy"]) | ||
for expected_pair in expected: | ||
setattr(expected_args, expected_pair[0], expected_pair[1]) | ||
delattr(expected_args, "user_metadata") | ||
|
||
cli_inputs = ["2.5"] + inputs | ||
cli_inputs = ["2.5", "data.xy"] + inputs | ||
actual_args = get_args(cli_inputs) | ||
actual_args = load_user_metadata(actual_args) | ||
assert actual_args == expected_args | ||
|
@@ -129,7 +231,7 @@ def test_load_user_metadata(inputs, expected): | |
|
||
@pytest.mark.parametrize("inputs, msg", params6) | ||
def test_load_user_metadata_bad(inputs, msg): | ||
cli_inputs = ["2.5"] + inputs | ||
cli_inputs = ["2.5", "data.xy"] + inputs | ||
actual_args = get_args(cli_inputs) | ||
with pytest.raises(ValueError, match=msg[0]): | ||
actual_args = load_user_metadata(actual_args) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,61 @@ | ||
import glob | ||
import os | ||
from pathlib import Path | ||
|
||
WAVELENGTHS = {"Mo": 0.71, "Ag": 0.59, "Cu": 1.54} | ||
known_sources = [key for key in WAVELENGTHS.keys()] | ||
|
||
|
||
def set_input_files(args): | ||
""" | ||
Set input directory and files | ||
|
||
Parameters | ||
---------- | ||
args argparse.Namespace | ||
the arguments from the parser | ||
|
||
It is implemented as this: | ||
If input is a file, we first try to read it as a file list and store all listed file names. | ||
If any filename is invalid, then proceed to treat it as a data file. | ||
Otherwise if we have a directory, glob all files within it. | ||
|
||
Returns | ||
------- | ||
args argparse.Namespace | ||
|
||
""" | ||
|
||
if not Path(args.input).exists(): | ||
raise ValueError("Please specify valid input file or directory.") | ||
|
||
if not Path(args.input).is_dir(): | ||
input_dir = Path.cwd() / Path(args.input).parent | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder whether this will fail if the user gives a filename with a path that doesn't include cwd? For example, I think a valid test would be:
Please could you add test for this situation and make sure it passes? |
||
file_names = [] | ||
with open(args.input, "r") as f: | ||
for line in f: | ||
if not os.path.isfile(line.strip()): | ||
file_names = [] | ||
break | ||
else: | ||
file_name = line.strip() | ||
file_names.append(file_name) | ||
|
||
if len(file_names) > 0: | ||
input_file_name = file_names | ||
else: | ||
input_file_name = Path(args.input).name | ||
|
||
else: | ||
input_dir = Path(args.input).resolve() | ||
input_files = [file for file in glob.glob(str(input_dir) + "/*", recursive=True) if os.path.isfile(file)] | ||
input_file_name = [os.path.basename(input_file_path) for input_file_path in input_files] | ||
|
||
setattr(args, "input_directory", input_dir) | ||
setattr(args, "input_file", input_file_name) | ||
return args | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. otherwise, I think this looks good. |
||
|
||
|
||
def set_output_directory(args): | ||
""" | ||
set the output directory based on the given input arguments | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
to make it clearer maybe
"The filename(s) or folder(s) of the datafile(s) to load. Required.....
The next line is good except doesn't the file with the list of files need to have a specific name?
Then I would give examples of valid inputs.