Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

creating function main #4

Draft
wants to merge 18 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 30 additions & 22 deletions diffpy/snmf/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from diffpy.utils.parsers.loaddata import loadData


def initialize_variables(data_input, component_amount, data_type, sparsity=1, smoothness=1e18):
def initialize_variables(data_input, number_of_components, data_type, sparsity=1, smoothness=1e18):
"""Determines the variables and initial values used in the SNMF algorithm.

Parameters
Expand All @@ -13,7 +13,7 @@ def initialize_variables(data_input, component_amount, data_type, sparsity=1, sm
The observed or simulated PDF or XRD data provided by the user. Has dimensions R x N where R is the signal length
and N is the number of PDF/XRD signals.

component_amount: int
number_of_components: int
The number of component signals the user would like to decompose 'data_input' into.

data_type: str
Expand All @@ -40,27 +40,30 @@ def initialize_variables(data_input, component_amount, data_type, sparsity=1, sm

"""
signal_length = data_input.shape[0]
moment_amount = data_input.shape[1]
number_of_moments = data_input.shape[1]

component_matrix_guess = np.random.rand(signal_length, component_amount)
weight_matrix_guess = np.random.rand(component_amount, moment_amount)
stretching_matrix_guess = np.ones(component_amount, moment_amount) + np.random.randn(component_amount,
moment_amount) * 1e-3
component_matrix_guess = np.random.rand(signal_length, number_of_components)
weight_matrix_guess = np.random.rand(number_of_components, number_of_moments)
stretching_matrix_guess = np.ones((number_of_components, number_of_moments)) + np.random.randn(number_of_components,
number_of_moments) * 1e-3

diagonals = [np.ones(moment_amount - 2), -2 * np.ones(moment_amount - 2), np.ones(moment_amount - 2)]
smoothness_term = .25 * scipy.sparse.diags(diagonals, [0, 1, 2], shape=(moment_amount - 2, moment_amount))
diagonals = [np.ones(number_of_moments - 2), -2 * np.ones(number_of_moments - 2), np.ones(number_of_moments - 2)]
smoothness_term = .25 * scipy.sparse.diags(diagonals, [0, 1, 2], shape=(number_of_moments - 2, number_of_moments))

hessian_helper_matrix = scipy.sparse.block_diag([smoothness_term.T @ smoothness_term] * component_amount)
sequence = np.arange(moment_amount * component_amount).reshape(component_amount, moment_amount).T.flatten()
hessian_helper_matrix = scipy.sparse.block_diag([smoothness_term.T @ smoothness_term] * number_of_components)
sequence = np.arange(number_of_moments * number_of_components).reshape(number_of_components,
number_of_moments).T.flatten()

hessian_helper_matrix = hessian_helper_matrix.tocsr()
hessian_helper_matrix = hessian_helper_matrix[sequence, :][:, sequence]

return {
"signal_length": signal_length,
"moment_amount": moment_amount,
"component_matrix_guess": component_matrix_guess,
"weight_matrix_guess": weight_matrix_guess,
"stretching_matrix_guess": stretching_matrix_guess,
"component_amount": component_amount,
"number_of_moments": number_of_moments,
"component_matrix": component_matrix_guess,
"weights_matrix": weight_matrix_guess,
"stretching_factor_matrix": stretching_matrix_guess,
"number_of_components": number_of_components,
"data_type": data_type,
"smoothness": smoothness,
"sparsity": sparsity,
Expand Down Expand Up @@ -91,18 +94,17 @@ def load_input_signals(file_path=None):

"""

if file_path is None:
directory_path = Path.cwd()
else:
directory_path = Path(file_path)
directory_path = Path(file_path)

values_list = []
grid_list = []
current_grid = []
file_extension = ""
for item in directory_path.iterdir():
if item.is_file():
file_extension = Path(item).suffix
data = loadData(item.resolve())
if current_grid and current_grid != data[:, 0]:
if len(current_grid) != 0 and (current_grid != data[:, 0]).any():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a comment line would help here with the intent. I.e., why do we ignore and not handle if the data are on a different grid. If I remember correctly t here was a physics reason (mathematically it should be straightforward). Just recording for future people.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add a comment line to explain this

print(f"{item.name} was ignored as it is not on a compatible grid.")
continue
else:
Expand All @@ -113,4 +115,10 @@ def load_input_signals(file_path=None):
grid_array = np.column_stack(grid_list)
grid_vector = np.unique(grid_array, axis=1)
values_array = np.column_stack(values_list)
return grid_vector, values_array
if file_extension in {'.gr', '.chi'}:
data_type = 'pdf'
elif file_extension in {'iq', '.xy', '.xye', '.xrd'}:
data_type = 'xrd'
else:
data_type = None
return grid_vector, values_array, data_type
64 changes: 64 additions & 0 deletions diffpy/snmf/stretchednmfapp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import numpy as np
import argparse
from pathlib import Path

from diffpy.snmf.io import load_input_signals, initialize_variables
from diffpy.snmf.subroutines import update_weights_matrix, get_residual_matrix, objective_function


def create_parser():
parser = argparse.ArgumentParser(
prog="stretched_nmf",
description="Stretched Nonnegative Matrix Factorization"
)

parser.add_argument('-i', '--input-directory', type=str, default=None,
help="Directory containing experimental data. Defaults to current working directory.")
parser.add_argument('-o', '--output-directory', type=str,
help="The directory where the results will be written. Defaults to '<input_directory>/snmf_results'.")
parser.add_argument('-t', '--data-type', type=str, choices=['powder_diffraction', 'pdf'],
help="The type of the experimental data.")
parser.add_argument('-l', '--lift', type=float, default=1,
help="The factor that determines how much the data is lifted. By default, the data will be vertically translated to make the minimum value 0.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about "The lifting factor. Data will be lifted by lifted data = data - min(data)*lift. Default is 1"

btw, it occurs to me that if the min(data) is positive, the "lift" will "lower" the data to zero if lift = 1 and will lower it to below zero if lift > 1. This is probably undesirable behavior. Make sure you have tests for this eventuality that give the behavior you want.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will make this changes.

The same thought occurred to me. If the min(data) is positive, it's not apparent to me that the data would need to be lifted. My idea is that if the minimum is positive, then do nothing to the data.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is reasonable, though why not allow the user to lift the data if she wants to?

Maybe make sure the lift is a lift, so it adds the absolute value of min(data)*lift to the data, something like that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok.

parser.add_argument('components', type=int,
help="The number of component signals for the NMF decomposition. Must be an integer greater than 0.")
parser.add_argument('-v', '--version', action='version', help='Print the software version number')
args = parser.parse_args()
return args


def main():
args = create_parser()
if args.input_directory is None:
args.input_directory = Path.cwd()

grid, data_input, data_type = load_input_signals(args.input_directory)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is maybe clearer to put the default logic here that turns input_directory from None to cwd. Or else, make it an optional argument in load_input_data.

Again, it is about code readability.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will do this.

if args.data_type:
data_type = args.data_type
variables = initialize_variables(data_input, args.components, data_type)

if variables["data_type"] == 'pdf':
lifted_data = data_input - np.ndarray.min(data_input[:]) # Will later use the lift_data function in subroutines

maxiter = 300

for out_iter in range(maxiter):
variables["weights_matrix"] = update_weights_matrix(variables["number_of_components"],
variables["signal_length"],
variables["stretching_factor_matrix"],
variables["component_matrix"], lifted_data,
variables["number_of_moments"], variables["weights_matrix"],
None)

residual_matrix = get_residual_matrix(variables["component_matrix"], variables["weights_matrix"],
variables["stretching_factor_matrix"], lifted_data,
variables["number_of_moments"], variables["number_of_components"],
variables["signal_length"])
fun = objective_function(residual_matrix, variables["stretching_factor_matrix"], variables["smoothness"],
variables["smoothness_term"], variables["component_matrix"], variables["sparsity"])

return 1


if __name__ == "__main__":
main()
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

# Installation script for diffpy.pdfmorph
# Installation script for diffpy.snmf

"""diffpy.snmf - a package implementing the stretched NMF algorithm.

Expand Down Expand Up @@ -28,7 +28,7 @@
entry_points={
# define console_scripts here, see setuptools docs for details.
'console_scripts': [
'pdfmorph = diffpy.pdfmorph.pdfmorphapp:main',
'stretched_nmf = diffpy.snmf.stretchednmfapp:main',
],
},
test_suite='tests',
Expand Down