
Commit 2ffed0c

Merge branch 'main' of github.com:diffpy/diffpy.snmf into create_main
2 parents 36fce4d + a5c21f8 · commit 2ffed0c

17 files changed: +602 -1 lines changed

.codecov.yml

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
# codecov can find this file anywhere in the repo, so we don't need to clutter
# the root folder.
#comment: false

codecov:
  notify:
    require_ci_to_pass: no

coverage:
  status:
    patch:
      default:
        target: '70'
        if_no_uploads: error
        if_not_found: success
        if_ci_failed: failure
    project:
      default: false
      library:
        target: auto
        if_no_uploads: error
        if_not_found: success
        if_ci_failed: failure
        paths: '!*/tests/.*'

      tests:
        target: 97.9%
        paths: '*/tests/.*'
        if_not_found: success

flags:
  tests:
    paths:
      - tests/

.coveragerc

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
[run]
source =
    diffpy/snmf/
[report]
omit =
    */python?.?/*
    */site-packages/nose/*
    # ignore _version.py and versioneer.py
    *version.*
    *_version.py

exclude_lines =
    if __name__ == '__main__':

.github/workflows/main.yml

Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
name: CI

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

jobs:
  miniconda:
    name: Miniconda ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: ["ubuntu-latest"]
    steps:
      - name: check out diffpy.snmf
        uses: actions/checkout@v3
        with:
          repository: diffpy/diffpy.snmf
          # for bookkeeping, have diffpy.snmf at the same level as everything else in the
          # directory tree
          path: .

      - name: initialize miniconda
        # this uses a marketplace action that sets up miniconda in a way that makes
        # it easier to use. I tried setting it up without this and it was a pain
        uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: test
          # an environment.yml file is needed by this action. Because I don't want to
          # maintain this but rather maintain the requirements files, it just has
          # basic things in it like conda and pip
          environment-file: ./environment.yml
          python-version: 3
          auto-activate-base: false

      - name: install diffpy.snmf requirements
        shell: bash -l {0}
        run: |
          conda config --set always_yes yes --set changeps1 no
          conda config --add channels conda-forge
          conda activate test
          conda install --file requirements/run.txt
          conda install --file requirements/test.txt
          pip install .

      - name: Validate diffpy.snmf
        shell: bash -l {0}
        run: |
          conda activate test
          coverage run run_tests.py
          coverage report -m

      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v3
        # with:
        #   token: ${{ secrets.CODECOV_TOKEN }}
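
The "Validate diffpy.snmf" step runs coverage run run_tests.py, but run_tests.py itself is not included in this commit. Purely as an assumption of what such an entry point could look like, here is a minimal pytest-based sketch; the script actually used by the repository may differ, and the test path shown is hypothetical.

# Hypothetical run_tests.py sketch; not the script shipped with diffpy.snmf.
import sys

import pytest

if __name__ == "__main__":
    # Run the test suite (assumed here to live under diffpy/snmf/tests) and
    # pass the pytest exit code back to CI so a failing test fails the workflow.
    sys.exit(pytest.main(["-v", "diffpy/snmf/tests"]))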

README.md

Lines changed: 4 additions & 1 deletion
@@ -1 +1,4 @@
-# diffpy.snmf
+# diffpy.snmf
+
+[![test](https://github.com/diffpy/diffpy.snmf/actions/workflows/main.yml/badge.svg)](https://github.com/diffpy/diffpy.snmf/actions/workflows/main.yml)
+[![codecov](https://codecov.io/gh/diffpy/diffpy.snmf/branch/main/graph/badge.svg)](https://codecov.io/gh/diffpy/diffpy.snmf)

diffpy/snmf/factorizers.py

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
import numpy as np
import scipy.optimize


def lsqnonneg(stretched_component_matrix, target_signal):
    """Finds the weights of stretched component signals under a one-sided constraint.

    Solves ``argmin_x || Ax - b ||_2`` for ``x >= 0``, where A is the stretched_component_matrix and b is the
    target_signal vector. Finds the weights of component signals given undecomposed signal data and stretched
    components under a one-sided constraint on the weights.

    Parameters
    ----------
    stretched_component_matrix: 2d array like
        The component matrix where each column contains a stretched component signal. Has dimensions R x C where R
        is the length of the signal and C is the number of components. Does not need to be nonnegative. Corresponds
        with 'A' from the objective function.

    target_signal: 1d array like
        The signal that is used as reference against which weight factors will be determined. Any column from the
        matrix of the entire, unfactorized input data could be used. Has length R. Does not need to be nonnegative.
        Corresponds with 'b' from the objective function.

    Returns
    -------
    1d array like
        The vector containing component signal weights at a moment. Has length C.

    """
    stretched_component_matrix = np.asarray(stretched_component_matrix)
    target_signal = np.asarray(target_signal)
    return scipy.optimize.nnls(stretched_component_matrix, target_signal)[0]
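
A minimal usage sketch of lsqnonneg, assuming the package is installed as diffpy.snmf; the matrix and target signal below are illustrative values only, not data from the project.

import numpy as np

from diffpy.snmf.factorizers import lsqnonneg

# Illustrative 4 x 2 stretched-component matrix (R = 4 signal points, C = 2 components)
# and a target signal built from a known nonnegative combination of its columns.
stretched_components = np.array([[1.0, 0.2],
                                 [0.8, 0.5],
                                 [0.3, 0.9],
                                 [0.1, 1.0]])
true_weights = np.array([2.0, 0.5])
target = stretched_components @ true_weights

weights = lsqnonneg(stretched_components, target)
print(weights)  # expected to be close to [2.0, 0.5]; every entry is >= 0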

diffpy/snmf/io.py

Lines changed: 116 additions & 0 deletions
@@ -0,0 +1,116 @@
import numpy as np
import scipy.sparse
from pathlib import Path
from diffpy.utils.parsers.loaddata import loadData


def initialize_variables(data_input, component_amount, data_type, sparsity=1, smoothness=1e18):
    """Determines the variables and initial values used in the SNMF algorithm.

    Parameters
    ----------
    data_input: 2d array like
        The observed or simulated PDF or XRD data provided by the user. Has dimensions R x N where R is the signal
        length and N is the number of PDF/XRD signals.

    component_amount: int
        The number of component signals the user would like to decompose 'data_input' into.

    data_type: str
        The type of data the user has passed into the program. Can assume the value of 'PDF' or 'XRD'.

    sparsity: float, optional
        The regularization parameter that behaves as the coefficient of a "sparseness" regularization term that
        enhances the ability to decompose signals in the case of sparse data, e.g. X-ray diffraction data. A
        non-zero value indicates sparsity in the data; greater magnitudes indicate greater amounts of sparsity.

    smoothness: float, optional
        The regularization parameter that behaves as the coefficient of a "smoothness" term that ensures that
        component signal weightings change smoothly with time. Assumes a default value of 1e18.

    Returns
    -------
    dictionary
        The collection of the names and values of the constants used in the algorithm. Contains the number of
        observed PDF/XRD patterns, the length of each pattern, the type of the data, the number of components the
        user would like to decompose the data into, an initial guess for the component matrix, an initial guess for
        the weight factor matrix, an initial guess for the stretching factor matrix, a parameter controlling the
        smoothness of the solution, a parameter controlling the sparseness of the solution, the matrix representing
        the smoothness term, and a matrix used to construct a Hessian matrix.

    """
    signal_length = data_input.shape[0]
    moment_amount = data_input.shape[1]

    component_matrix_guess = np.random.rand(signal_length, component_amount)
    weight_matrix_guess = np.random.rand(component_amount, moment_amount)
    stretching_matrix_guess = np.ones((component_amount, moment_amount)) + np.random.randn(
        component_amount, moment_amount) * 1e-3

    # Second-difference operator used as the smoothness regularization term.
    diagonals = [np.ones(moment_amount - 2), -2 * np.ones(moment_amount - 2), np.ones(moment_amount - 2)]
    smoothness_term = .25 * scipy.sparse.diags(diagonals, [0, 1, 2], shape=(moment_amount - 2, moment_amount))

    # Block-diagonal copy of the smoothness operator for every component, reordered so rows/columns are grouped by
    # moment rather than by component. CSR format supports the fancy-index reordering below.
    hessian_helper_matrix = scipy.sparse.block_diag(
        [smoothness_term.T @ smoothness_term] * component_amount, format="csr")
    sequence = np.arange(moment_amount * component_amount).reshape(component_amount, moment_amount).T.flatten()
    hessian_helper_matrix = hessian_helper_matrix[sequence, :][:, sequence]

    return {
        "signal_length": signal_length,
        "moment_amount": moment_amount,
        "component_matrix_guess": component_matrix_guess,
        "weight_matrix_guess": weight_matrix_guess,
        "stretching_matrix_guess": stretching_matrix_guess,
        "component_amount": component_amount,
        "data_type": data_type,
        "smoothness": smoothness,
        "sparsity": sparsity,
        "smoothness_term": smoothness_term,
        "hessian_helper_matrix": hessian_helper_matrix
    }


def load_input_signals(file_path=None):
    """Processes a directory of a series of PDF/XRD patterns into a usable format.

    Constructs a 2d array out of a directory of PDF/XRD patterns, placing each file's dependent variable column in a
    new column. Constructs a 1d array containing the grid values.

    Parameters
    ----------
    file_path: str or Path object, optional
        The path to the directory containing the input XRD/PDF data. If no path is specified, defaults to the
        current working directory. Accepts a string or a pathlib.Path object. Input data not on the same grid as the
        first file read will be ignored.

    Returns
    -------
    tuple
        The tuple whose first element is an array of the grid values, of length R, and whose second element is an
        R x M 2d array holding one PDF/XRD pattern per column, where R is the length of the signal and M is the
        number of patterns.

    """
    if file_path is None:
        directory_path = Path.cwd()
    else:
        directory_path = Path(file_path)

    values_list = []
    grid_list = []
    current_grid = []
    for item in directory_path.iterdir():
        if item.is_file():
            data = loadData(item.resolve())
            # Skip any file whose grid does not match the grid of the first file read.
            if len(current_grid) and not np.array_equal(current_grid, data[:, 0]):
                print(f"{item.name} was ignored as it is not on a compatible grid.")
                continue
            grid_list.append(data[:, 0])
            current_grid = grid_list[-1]
            values_list.append(data[:, 1])

    grid_array = np.column_stack(grid_list)
    grid_vector = np.unique(grid_array, axis=1)
    values_array = np.column_stack(values_list)
    return grid_vector, values_array
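
A brief sketch of how initialize_variables might be called on synthetic data; the array sizes, the random data, and the printed shapes are illustrative assumptions, not values prescribed by the package.

import numpy as np

from diffpy.snmf.io import initialize_variables

rng = np.random.default_rng(0)
synthetic_data = rng.random((1000, 20))  # R = 1000 grid points, N = 20 PDF/XRD patterns

variables = initialize_variables(synthetic_data, component_amount=3, data_type="PDF")
print(variables["signal_length"], variables["moment_amount"])   # 1000 20
print(variables["component_matrix_guess"].shape)                # (1000, 3)
print(variables["stretching_matrix_guess"].shape)               # (3, 20)
print(variables["hessian_helper_matrix"].shape)                 # (60, 60)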

diffpy/snmf/optimizers.py

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
import numpy as np
import cvxpy


def get_weights(stretched_component_gram_matrix, linear_coefficient, lower_bound, upper_bound):
    """Finds the weights of stretched component signals under a two-sided constraint.

    Solves min J(y) = (linear_coefficient)' * y + (1/2) * y' * (stretched_component_gram_matrix) * y where
    lower_bound <= y <= upper_bound and stretched_component_gram_matrix is symmetric positive definite. Finds the
    weightings of stretched component signals under a two-sided constraint.

    Parameters
    ----------
    stretched_component_gram_matrix: 2d array like
        The Gram matrix constructed from the stretched component matrix. It is a square matrix with dimensions
        C x C where C is the number of component signals. Must be symmetric positive definite.

    linear_coefficient: 1d array like
        The vector containing the product of the stretched component matrix and the transpose of the observed data
        matrix. Has length C.

    lower_bound: 1d array like
        The lower bound on the values of the output weights. Has the same dimensions as the function output. Each
        element in 'lower_bound' determines the minimum value the corresponding element in the function output may
        take.

    upper_bound: 1d array like
        The upper bound on the values of the output weights. Has the same dimensions as the function output. Each
        element in 'upper_bound' determines the maximum value the corresponding element in the function output may
        take.

    Returns
    -------
    1d array like
        The vector containing the weightings of the components needed to reconstruct a given input signal from the
        input set. Has length C.

    """
    stretched_component_gram_matrix = np.asarray(stretched_component_gram_matrix)
    linear_coefficient = np.asarray(linear_coefficient)
    upper_bound = np.asarray(upper_bound)
    lower_bound = np.asarray(lower_bound)

    problem_size = max(linear_coefficient.shape)
    solution_variable = cvxpy.Variable(problem_size)

    objective = cvxpy.Minimize(
        linear_coefficient.T @ solution_variable
        + 0.5 * cvxpy.quad_form(solution_variable, stretched_component_gram_matrix))
    constraints = [lower_bound <= solution_variable, solution_variable <= upper_bound]

    cvxpy.Problem(objective, constraints).solve()

    return solution_variable.value
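
A small sketch of get_weights under assumed inputs: a hand-made 2 x 2 symmetric positive definite Gram matrix, a linear term, and elementwise bounds of [0, 1]. Solving requires a cvxpy installation with a QP-capable solver.

import numpy as np

from diffpy.snmf.optimizers import get_weights

gram = np.array([[2.0, 0.3],
                 [0.3, 1.0]])    # symmetric positive definite, C = 2 components
linear = np.array([-1.0, -0.4])  # linear coefficient of the objective
lower = np.zeros(2)              # elementwise lower bound on the weights
upper = np.ones(2)               # elementwise upper bound on the weights

weights = get_weights(gram, linear, lower, upper)
print(weights)  # each entry lies within [0, 1]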

diffpy/snmf/polynomials.py

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
import numpy as np


def rooth(linear_coefficient, constant_term):
    """Returns the largest real root of x^3 + (linear_coefficient) * x + constant_term. If there are no real roots,
    returns 0.

    Parameters
    ----------
    linear_coefficient: nd array like of floats
        The matrix coefficient of the linear term.
    constant_term: 0d array like, 1d array like of floats, or scalar
        The constant scalar term of the problem.

    Returns
    -------
    ndarray of floats
        The largest real root of x^3 + (linear_coefficient) * x + constant_term if the roots are real, else a 0
        array.

    """
    linear_coefficient = np.asarray(linear_coefficient)
    constant_term = np.asarray(constant_term)
    solution = np.empty_like(linear_coefficient, dtype=np.float64)

    for index, value in np.ndenumerate(linear_coefficient):
        inputs = [1, 0, value, constant_term]
        roots = np.roots(inputs)
        if ((constant_term / 2) ** 2 + (value / 3) ** 3) < 0:  # Discriminant of the depressed cubic equation
            solution[index] = max(np.real(roots))
        else:
            solution[index] = 0
    return solution
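
A short sketch of rooth on illustrative coefficients: x^3 - 3x + 1 has three real roots (the depressed-cubic discriminant test is negative), so the largest is returned, while x^3 + x + 1 has a single real root and this implementation returns 0 for that entry.

import numpy as np

from diffpy.snmf.polynomials import rooth

print(rooth(np.array([-3.0]), 1.0))  # approximately [1.532]; three real roots, largest returned
print(rooth(np.array([1.0]), 1.0))   # [0.]; discriminant test is positive, so 0 is returned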
