From d8d4e1122c0b9ceaf44e79401f38b66350775b8a Mon Sep 17 00:00:00 2001 From: John Halloran Date: Fri, 6 Jun 2025 12:45:48 -0700 Subject: [PATCH 1/4] feat: Add random state feature. --- src/diffpy/snmf/snmf_class.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/diffpy/snmf/snmf_class.py b/src/diffpy/snmf/snmf_class.py index 9d58e5f..150f654 100644 --- a/src/diffpy/snmf/snmf_class.py +++ b/src/diffpy/snmf/snmf_class.py @@ -4,8 +4,20 @@ class SNMFOptimizer: - def __init__(self, MM, Y0=None, X0=None, A=None, rho=1e12, eta=610, max_iter=500, tol=5e-7, components=None): - print("Initializing SNMF Optimizer") + def __init__( + self, + MM, + Y0=None, + X0=None, + A=None, + rho=1e12, + eta=610, + max_iter=500, + tol=5e-7, + components=None, + random_state=None, + ): + self.MM = MM self.X0 = X0 self.Y0 = Y0 @@ -15,23 +27,22 @@ def __init__(self, MM, Y0=None, X0=None, A=None, rho=1e12, eta=610, max_iter=500 # Capture matrix dimensions self.N, self.M = MM.shape self.num_updates = 0 + self.rng = np.random.default_rng(random_state) if Y0 is None: if components is None: raise ValueError("Must provide either Y0 or a number of components.") else: self.K = components - self.Y0 = np.random.beta(a=2.5, b=1.5, size=(self.K, self.M)) # This is untested + self.Y0 = self.rng.beta(a=2.5, b=1.5, size=(self.K, self.M)) else: self.K = Y0.shape[0] - # Initialize A, X0 if not provided if self.A is None: - self.A = np.ones((self.K, self.M)) + np.random.randn(self.K, self.M) * 1e-3 # Small perturbation + self.A = np.ones((self.K, self.M)) + self.rng.normal(0, 1e-3, size=(self.K, self.M)) if self.X0 is None: - self.X0 = np.random.rand(self.N, self.K) # Ensures values in [0,1] + self.X0 = self.rng.random((self.N, self.K)) - # Initialize solution matrices to be iterated on self.X = np.maximum(0, self.X0) self.Y = np.maximum(0, self.Y0) From ae45726407c90ff5f7c0121bf9959b96f0ea0f6b Mon Sep 17 00:00:00 2001 From: John Halloran Date: Sat, 7 Jun 2025 23:21:52 -0700 Subject: [PATCH 2/4] Add class docstring --- src/diffpy/snmf/snmf_class.py | 58 +++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/diffpy/snmf/snmf_class.py b/src/diffpy/snmf/snmf_class.py index 150f654..a40c251 100644 --- a/src/diffpy/snmf/snmf_class.py +++ b/src/diffpy/snmf/snmf_class.py @@ -17,6 +17,64 @@ def __init__( components=None, random_state=None, ): + """Run sNMF based on an ndarray, parameters, and either a number + of components or a set of initial guess matrices. + + Currently instantiating the SNMFOptimizer class runs all the analysis + immediately. The results can then be accessed as instance attributes + of the class (X, Y, and A). Eventually, this will be changed such + that __init__ only prepares for the optimization, which will can then + be done using fit_transform. + + Parameters + ---------- + MM: ndarray + A numpy array containing the data to be decomposed. Rows correspond + to different samples/angles, while columns correspond to different + conditions with different stretching. Currently, there is no option + to treat the first column (commonly containing 2theta angles, sample + index, etc) differently, so if present it must be stripped in advance. + Y0: ndarray + A numpy array containing initial guesses for the component weights + at each stretching condition, with number of rows equal to the assumed + number of components and number of columns equal to the number of + conditions (same number of columns as MM). Must be provided if components + is not provided. Will override components if both are provided. + X0: ndarray + A numpy array containing initial guesses for the intensities of each + component per row/sample/angle. Has rows equal to the rows of MM and + columns equal to n_components or the number of rows of Y0. + A: ndarray + A numpy array containing initial guesses for the stretching factor for + each component, at each condition. Has number of rows equal to n_components + or the number of rows of Y0, and columns equal to the number of conditions + (columns of MM). + rho: float + A stretching factor that influences the decomposition. Zero corresponds to + no stretching present. Relatively insensitive and typically adjusted in + powers of 10. + eta: float + A sparsity factor than influences the decomposition. Should be set to zero + for non sparse data such as PDF. Can be used to improve results for sparse + data such as XRD, but due to instability, should be used only after first + selecting the best value for rho. + max_iter: int + The maximum number of times to update each of A, X, and Y before stopping + the optimization. + tol: float + The minimum fractional improvement in the objective function to allow + without terminating the optimization. Note that a minimum of 20 updates + are run before this parameter is checked. + components: int + The number of components to attempt to extract from MM. Note that this will + be overridden by Y0 if that is provided, but must be provided if no Y0 is + provided. + random_state: int + Used to set a reproducible seed for the initial matrices used in the + optimization. Due to the non-convex nature of the problem, results may vary + even with the same initial guesses, so this does not make the program + deterministic. + """ self.MM = MM self.X0 = X0 From 3d7c8b6a7a9273b71aa49a2db82f5fb0a4feaeb1 Mon Sep 17 00:00:00 2001 From: John Halloran Date: Sun, 8 Jun 2025 01:13:02 -0700 Subject: [PATCH 3/4] components->n_components --- src/diffpy/snmf/main.py | 21 +-------------------- src/diffpy/snmf/snmf_class.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 27 deletions(-) diff --git a/src/diffpy/snmf/main.py b/src/diffpy/snmf/main.py index 7afacf6..a512f67 100644 --- a/src/diffpy/snmf/main.py +++ b/src/diffpy/snmf/main.py @@ -7,27 +7,8 @@ Y0 = np.loadtxt("input/W0.txt", dtype=float) N, M = MM.shape -# Convert to DataFrames for display -# df_X = pd.DataFrame(X, columns=[f"Comp_{i+1}" for i in range(X.shape[1])]) -# df_Y = pd.DataFrame(Y, columns=[f"Sample_{i+1}" for i in range(Y.shape[1])]) -# df_MM = pd.DataFrame(MM, columns=[f"Sample_{i+1}" for i in range(MM.shape[1])]) -# df_Y0 = pd.DataFrame(Y0, columns=[f"Sample_{i+1}" for i in range(Y0.shape[1])]) - -# Print the matrices -""" -print("Feature Matrix (X):\n", df_X, "\n") -print("Coefficient Matrix (Y):\n", df_Y, "\n") -print("Data Matrix (MM):\n", df_MM, "\n") -print("Initial Guess (Y0):\n", df_Y0, "\n") -""" - - -my_model = snmf_class.SNMFOptimizer(MM=MM, Y0=Y0, X0=X0, A=A0, components=2) +my_model = snmf_class.SNMFOptimizer(MM=MM, Y0=Y0, X0=X0, A=A0, n_components=2) print("Done") -# print(f"My final guess for X: {my_model.X}") -# print(f"My final guess for Y: {my_model.Y}") -# print(f"Compare to true X: {X_norm}") -# print(f"Compare to true Y: {Y_norm}") np.savetxt("my_norm_X.txt", my_model.X, fmt="%.6g", delimiter=" ") np.savetxt("my_norm_Y.txt", my_model.Y, fmt="%.6g", delimiter=" ") np.savetxt("my_norm_A.txt", my_model.A, fmt="%.6g", delimiter=" ") diff --git a/src/diffpy/snmf/snmf_class.py b/src/diffpy/snmf/snmf_class.py index a40c251..da0fcc2 100644 --- a/src/diffpy/snmf/snmf_class.py +++ b/src/diffpy/snmf/snmf_class.py @@ -14,7 +14,7 @@ def __init__( eta=610, max_iter=500, tol=5e-7, - components=None, + n_components=None, random_state=None, ): """Run sNMF based on an ndarray, parameters, and either a number @@ -38,8 +38,9 @@ def __init__( A numpy array containing initial guesses for the component weights at each stretching condition, with number of rows equal to the assumed number of components and number of columns equal to the number of - conditions (same number of columns as MM). Must be provided if components - is not provided. Will override components if both are provided. + conditions (same number of columns as MM). Must be provided if + n_components is not provided. Will override n_components if both are + provided. X0: ndarray A numpy array containing initial guesses for the intensities of each component per row/sample/angle. Has rows equal to the rows of MM and @@ -65,7 +66,7 @@ def __init__( The minimum fractional improvement in the objective function to allow without terminating the optimization. Note that a minimum of 20 updates are run before this parameter is checked. - components: int + n_components: int The number of components to attempt to extract from MM. Note that this will be overridden by Y0 if that is provided, but must be provided if no Y0 is provided. @@ -88,10 +89,10 @@ def __init__( self.rng = np.random.default_rng(random_state) if Y0 is None: - if components is None: - raise ValueError("Must provide either Y0 or a number of components.") + if n_components is None: + raise ValueError("Must provide either Y0 or n_components.") else: - self.K = components + self.K = n_components self.Y0 = self.rng.beta(a=2.5, b=1.5, size=(self.K, self.M)) else: self.K = Y0.shape[0] From a0483b4b855ebdca2765c2da493edd346101280c Mon Sep 17 00:00:00 2001 From: John Halloran Date: Mon, 9 Jun 2025 15:56:44 -0700 Subject: [PATCH 4/4] Updated docstring --- src/diffpy/snmf/snmf_class.py | 70 +++++++++++++++++------------------ 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/src/diffpy/snmf/snmf_class.py b/src/diffpy/snmf/snmf_class.py index da0fcc2..6ab1fb4 100644 --- a/src/diffpy/snmf/snmf_class.py +++ b/src/diffpy/snmf/snmf_class.py @@ -4,6 +4,18 @@ class SNMFOptimizer: + """A self-contained implementation of the stretched NMF algorithm (sNMF), + including sparse stretched NMF. + + Instantiating the SNMFOptimizer class runs all the analysis immediately. + The results matrices can then be accessed as instance attributes + of the class (X, Y, and A). + + For more information on sNMF, please reference: + Gu, R., Rakita, Y., Lan, L. et al. Stretched non-negative matrix factorization. + npj Comput Mater 10, 193 (2024). https://doi.org/10.1038/s41524-024-01377-5 + """ + def __init__( self, MM, @@ -17,48 +29,33 @@ def __init__( n_components=None, random_state=None, ): - """Run sNMF based on an ndarray, parameters, and either a number - of components or a set of initial guess matrices. - - Currently instantiating the SNMFOptimizer class runs all the analysis - immediately. The results can then be accessed as instance attributes - of the class (X, Y, and A). Eventually, this will be changed such - that __init__ only prepares for the optimization, which will can then - be done using fit_transform. + """Initialize an instance of SNMF and run the optimization Parameters ---------- MM: ndarray - A numpy array containing the data to be decomposed. Rows correspond - to different samples/angles, while columns correspond to different - conditions with different stretching. Currently, there is no option - to treat the first column (commonly containing 2theta angles, sample - index, etc) differently, so if present it must be stripped in advance. + The array containing the data to be decomposed. Shape is (length_of_signal, + number_of_conditions). Y0: ndarray - A numpy array containing initial guesses for the component weights - at each stretching condition, with number of rows equal to the assumed - number of components and number of columns equal to the number of - conditions (same number of columns as MM). Must be provided if - n_components is not provided. Will override n_components if both are - provided. + The array containing initial guesses for the component weights + at each stretching condition. Shape is (number of components, number of + conditions) Must be provided if n_components is not provided. Will override + n_components if both are provided. X0: ndarray - A numpy array containing initial guesses for the intensities of each - component per row/sample/angle. Has rows equal to the rows of MM and - columns equal to n_components or the number of rows of Y0. + The array containing initial guesses for the intensities of each component per + row/sample/angle. Shape is (length_of_signal, number_of_components). A: ndarray - A numpy array containing initial guesses for the stretching factor for - each component, at each condition. Has number of rows equal to n_components - or the number of rows of Y0, and columns equal to the number of conditions - (columns of MM). + The array containing initial guesses for the stretching factor for each component, + at each condition. Shape is (number_of_components, number_of_conditions). rho: float - A stretching factor that influences the decomposition. Zero corresponds to - no stretching present. Relatively insensitive and typically adjusted in - powers of 10. + The float which sets a stretching factor that influences the decomposition. + Zero corresponds to no stretching present. Relatively insensitive and typically + adjusted in powers of 10. eta: float - A sparsity factor than influences the decomposition. Should be set to zero - for non sparse data such as PDF. Can be used to improve results for sparse - data such as XRD, but due to instability, should be used only after first - selecting the best value for rho. + The integer which sets a sparsity factor than influences the decomposition. + Should be set to zero for non sparse data such as PDF. Can be used to improve + results for sparse data such as XRD, but due to instability, should be used + only after first selecting the best value for rho. max_iter: int The maximum number of times to update each of A, X, and Y before stopping the optimization. @@ -71,10 +68,9 @@ def __init__( be overridden by Y0 if that is provided, but must be provided if no Y0 is provided. random_state: int - Used to set a reproducible seed for the initial matrices used in the - optimization. Due to the non-convex nature of the problem, results may vary - even with the same initial guesses, so this does not make the program - deterministic. + The integer which acts as a reproducible seed for the initial matrices used in + the optimization. Due to the non-convex nature of the problem, results may vary + even with the same initial guesses, so this does not make the program deterministic. """ self.MM = MM