From 95fc07eb74f97c3012af5bb0a319e797ff8e1455 Mon Sep 17 00:00:00 2001
From: stevenhua0320 <r.hua@mail.utoronto.ca>
Date: Mon, 12 Aug 2024 11:20:37 +0800
Subject: [PATCH 01/14] add test cases to the test file and edit setdata so
 that the tests pass.

---
 src/diffpy/srmise/dataclusters.py            |  2 +
 src/diffpy/srmise/tests/test_dataclusters.py | 85 +++++++++++++++++++-
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/src/diffpy/srmise/dataclusters.py b/src/diffpy/srmise/dataclusters.py
index e5a14e6..69584fc 100644
--- a/src/diffpy/srmise/dataclusters.py
+++ b/src/diffpy/srmise/dataclusters.py
@@ -140,6 +140,8 @@ def setdata(self, x, y, res):
         self.x = x
         self.y = y
         self.res = res
+        if x.size > 0 and res == 0:
+            raise ValueError("Make trivial clustering, please make positive resolution.")
         # If x sequence size is empty, set the object into Initialized state.
         if x.size == 0 and res == 0:
             self.data_order = np.array([])
diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index 0ea6b42..eef300f 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -1,5 +1,5 @@
 from copy import copy
-
+import pytest
 import numpy as np
 
 from diffpy.srmise.dataclusters import DataClusters
@@ -32,3 +32,86 @@ def test___eq__():
             print(f"not-equal test failed on {attr_key}")
             assert not expected == actual
         attributes.update({attr_key: reset})
+
+    # In the set data test, we test for these cases.
+    # (1) x and y are non-empty array values, and res is positive (the most generic case)
+    # (2) x and y are non-empty array values, and res is 0 (will produce a msg that makes trivial clustering)
+    # (3) x and y are non-empty array values, and res is negative (will produce a ValueError,
+    # msg = please enter a non-negative res value)
+    # (4, 5) One of x and y is empty array, and res is positive
+    # (produce ValueError & msg "Sequences x and y must have the same length.", something like that)
+    # (6) Both x and y are empty array, and res is zero.
+
+@pytest.mark.parametrize(
+        "inputs, expected",
+        [
+            (
+                    # case (1)
+                    {
+                        "input_x": np.array([1, 2, 3]),
+                        "input_y": np.array([3, 2, 1]),
+                        "input_res": 4,
+                    },
+                    DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 4),
+            ),
+            (
+                    # case (6)
+                    {
+                        "input_x": np.array([]),
+                        "input_y": np.array([]),
+                        "input_res": 0,
+                    },
+                    DataClusters(np.array([]), np.array([]), 0),
+            ),
+        ],
+    )
+def test_set_data(inputs, expected):
+    actual = DataClusters(x=inputs["input_x"], y=inputs["input_y"], res=inputs["input_res"])
+    assert actual == expected
+
+
+@pytest.mark.parametrize(
+            "inputs, msg",
+            [
+                (
+                        # case (4)
+                        {
+                            "input_x": np.array([]),
+                            "input_y": np.array([3, 2]),
+                            "input_res": 4,
+                        },
+                        "Sequences x and y must have the same length.",
+                ),
+                (
+                        # case (5)
+                        {
+                            "input_x": np.array([1, 2]),
+                            "input_y": np.array([]),
+                            "input_res": 4,
+                        },
+                        "Sequences x and y must have the same length.",
+                ),
+                (
+                        # case (3)
+                        {
+                            "input_x": np.array([1]),
+                            "input_y": np.array([3]),
+                            "input_res": -1,
+                        },
+                        "Resolution res must be non-negative.",
+                ),
+                (
+                        # case (2)
+                        {
+                            "input_x": np.array([1, 2, 3]),
+                            "input_y": np.array([3, 2, 1]),
+                            "input_res": 0,
+                        },
+                        "Make trivial clustering, please make positive resolution.",
+                ),
+            ],
+        )
+def test_set_data_order_bad(inputs, msg):
+    with pytest.raises(ValueError, match=msg):
+        DataClusters(x=inputs["input_x"], y=inputs["input_y"], res=inputs["input_res"])
+

From 28c6ea7243019e8e83b589c7e0c686bad8f735fe Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 12 Aug 2024 03:24:05 +0000
Subject: [PATCH 02/14] [pre-commit.ci] auto fixes from pre-commit hooks

---
 src/diffpy/srmise/tests/test_dataclusters.py | 129 ++++++++++---------
 1 file changed, 65 insertions(+), 64 deletions(-)

diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index eef300f..2a52ce6 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -1,6 +1,7 @@
 from copy import copy
-import pytest
+
 import numpy as np
+import pytest
 
 from diffpy.srmise.dataclusters import DataClusters
 
@@ -42,76 +43,76 @@ def test___eq__():
     # (produce ValueError & msg "Sequences x and y must have the same length.", something like that)
     # (6) Both x and y are empty array, and res is zero.
 
+
 @pytest.mark.parametrize(
-        "inputs, expected",
-        [
-            (
-                    # case (1)
-                    {
-                        "input_x": np.array([1, 2, 3]),
-                        "input_y": np.array([3, 2, 1]),
-                        "input_res": 4,
-                    },
-                    DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 4),
-            ),
-            (
-                    # case (6)
-                    {
-                        "input_x": np.array([]),
-                        "input_y": np.array([]),
-                        "input_res": 0,
-                    },
-                    DataClusters(np.array([]), np.array([]), 0),
-            ),
-        ],
-    )
+    "inputs, expected",
+    [
+        (
+            # case (1)
+            {
+                "input_x": np.array([1, 2, 3]),
+                "input_y": np.array([3, 2, 1]),
+                "input_res": 4,
+            },
+            DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 4),
+        ),
+        (
+            # case (6)
+            {
+                "input_x": np.array([]),
+                "input_y": np.array([]),
+                "input_res": 0,
+            },
+            DataClusters(np.array([]), np.array([]), 0),
+        ),
+    ],
+)
 def test_set_data(inputs, expected):
     actual = DataClusters(x=inputs["input_x"], y=inputs["input_y"], res=inputs["input_res"])
     assert actual == expected
 
 
 @pytest.mark.parametrize(
-            "inputs, msg",
-            [
-                (
-                        # case (4)
-                        {
-                            "input_x": np.array([]),
-                            "input_y": np.array([3, 2]),
-                            "input_res": 4,
-                        },
-                        "Sequences x and y must have the same length.",
-                ),
-                (
-                        # case (5)
-                        {
-                            "input_x": np.array([1, 2]),
-                            "input_y": np.array([]),
-                            "input_res": 4,
-                        },
-                        "Sequences x and y must have the same length.",
-                ),
-                (
-                        # case (3)
-                        {
-                            "input_x": np.array([1]),
-                            "input_y": np.array([3]),
-                            "input_res": -1,
-                        },
-                        "Resolution res must be non-negative.",
-                ),
-                (
-                        # case (2)
-                        {
-                            "input_x": np.array([1, 2, 3]),
-                            "input_y": np.array([3, 2, 1]),
-                            "input_res": 0,
-                        },
-                        "Make trivial clustering, please make positive resolution.",
-                ),
-            ],
-        )
+    "inputs, msg",
+    [
+        (
+            # case (4)
+            {
+                "input_x": np.array([]),
+                "input_y": np.array([3, 2]),
+                "input_res": 4,
+            },
+            "Sequences x and y must have the same length.",
+        ),
+        (
+            # case (5)
+            {
+                "input_x": np.array([1, 2]),
+                "input_y": np.array([]),
+                "input_res": 4,
+            },
+            "Sequences x and y must have the same length.",
+        ),
+        (
+            # case (3)
+            {
+                "input_x": np.array([1]),
+                "input_y": np.array([3]),
+                "input_res": -1,
+            },
+            "Resolution res must be non-negative.",
+        ),
+        (
+            # case (2)
+            {
+                "input_x": np.array([1, 2, 3]),
+                "input_y": np.array([3, 2, 1]),
+                "input_res": 0,
+            },
+            "Make trivial clustering, please make positive resolution.",
+        ),
+    ],
+)
 def test_set_data_order_bad(inputs, msg):
     with pytest.raises(ValueError, match=msg):
         DataClusters(x=inputs["input_x"], y=inputs["input_y"], res=inputs["input_res"])
-

From ce1d97fec4a701d99653e3989e92f5eadca38e9e Mon Sep 17 00:00:00 2001
From: stevenhua0320 <r.hua@mail.utoronto.ca>
Date: Mon, 12 Aug 2024 11:35:11 +0800
Subject: [PATCH 03/14] change case in test__eq__ to be compatible with the
 behavior of setdata

---
 src/diffpy/srmise/tests/test_dataclusters.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index 2a52ce6..14b301e 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -16,8 +16,8 @@ def test_clear():
 
 
 def test___eq__():
-    actual = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 0)
-    expected = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 0)
+    actual = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 1)
+    expected = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 1)
     assert expected == actual
     attributes = vars(actual)
     for attr_key, attr_val in attributes.items():

From 0faa2ca9f7390569dcb43e6a8fe2fb23b8726186 Mon Sep 17 00:00:00 2001
From: stevenhua0320 <r.hua@mail.utoronto.ca>
Date: Tue, 13 Aug 2024 13:56:10 +0800
Subject: [PATCH 04/14] delete text and redundant tests

---
 src/diffpy/srmise/dataclusters.py            |  5 +-
 src/diffpy/srmise/tests/test_dataclusters.py | 64 ++++----------------
 2 files changed, 17 insertions(+), 52 deletions(-)

diff --git a/src/diffpy/srmise/dataclusters.py b/src/diffpy/srmise/dataclusters.py
index 69584fc..62429f6 100644
--- a/src/diffpy/srmise/dataclusters.py
+++ b/src/diffpy/srmise/dataclusters.py
@@ -135,7 +135,10 @@ def setdata(self, x, y, res):
         if len(x) != len(y):
             raise ValueError("Sequences x and y must have the same length.")
         if res < 0:
-            raise ValueError("Resolution res must be non-negative.")
+            raise ValueError(
+                "Resolution is the Determines how closely clusters are formed in the clustering algorithm. "
+                "Please set it to be non-negative."
+            )
         # Test for sorting?
         self.x = x
         self.y = y
diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index 14b301e..b96eda4 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -34,41 +34,22 @@ def test___eq__():
             assert not expected == actual
         attributes.update({attr_key: reset})
 
-    # In the set data test, we test for these cases.
-    # (1) x and y are non-empty array values, and res is positive (the most generic case)
-    # (2) x and y are non-empty array values, and res is 0 (will produce a msg that makes trivial clustering)
-    # (3) x and y are non-empty array values, and res is negative (will produce a ValueError,
-    # msg = please enter a non-negative res value)
-    # (4, 5) One of x and y is empty array, and res is positive
-    # (produce ValueError & msg "Sequences x and y must have the same length.", something like that)
-    # (6) Both x and y are empty array, and res is zero.
-
 
 @pytest.mark.parametrize(
     "inputs, expected",
     [
         (
-            # case (1)
             {
-                "input_x": np.array([1, 2, 3]),
-                "input_y": np.array([3, 2, 1]),
-                "input_res": 4,
+                "x": np.array([1, 2, 3]),
+                "y": np.array([3, 2, 1]),
+                "res": 4,
             },
             DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 4),
         ),
-        (
-            # case (6)
-            {
-                "input_x": np.array([]),
-                "input_y": np.array([]),
-                "input_res": 0,
-            },
-            DataClusters(np.array([]), np.array([]), 0),
-        ),
     ],
 )
 def test_set_data(inputs, expected):
-    actual = DataClusters(x=inputs["input_x"], y=inputs["input_y"], res=inputs["input_res"])
+    actual = DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
     assert actual == expected
 
 
@@ -76,43 +57,24 @@ def test_set_data(inputs, expected):
     "inputs, msg",
     [
         (
-            # case (4)
-            {
-                "input_x": np.array([]),
-                "input_y": np.array([3, 2]),
-                "input_res": 4,
-            },
-            "Sequences x and y must have the same length.",
-        ),
-        (
-            # case (5)
             {
-                "input_x": np.array([1, 2]),
-                "input_y": np.array([]),
-                "input_res": 4,
+                "x": np.array([1]),
+                "y": np.array([3, 2]),
+                "res": 4,
             },
             "Sequences x and y must have the same length.",
         ),
         (
-            # case (3)
-            {
-                "input_x": np.array([1]),
-                "input_y": np.array([3]),
-                "input_res": -1,
-            },
-            "Resolution res must be non-negative.",
-        ),
-        (
-            # case (2)
             {
-                "input_x": np.array([1, 2, 3]),
-                "input_y": np.array([3, 2, 1]),
-                "input_res": 0,
+                "x": np.array([1]),
+                "y": np.array([3]),
+                "res": -1,
             },
-            "Make trivial clustering, please make positive resolution.",
+            "Resolution is the Determines how closely clusters are formed in the clustering algorithm. "
+            "Please set it to be non-negative.",
         ),
     ],
 )
 def test_set_data_order_bad(inputs, msg):
     with pytest.raises(ValueError, match=msg):
-        DataClusters(x=inputs["input_x"], y=inputs["input_y"], res=inputs["input_res"])
+        DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])

From 6a011a27370660118339a066877337e43f9cf223 Mon Sep 17 00:00:00 2001
From: Simon Billinge <sbillinge@users.noreply.github.com>
Date: Tue, 13 Aug 2024 03:11:32 -0400
Subject: [PATCH 05/14] tweaking error message in DataClusters

---
 src/diffpy/srmise/dataclusters.py | 73 ++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 31 deletions(-)

diff --git a/src/diffpy/srmise/dataclusters.py b/src/diffpy/srmise/dataclusters.py
index 62429f6..6789f48 100644
--- a/src/diffpy/srmise/dataclusters.py
+++ b/src/diffpy/srmise/dataclusters.py
@@ -22,41 +22,55 @@
 
 
 class DataClusters:
-    """Find clusters corresponding to peaks in numerical x-, y-value arrays.
+    """Find clusters corresponding to peaks in the PDF (y-array)
 
-    DataClusters determines which points, given a pair of x- and y-value
-    sequences, roughly correspond to which visible peaks in that data.  This
-    division is contiguous, with borders between clusters near relative
+    DataClusters determines which points in inter-atomic distane, r,
+    correspond to peaks in the PDF.  The division between clusters
+    is contiguous, with borders between clusters likely near relative
     minima in the data.
 
     Clusters are iteratively formed around points with the largest
-    y-coordinates.  New clusters are added only when the unclustered data
+    PDF values.  New clusters are added only when the unclustered data
     point under consideration is greater than a given distance (the
     'resolution') from the nearest existing cluster.
 
     Data members
-    x - sequence of x coordinates.
-    y - sequence of y values
-    res - clustering 'resolution'
-    data_order - array of x, y indices ordered by decreasing y
-    clusters - array of cluster ranges
-    current_idx - index of data_order currently considered
+    ------------
+    x : array
+      The array of r values.
+    y : sequence of y values
+      The array of PDF values, G(r)
+    res : int
+      The clustering resolution, i.e., the number of points another point has to 
+      be away from the center of an existing cluster to before a new cluster is
+      formed.  A value of zero allows every point to be cluster.
+    data_order : array
+      The array of x, y indices ordered by decreasing y
+    clusters : 
+      The array of cluster ranges
+    current_idx - int
+      The index of data_order currently considered
     """
 
     def __init__(self, x, y, res):
-        """Initializes the data to be clustered, and the 'resolution' to use.
+        """Constructor
 
         Parameters
-        x - numeric sequence of x-value sorted in ascending order
-        y - corresponding sequence of y-values
-        res - clustering 'resolution'
+        ----------
+        x : array
+          The array of r values.
+        y : sequence of y values
+          The array of PDF values, G(r)
+        res : int
+          The clustering resolution, i.e., the number of points another point has to 
+          be away from the center of an existing cluster to before a new cluster is
+          formed.  A value of zero allows every point to be cluster.
         """
         # Track internal state of clustering.
         self.INIT = 0
         self.READY = 1
         self.CLUSTERING = 2
         self.DONE = 3
-
         self.clear()
         self.setdata(x, y, res)
 
@@ -124,36 +138,33 @@ def setdata(self, x, y, res):
         """Assign data members for x- and y-coordinates, and resolution.
 
         Parameters
-        x - numeric sequence of x-value sorted in ascending order
-        y - corresponding sequence of y-values
-        res - clustering 'resolution'
+        ----------
+        x : array
+          The array of r values.
+        y : sequence of y values
+          The array of PDF values, G(r)
+        res : int
+          The clustering resolution, i.e., the number of points another point has to 
+          be away from the center of an existing cluster to before a new cluster is
+          formed.  A value of zero allows every point to be cluster.
         """
-        # Test for error conditions
-        # 1) Length mismatch
-        # 2) Bound errors for res
-        # 3) r isn't sorted?
         if len(x) != len(y):
             raise ValueError("Sequences x and y must have the same length.")
         if res < 0:
             raise ValueError(
-                "Resolution is the Determines how closely clusters are formed in the clustering algorithm. "
-                "Please set it to be non-negative."
+                "Value of resolution parameter is less than zero.  Please rerun specifying a non-negative res"
             )
-        # Test for sorting?
         self.x = x
         self.y = y
         self.res = res
-        if x.size > 0 and res == 0:
-            raise ValueError("Make trivial clustering, please make positive resolution.")
-        # If x sequence size is empty, set the object into Initialized state.
-        if x.size == 0 and res == 0:
+        if x.size == 0:
             self.data_order = np.array([])
             self.clusters = np.array([[]])
             self.current_idx = 0
             self.lastpoint_idx = None
             self.status = self.INIT
         else:
-            self.data_order = self.y.argsort()  # Defines order of clustering
+            self.data_order = self.y.argsort()
             self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]])
             self.current_idx = len(self.data_order) - 1
             self.lastpoint_idx = self.data_order[-1]

From 83eac4c507762b2bc0ccb784269a428c2cb762fd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 13 Aug 2024 07:11:41 +0000
Subject: [PATCH 06/14] [pre-commit.ci] auto fixes from pre-commit hooks

---
 src/diffpy/srmise/dataclusters.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/diffpy/srmise/dataclusters.py b/src/diffpy/srmise/dataclusters.py
index 6789f48..f961492 100644
--- a/src/diffpy/srmise/dataclusters.py
+++ b/src/diffpy/srmise/dataclusters.py
@@ -41,12 +41,12 @@ class DataClusters:
     y : sequence of y values
       The array of PDF values, G(r)
     res : int
-      The clustering resolution, i.e., the number of points another point has to 
+      The clustering resolution, i.e., the number of points another point has to
       be away from the center of an existing cluster to before a new cluster is
       formed.  A value of zero allows every point to be cluster.
     data_order : array
       The array of x, y indices ordered by decreasing y
-    clusters : 
+    clusters :
       The array of cluster ranges
     current_idx - int
       The index of data_order currently considered
@@ -62,7 +62,7 @@ def __init__(self, x, y, res):
         y : sequence of y values
           The array of PDF values, G(r)
         res : int
-          The clustering resolution, i.e., the number of points another point has to 
+          The clustering resolution, i.e., the number of points another point has to
           be away from the center of an existing cluster to before a new cluster is
           formed.  A value of zero allows every point to be cluster.
         """
@@ -144,7 +144,7 @@ def setdata(self, x, y, res):
         y : sequence of y values
           The array of PDF values, G(r)
         res : int
-          The clustering resolution, i.e., the number of points another point has to 
+          The clustering resolution, i.e., the number of points another point has to
           be away from the center of an existing cluster to before a new cluster is
           formed.  A value of zero allows every point to be cluster.
         """

From 9f17d258207136a152d0dd57723d303b6dbd2578 Mon Sep 17 00:00:00 2001
From: stevenhua0320 <r.hua@mail.utoronto.ca>
Date: Tue, 13 Aug 2024 16:06:00 +0800
Subject: [PATCH 07/14] update test for checking implicit attributes for
 setdata function

---
 src/diffpy/srmise/tests/test_dataclusters.py | 26 +++++++++++++++++---
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index b96eda4..f2baf24 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -44,13 +44,32 @@ def test___eq__():
                 "y": np.array([3, 2, 1]),
                 "res": 4,
             },
-            DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 4),
+            {
+                "x": np.array([1, 2, 3]),
+                "y": np.array([3, 2, 1]),
+                "res": 4,
+                "data_order": [2, 1, 0],
+                "clusters": np.array([[0, 0]]),
+                "current_idx": 2,
+                "lastpoint_idx": 0,
+                "INIT": 0,
+                "READY": 1,
+                "CLUSTERING": 2,
+                "DONE": 3,
+                "lastcluster_idx": None,
+                "status": 1,
+            }
         ),
     ],
 )
 def test_set_data(inputs, expected):
     actual = DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
-    assert actual == expected
+    attributes = vars(actual)
+    for attr_key, attr_val in attributes.items():
+        if isinstance(attr_val, np.ndarray):
+            assert np.array_equal(attr_val, expected[attr_key])
+        else:
+            assert attr_val == expected[attr_key]
 
 
 @pytest.mark.parametrize(
@@ -70,8 +89,7 @@ def test_set_data(inputs, expected):
                 "y": np.array([3]),
                 "res": -1,
             },
-            "Resolution is the Determines how closely clusters are formed in the clustering algorithm. "
-            "Please set it to be non-negative.",
+            "Value of resolution parameter is less than zero.  Please rerun specifying a non-negative res",
         ),
     ],
 )

From 9d84a9f6b136bfec7fb8f66c90143b14a35abfda Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 13 Aug 2024 08:06:19 +0000
Subject: [PATCH 08/14] [pre-commit.ci] auto fixes from pre-commit hooks

---
 src/diffpy/srmise/tests/test_dataclusters.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index f2baf24..2b9dea4 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -58,7 +58,7 @@ def test___eq__():
                 "DONE": 3,
                 "lastcluster_idx": None,
                 "status": 1,
-            }
+            },
         ),
     ],
 )

From 321f47dd48e9a5006877250ec017e45ba83a6b29 Mon Sep 17 00:00:00 2001
From: stevenhua0320 <r.hua@mail.utoronto.ca>
Date: Tue, 13 Aug 2024 16:17:12 +0800
Subject: [PATCH 09/14] update test for setdata function

---
 src/diffpy/srmise/tests/test_dataclusters.py | 27 ++++----------------
 1 file changed, 5 insertions(+), 22 deletions(-)

diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index f2baf24..7ea5306 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -44,32 +44,15 @@ def test___eq__():
                 "y": np.array([3, 2, 1]),
                 "res": 4,
             },
-            {
-                "x": np.array([1, 2, 3]),
-                "y": np.array([3, 2, 1]),
-                "res": 4,
-                "data_order": [2, 1, 0],
-                "clusters": np.array([[0, 0]]),
-                "current_idx": 2,
-                "lastpoint_idx": 0,
-                "INIT": 0,
-                "READY": 1,
-                "CLUSTERING": 2,
-                "DONE": 3,
-                "lastcluster_idx": None,
-                "status": 1,
-            }
+            DataClusters(x=np.array([1, 2, 3]), y=np.array([3, 2, 1]), res=4),
         ),
     ],
 )
 def test_set_data(inputs, expected):
-    actual = DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
-    attributes = vars(actual)
-    for attr_key, attr_val in attributes.items():
-        if isinstance(attr_val, np.ndarray):
-            assert np.array_equal(attr_val, expected[attr_key])
-        else:
-            assert attr_val == expected[attr_key]
+    actual = DataClusters(x=np.array([]), y=np.array([]), res=0)
+    actual.setdata(x=inputs["x"], y=inputs["y"], res=inputs["res"])
+    assert expected == actual
+
 
 
 @pytest.mark.parametrize(

From 3ef8a5b8af0bdedc00a48078c32ba1404bffdaaa Mon Sep 17 00:00:00 2001
From: stevenhua0320 <r.hua@mail.utoronto.ca>
Date: Tue, 13 Aug 2024 16:22:41 +0800
Subject: [PATCH 10/14] update setdata test to right format.

---
 src/diffpy/srmise/tests/test_dataclusters.py | 26 +++-----------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index 2b9dea4..4b17a09 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -44,32 +44,14 @@ def test___eq__():
                 "y": np.array([3, 2, 1]),
                 "res": 4,
             },
-            {
-                "x": np.array([1, 2, 3]),
-                "y": np.array([3, 2, 1]),
-                "res": 4,
-                "data_order": [2, 1, 0],
-                "clusters": np.array([[0, 0]]),
-                "current_idx": 2,
-                "lastpoint_idx": 0,
-                "INIT": 0,
-                "READY": 1,
-                "CLUSTERING": 2,
-                "DONE": 3,
-                "lastcluster_idx": None,
-                "status": 1,
-            },
+            DataClusters(x=np.array([1, 2, 3]), y=np.array([3, 2, 1]), res=4),
         ),
     ],
 )
 def test_set_data(inputs, expected):
-    actual = DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
-    attributes = vars(actual)
-    for attr_key, attr_val in attributes.items():
-        if isinstance(attr_val, np.ndarray):
-            assert np.array_equal(attr_val, expected[attr_key])
-        else:
-            assert attr_val == expected[attr_key]
+    actual = DataClusters(x=np.array([]), y=np.array([]), res=0)
+    actual.setdata(x=inputs["x"], y=inputs["y"], res=inputs["res"])
+    assert expected == actual
 
 
 @pytest.mark.parametrize(

From cbe85f10c6a9fa138c88aa7962e7716e89e21104 Mon Sep 17 00:00:00 2001
From: stevenhua0320 <r.hua@mail.utoronto.ca>
Date: Tue, 13 Aug 2024 22:55:22 +0800
Subject: [PATCH 11/14] update constructor test & make the setdata and clear
 functions private

---
 src/diffpy/srmise/dataclusters.py            |  8 ++--
 src/diffpy/srmise/tests/test_dataclusters.py | 39 +++++++++++++-------
 2 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/src/diffpy/srmise/dataclusters.py b/src/diffpy/srmise/dataclusters.py
index f961492..b881f17 100644
--- a/src/diffpy/srmise/dataclusters.py
+++ b/src/diffpy/srmise/dataclusters.py
@@ -71,8 +71,8 @@ def __init__(self, x, y, res):
         self.READY = 1
         self.CLUSTERING = 2
         self.DONE = 3
-        self.clear()
-        self.setdata(x, y, res)
+        self._clear()
+        self._setdata(x, y, res)
 
         return
 
@@ -101,7 +101,7 @@ def __eq__(self, other):
             and self.DONE == other.DONE
         )
 
-    def clear(self):
+    def _clear(self):
         """
         Clear all data and reset the cluster object to a transient initial state.
 
@@ -134,7 +134,7 @@ def reset_clusters(self):
             self.status = self.READY
         return
 
-    def setdata(self, x, y, res):
+    def _setdata(self, x, y, res):
         """Assign data members for x- and y-coordinates, and resolution.
 
         Parameters
diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index 4b17a09..a6a81d8 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -6,15 +6,6 @@
 from diffpy.srmise.dataclusters import DataClusters
 
 
-def test_clear():
-    # Initialize DataClusters with input parameters
-    actual = DataClusters(x=np.array([1, 2, 3]), y=np.array([3, 2, 1]), res=4)
-    expected = DataClusters(x=np.array([]), y=np.array([]), res=0)
-    # Perform the clear operation
-    actual.clear()
-    assert actual == expected
-
-
 def test___eq__():
     actual = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 1)
     expected = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 1)
@@ -44,14 +35,34 @@ def test___eq__():
                 "y": np.array([3, 2, 1]),
                 "res": 4,
             },
-            DataClusters(x=np.array([1, 2, 3]), y=np.array([3, 2, 1]), res=4),
+            {
+                "x": np.array([1, 2, 3]),
+                "y": np.array([3, 2, 1]),
+                "res": 4,
+                "data_order": [2, 1, 0],
+                "clusters": np.array([[0, 0]]),
+                "current_idx": 2,
+                "lastpoint_idx": 0,
+                "INIT": 0,
+                "READY": 1,
+                "CLUSTERING": 2,
+                "DONE": 3,
+                "lastcluster_idx": None,
+                "status": 1,
+            },
         ),
     ],
 )
-def test_set_data(inputs, expected):
-    actual = DataClusters(x=np.array([]), y=np.array([]), res=0)
-    actual.setdata(x=inputs["x"], y=inputs["y"], res=inputs["res"])
-    assert expected == actual
+def test_DataClusters_constructor(inputs, expected):
+    actual = DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
+    attributes = vars(actual)
+    for attr_key, attr_val in attributes.items():
+        if isinstance(attr_val, np.ndarray):
+            assert np.array_equal(attr_val, expected[attr_key])
+        else:
+            assert attr_val == expected[attr_key]
+    actual._clear()
+    assert actual == DataClusters(x=np.array([]), y=np.array([]), res=0)
 
 
 @pytest.mark.parametrize(

From 759a7e80f0550b7697f18e51add1ca3ea1d15634 Mon Sep 17 00:00:00 2001
From: Simon Billinge <sbillinge@users.noreply.github.com>
Date: Tue, 13 Aug 2024 11:52:28 -0400
Subject: [PATCH 12/14] final tweaks to tests by Simon

---
 src/diffpy/srmise/tests/test_dataclusters.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index a6a81d8..0e9b860 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -55,14 +55,12 @@ def test___eq__():
 )
 def test_DataClusters_constructor(inputs, expected):
     actual = DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
-    attributes = vars(actual)
-    for attr_key, attr_val in attributes.items():
+    actual_attributes = vars(actual)
+    for attr_key, atual_attr_val in actual_attributes.items():
         if isinstance(attr_val, np.ndarray):
-            assert np.array_equal(attr_val, expected[attr_key])
+            assert np.array_equal(actual_attr_val, expected[attr_key])
         else:
-            assert attr_val == expected[attr_key]
-    actual._clear()
-    assert actual == DataClusters(x=np.array([]), y=np.array([]), res=0)
+            assert actual_attr_val == expected[attr_key]
 
 
 @pytest.mark.parametrize(

From 6f29c6057d031d2d24eeefc823a3942431f834f7 Mon Sep 17 00:00:00 2001
From: Simon Billinge <sbillinge@users.noreply.github.com>
Date: Tue, 13 Aug 2024 11:55:17 -0400
Subject: [PATCH 13/14] fix actual_attribute typo

---
 src/diffpy/srmise/tests/test_dataclusters.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index 0e9b860..65f322b 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -56,7 +56,7 @@ def test___eq__():
 def test_DataClusters_constructor(inputs, expected):
     actual = DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
     actual_attributes = vars(actual)
-    for attr_key, atual_attr_val in actual_attributes.items():
+    for attr_key, actual_attr_val in actual_attributes.items():
         if isinstance(attr_val, np.ndarray):
             assert np.array_equal(actual_attr_val, expected[attr_key])
         else:

From d102f5f7b4d5568304b4258307c0ccda5d9caabc Mon Sep 17 00:00:00 2001
From: Simon Billinge <sbillinge@users.noreply.github.com>
Date: Tue, 13 Aug 2024 11:56:55 -0400
Subject: [PATCH 14/14] final refactor of actual_attr

---
 src/diffpy/srmise/tests/test_dataclusters.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
index 65f322b..c9fa8a7 100644
--- a/src/diffpy/srmise/tests/test_dataclusters.py
+++ b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -57,7 +57,7 @@ def test_DataClusters_constructor(inputs, expected):
     actual = DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
     actual_attributes = vars(actual)
     for attr_key, actual_attr_val in actual_attributes.items():
-        if isinstance(attr_val, np.ndarray):
+        if isinstance(actual_attr_val, np.ndarray):
             assert np.array_equal(actual_attr_val, expected[attr_key])
         else:
             assert actual_attr_val == expected[attr_key]