add test for dataclusters (#54)

stevenhua0320 · web-flow · commit 89c8c0becaf9 · 2024-08-10T12:25:44.000-04:00
* add test for dataclusters

* define eq method in dataclusters.py

* change parametrization form

* add one more case and change reference name to actual

* delete comment

* add two more tests for DataClusters class function.

* clarify the docstring of the clear method, remove the duplicated test case, and remove the other tests.

* convert the clear method docstring to numpydoc format; drop the explicit dtype from the numpy arrays.

* remove block

* edit the condition on res and refactor setdata so that the test passes.

* change condition on res

* add a check for the case where x and res are incompatible, and update the test.

* revert change in setdata method.
diff --git a/src/diffpy/srmise/dataclusters.py b/src/diffpy/srmise/dataclusters.py
@@ -68,12 +68,36 @@ def __init__(self, x, y, res):
     def __iter__(self):
         return self
 
+    def __eq__(self, other):
+        if not isinstance(other, DataClusters):
+            return False
+        return (
+            np.array_equal(self.x, other.x)
+            and np.array_equal(self.y, other.y)
+            and np.array_equal(self.data_order, other.data_order)
+            and np.array_equal(self.clusters, other.clusters)
+            and self.res == other.res
+            and self.current_idx == other.current_idx
+            and self.lastcluster_idx == other.lastcluster_idx
+            and self.lastpoint_idx == other.lastpoint_idx
+            and self.status == other.status
+        )
+
     def clear(self):
-        """Clear all members, including user data."""
+        """
+        Clear all data and reset the cluster object to a transient initial state.
+
+        The purpose of this method is to provide a clean state before creating new clustering operations.
+        The object is updated in-place and no new instance is returned.
+
+        Returns
+        -------
+        None
+        """
         self.x = np.array([])
         self.y = np.array([])
-        self.data_order = np.array([], dtype=np.int32)
-        self.clusters = np.array([[]], dtype=np.int32)
+        self.data_order = np.array([])
+        self.clusters = np.array([[]])
         self.res = 0
         self.current_idx = 0
         self.lastcluster_idx = None
@@ -106,21 +130,26 @@ def setdata(self, x, y, res):
         # 3) r isn't sorted?
         if len(x) != len(y):
             raise ValueError("Sequences x and y must have the same length.")
-        if res <= 0:
-            raise ValueError("Resolution res must be greater than 0.")
+        if res < 0:
+            raise ValueError("Resolution res must be non-negative.")
         # Test for sorting?
-
         self.x = x
         self.y = y
         self.res = res
-
-        self.data_order = self.y.argsort()  # Defines order of clustering
-        self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]])
-        self.current_idx = len(self.data_order) - 1
-        self.lastcluster_idx = 0
-        self.lastpoint_idx = self.data_order[-1]
-
-        self.status = self.READY
+        # If x is empty and res is 0, put the object in the initialized (INIT) state.
+        if x.size == 0 and res == 0:
+            self.data_order = np.array([])
+            self.clusters = np.array([[]])
+            self.current_idx = 0
+            self.lastpoint_idx = None
+            self.status = self.INIT
+        else:
+            self.data_order = self.y.argsort()  # Defines order of clustering
+            self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]])
+            self.current_idx = len(self.data_order) - 1
+            self.lastpoint_idx = self.data_order[-1]
+            self.status = self.READY
+        self.lastcluster_idx = None
         return
 
     def next(self):
diff --git a/src/diffpy/srmise/pdfpeakextraction.py b/src/diffpy/srmise/pdfpeakextraction.py
@@ -119,7 +119,7 @@ def setvars(self, quiet=False, **kwds):
         quiet: [False] Log changes quietly.
 
         Keywords
-        cres: The clustering resolution, must be > 0.
+        cres: The clustering resolution, must be >= 0.
         effective_dy: The uncertainties actually used during extraction
         dg: Alias for effective_dy
         pf: Sequence of PeakFunctionBase subclass instances.
diff --git a/src/diffpy/srmise/tests/test_dataclusters.py b/src/diffpy/srmise/tests/test_dataclusters.py
@@ -0,0 +1,12 @@
+import numpy as np
+
+from diffpy.srmise.dataclusters import DataClusters
+
+
+def test_clear():
+    # Initialize DataClusters with input parameters
+    actual = DataClusters(x=np.array([1, 2, 3]), y=np.array([3, 2, 1]), res=4)
+    expected = DataClusters(x=np.array([]), y=np.array([]), res=0)
+    # Perform the clear operation
+    actual.clear()
+    assert actual == expected