Skip to content

Commit d1bf3d4

Browse files
stevenhua0320pre-commit-ci[bot]sbillinge
authored
Add set data test cases (#61)
* add test cases to test files and make edition to make sure the behavior of the test pass. * [pre-commit.ci] auto fixes from pre-commit hooks * change case in test__eq__ to be compatible with the behavior of setdata * delete text and redundant tests * tweaking error message in DataClusters * [pre-commit.ci] auto fixes from pre-commit hooks * update test for checking implicit attributes for setdata function * [pre-commit.ci] auto fixes from pre-commit hooks * update test for setdata function * update setdata test to right format. * update to constructor test & make setdata clear function private * final tweaks to tests by Simon * fix actual_attribute typo * final refactor of actual_attr --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Billinge <[email protected]>
1 parent 5be09d5 commit d1bf3d4

File tree

2 files changed

+114
-43
lines changed

2 files changed

+114
-43
lines changed

src/diffpy/srmise/dataclusters.py

Lines changed: 48 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -22,43 +22,57 @@
2222

2323

2424
class DataClusters:
25-
"""Find clusters corresponding to peaks in numerical x-, y-value arrays.
25+
"""Find clusters corresponding to peaks in the PDF (y-array)
2626
27-
DataClusters determines which points, given a pair of x- and y-value
28-
sequences, roughly correspond to which visible peaks in that data. This
29-
division is contiguous, with borders between clusters near relative
27+
DataClusters determines which points in inter-atomic distance, r,
28+
correspond to peaks in the PDF. The division between clusters
29+
is contiguous, with borders between clusters likely near relative
3030
minima in the data.
3131
3232
Clusters are iteratively formed around points with the largest
33-
y-coordinates. New clusters are added only when the unclustered data
33+
PDF values. New clusters are added only when the unclustered data
3434
point under consideration is greater than a given distance (the
3535
'resolution') from the nearest existing cluster.
3636
3737
Data members
38-
x - sequence of x coordinates.
39-
y - sequence of y values
40-
res - clustering 'resolution'
41-
data_order - array of x, y indices ordered by decreasing y
42-
clusters - array of cluster ranges
43-
current_idx - index of data_order currently considered
38+
------------
39+
x : array
40+
The array of r values.
41+
y : array
42+
The array of PDF values, G(r)
43+
res : int
44+
The clustering resolution, i.e., the number of points another point has to
45+
be away from the center of an existing cluster before a new cluster is
46+
formed. A value of zero allows every point to be a cluster.
47+
data_order : array
48+
The array of x, y indices ordered by decreasing y
49+
clusters : array
50+
The array of cluster ranges
51+
current_idx : int
52+
The index of data_order currently considered
4453
"""
4554

4655
def __init__(self, x, y, res):
47-
"""Initializes the data to be clustered, and the 'resolution' to use.
56+
"""Constructor
4857
4958
Parameters
50-
x - numeric sequence of x-value sorted in ascending order
51-
y - corresponding sequence of y-values
52-
res - clustering 'resolution'
59+
----------
60+
x : array
61+
The array of r values.
62+
y : array
63+
The array of PDF values, G(r)
64+
res : int
65+
The clustering resolution, i.e., the number of points another point has to
66+
be away from the center of an existing cluster before a new cluster is
67+
formed. A value of zero allows every point to be a cluster.
5368
"""
5469
# Track internal state of clustering.
5570
self.INIT = 0
5671
self.READY = 1
5772
self.CLUSTERING = 2
5873
self.DONE = 3
59-
60-
self.clear()
61-
self.setdata(x, y, res)
74+
self._clear()
75+
self._setdata(x, y, res)
6276

6377
return
6478

@@ -87,7 +101,7 @@ def __eq__(self, other):
87101
and self.DONE == other.DONE
88102
)
89103

90-
def clear(self):
104+
def _clear(self):
91105
"""
92106
Clear all data and reset the cluster object to a transient initial state.
93107
@@ -120,35 +134,37 @@ def reset_clusters(self):
120134
self.status = self.READY
121135
return
122136

123-
def setdata(self, x, y, res):
137+
def _setdata(self, x, y, res):
124138
"""Assign data members for x- and y-coordinates, and resolution.
125139
126140
Parameters
127-
x - numeric sequence of x-value sorted in ascending order
128-
y - corresponding sequence of y-values
129-
res - clustering 'resolution'
141+
----------
142+
x : array
143+
The array of r values.
144+
y : sequence of y values
145+
The array of PDF values, G(r)
146+
res : int
147+
The clustering resolution, i.e., the number of points another point has to
148+
be away from the center of an existing cluster before a new cluster is
149+
formed. A value of zero allows every point to be a cluster.
130150
"""
131-
# Test for error conditions
132-
# 1) Length mismatch
133-
# 2) Bound errors for res
134-
# 3) r isn't sorted?
135151
if len(x) != len(y):
136152
raise ValueError("Sequences x and y must have the same length.")
137153
if res < 0:
138-
raise ValueError("Resolution res must be non-negative.")
139-
# Test for sorting?
154+
raise ValueError(
155+
"Value of resolution parameter is less than zero. Please rerun specifying a non-negative res"
156+
)
140157
self.x = x
141158
self.y = y
142159
self.res = res
143-
# If x sequence size is empty, set the object into Initialized state.
144-
if x.size == 0 and res == 0:
160+
if x.size == 0:
145161
self.data_order = np.array([])
146162
self.clusters = np.array([[]])
147163
self.current_idx = 0
148164
self.lastpoint_idx = None
149165
self.status = self.INIT
150166
else:
151-
self.data_order = self.y.argsort() # Defines order of clustering
167+
self.data_order = self.y.argsort()
152168
self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]])
153169
self.current_idx = len(self.data_order) - 1
154170
self.lastpoint_idx = self.data_order[-1]
Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,14 @@
11
from copy import copy
22

33
import numpy as np
4+
import pytest
45

56
from diffpy.srmise.dataclusters import DataClusters
67

78

8-
def test_clear():
9-
# Initialize DataClusters with input parameters
10-
actual = DataClusters(x=np.array([1, 2, 3]), y=np.array([3, 2, 1]), res=4)
11-
expected = DataClusters(x=np.array([]), y=np.array([]), res=0)
12-
# Perform the clear operation
13-
actual.clear()
14-
assert actual == expected
15-
16-
179
def test___eq__():
18-
actual = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 0)
19-
expected = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 0)
10+
actual = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 1)
11+
expected = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 1)
2012
assert expected == actual
2113
attributes = vars(actual)
2214
for attr_key, attr_val in attributes.items():
@@ -32,3 +24,66 @@ def test___eq__():
3224
print(f"not-equal test failed on {attr_key}")
3325
assert not expected == actual
3426
attributes.update({attr_key: reset})
27+
28+
29+
@pytest.mark.parametrize(
30+
"inputs, expected",
31+
[
32+
(
33+
{
34+
"x": np.array([1, 2, 3]),
35+
"y": np.array([3, 2, 1]),
36+
"res": 4,
37+
},
38+
{
39+
"x": np.array([1, 2, 3]),
40+
"y": np.array([3, 2, 1]),
41+
"res": 4,
42+
"data_order": [2, 1, 0],
43+
"clusters": np.array([[0, 0]]),
44+
"current_idx": 2,
45+
"lastpoint_idx": 0,
46+
"INIT": 0,
47+
"READY": 1,
48+
"CLUSTERING": 2,
49+
"DONE": 3,
50+
"lastcluster_idx": None,
51+
"status": 1,
52+
},
53+
),
54+
],
55+
)
56+
def test_DataClusters_constructor(inputs, expected):
57+
actual = DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
58+
actual_attributes = vars(actual)
59+
for attr_key, actual_attr_val in actual_attributes.items():
60+
if isinstance(actual_attr_val, np.ndarray):
61+
assert np.array_equal(actual_attr_val, expected[attr_key])
62+
else:
63+
assert actual_attr_val == expected[attr_key]
64+
65+
66+
@pytest.mark.parametrize(
67+
"inputs, msg",
68+
[
69+
(
70+
{
71+
"x": np.array([1]),
72+
"y": np.array([3, 2]),
73+
"res": 4,
74+
},
75+
"Sequences x and y must have the same length.",
76+
),
77+
(
78+
{
79+
"x": np.array([1]),
80+
"y": np.array([3]),
81+
"res": -1,
82+
},
83+
"Value of resolution parameter is less than zero. Please rerun specifying a non-negative res",
84+
),
85+
],
86+
)
87+
def test_set_data_order_bad(inputs, msg):
88+
with pytest.raises(ValueError, match=msg):
89+
DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])

0 commit comments

Comments
 (0)