|
22 | 22 |
|
23 | 23 |
|
24 | 24 | class DataClusters:
|
25 |
| - """Find clusters corresponding to peaks in numerical x-, y-value arrays. |
| 25 | + """Find clusters corresponding to peaks in the PDF (y-array) |
26 | 26 |
|
27 |
| - DataClusters determines which points, given a pair of x- and y-value |
28 |
| - sequences, roughly correspond to which visible peaks in that data. This |
29 |
| - division is contiguous, with borders between clusters near relative |
| 27 | + DataClusters determines which points in inter-atomic distance, r, |
| 28 | + correspond to peaks in the PDF. The division between clusters |
| 29 | + is contiguous, with borders between clusters likely near relative |
30 | 30 | minima in the data.
|
31 | 31 |
|
32 | 32 | Clusters are iteratively formed around points with the largest
|
33 |
| - y-coordinates. New clusters are added only when the unclustered data |
| 33 | + PDF values. New clusters are added only when the unclustered data |
34 | 34 | point under consideration is greater than a given distance (the
|
35 | 35 | 'resolution') from the nearest existing cluster.
|
36 | 36 |
|
37 | 37 | Data members
|
38 |
| - x - sequence of x coordinates. |
39 |
| - y - sequence of y values |
40 |
| - res - clustering 'resolution' |
41 |
| - data_order - array of x, y indices ordered by decreasing y |
42 |
| - clusters - array of cluster ranges |
43 |
| - current_idx - index of data_order currently considered |
| 38 | + ------------ |
| 39 | + x : array |
| 40 | + The array of r values. |
| 41 | + y : array |
| 42 | + The array of PDF values, G(r) |
| 43 | + res : int |
| 44 | + The clustering resolution, i.e., the number of points another point has to |
| 45 | + be away from the center of an existing cluster before a new cluster is |
| 46 | + formed. A value of zero allows every point to be a cluster. |
| 47 | + data_order : array |
| 48 | + The array of x, y indices ordered by decreasing y |
| 49 | + clusters : array |
| 50 | + The array of cluster ranges |
| 51 | + current_idx : int |
| 52 | + The index of data_order currently considered |
44 | 53 | """
|
45 | 54 |
|
46 | 55 | def __init__(self, x, y, res):
|
47 |
| - """Initializes the data to be clustered, and the 'resolution' to use. |
| 56 | + """Constructor |
48 | 57 |
|
49 | 58 | Parameters
|
50 |
| - x - numeric sequence of x-value sorted in ascending order |
51 |
| - y - corresponding sequence of y-values |
52 |
| - res - clustering 'resolution' |
| 59 | + ---------- |
| 60 | + x : array |
| 61 | + The array of r values. |
| 62 | + y : array |
| 63 | + The array of PDF values, G(r) |
| 64 | + res : int |
| 65 | + The clustering resolution, i.e., the number of points another point has to |
| 66 | + be away from the center of an existing cluster before a new cluster is |
| 67 | + formed. A value of zero allows every point to be a cluster. |
53 | 68 | """
|
54 | 69 | # Track internal state of clustering.
|
55 | 70 | self.INIT = 0
|
56 | 71 | self.READY = 1
|
57 | 72 | self.CLUSTERING = 2
|
58 | 73 | self.DONE = 3
|
59 |
| - |
60 |
| - self.clear() |
61 |
| - self.setdata(x, y, res) |
| 74 | + self._clear() |
| 75 | + self._setdata(x, y, res) |
62 | 76 |
|
63 | 77 | return
|
64 | 78 |
|
@@ -87,7 +101,7 @@ def __eq__(self, other):
|
87 | 101 | and self.DONE == other.DONE
|
88 | 102 | )
|
89 | 103 |
|
90 |
| - def clear(self): |
| 104 | + def _clear(self): |
91 | 105 | """
|
92 | 106 | Clear all data and reset the cluster object to a transient initial state.
|
93 | 107 |
|
@@ -120,35 +134,37 @@ def reset_clusters(self):
|
120 | 134 | self.status = self.READY
|
121 | 135 | return
|
122 | 136 |
|
123 |
| - def setdata(self, x, y, res): |
| 137 | + def _setdata(self, x, y, res): |
124 | 138 | """Assign data members for x- and y-coordinates, and resolution.
|
125 | 139 |
|
126 | 140 | Parameters
|
127 |
| - x - numeric sequence of x-value sorted in ascending order |
128 |
| - y - corresponding sequence of y-values |
129 |
| - res - clustering 'resolution' |
| 141 | + ---------- |
| 142 | + x : array |
| 143 | + The array of r values. |
| 144 | + y : sequence of y values |
| 145 | + The array of PDF values, G(r) |
| 146 | + res : int |
| 147 | + The clustering resolution, i.e., the number of points another point has to |
| 148 | + be away from the center of an existing cluster before a new cluster is |
| 149 | + formed. A value of zero allows every point to be a cluster. |
130 | 150 | """
|
131 |
| - # Test for error conditions |
132 |
| - # 1) Length mismatch |
133 |
| - # 2) Bound errors for res |
134 |
| - # 3) r isn't sorted? |
135 | 151 | if len(x) != len(y):
|
136 | 152 | raise ValueError("Sequences x and y must have the same length.")
|
137 | 153 | if res < 0:
|
138 |
| - raise ValueError("Resolution res must be non-negative.") |
139 |
| - # Test for sorting? |
| 154 | + raise ValueError( |
| 155 | + "Value of resolution parameter is less than zero. Please rerun specifying a non-negative res" |
| 156 | + ) |
140 | 157 | self.x = x
|
141 | 158 | self.y = y
|
142 | 159 | self.res = res
|
143 |
| - # If x sequence size is empty, set the object into Initialized state. |
144 |
| - if x.size == 0 and res == 0: |
| 160 | + if x.size == 0: |
145 | 161 | self.data_order = np.array([])
|
146 | 162 | self.clusters = np.array([[]])
|
147 | 163 | self.current_idx = 0
|
148 | 164 | self.lastpoint_idx = None
|
149 | 165 | self.status = self.INIT
|
150 | 166 | else:
|
151 |
| - self.data_order = self.y.argsort() # Defines order of clustering |
| 167 | + self.data_order = self.y.argsort() |
152 | 168 | self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]])
|
153 | 169 | self.current_idx = len(self.data_order) - 1
|
154 | 170 | self.lastpoint_idx = self.data_order[-1]
|
|
0 commit comments