Skip to content

Add set data test cases #61

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 44 additions & 28 deletions src/diffpy/srmise/dataclusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,41 +22,55 @@


class DataClusters:
"""Find clusters corresponding to peaks in numerical x-, y-value arrays.
"""Find clusters corresponding to peaks in the PDF (y-array)

DataClusters determines which points, given a pair of x- and y-value
sequences, roughly correspond to which visible peaks in that data. This
division is contiguous, with borders between clusters near relative
DataClusters determines which points in inter-atomic distance, r,
correspond to peaks in the PDF. The division between clusters
is contiguous, with borders between clusters likely near relative
minima in the data.

Clusters are iteratively formed around points with the largest
y-coordinates. New clusters are added only when the unclustered data
PDF values. New clusters are added only when the unclustered data
point under consideration is greater than a given distance (the
'resolution') from the nearest existing cluster.

Data members
x - sequence of x coordinates.
y - sequence of y values
res - clustering 'resolution'
data_order - array of x, y indices ordered by decreasing y
clusters - array of cluster ranges
current_idx - index of data_order currently considered
------------
x : array
The array of r values.
y : sequence of y values
The array of PDF values, G(r)
res : int
The clustering resolution, i.e., the number of points another point has to
be away from the center of an existing cluster before a new cluster is
formed. A value of zero allows every point to be a cluster.
data_order : array
The array of x, y indices ordered by decreasing y
clusters : array
The array of cluster ranges
current_idx : int
The index of data_order currently considered
"""

def __init__(self, x, y, res):
"""Initializes the data to be clustered, and the 'resolution' to use.
"""Constructor

Parameters
x - numeric sequence of x-value sorted in ascending order
y - corresponding sequence of y-values
res - clustering 'resolution'
----------
x : array
The array of r values.
y : sequence of y values
The array of PDF values, G(r)
res : int
The clustering resolution, i.e., the number of points another point has to
be away from the center of an existing cluster before a new cluster is
formed. A value of zero allows every point to be a cluster.
"""
# Track internal state of clustering.
self.INIT = 0
self.READY = 1
self.CLUSTERING = 2
self.DONE = 3

self.clear()
self.setdata(x, y, res)

Expand Down Expand Up @@ -124,31 +138,33 @@ def setdata(self, x, y, res):
"""Assign data members for x- and y-coordinates, and resolution.

Parameters
x - numeric sequence of x-value sorted in ascending order
y - corresponding sequence of y-values
res - clustering 'resolution'
----------
x : array
The array of r values.
y : sequence of y values
The array of PDF values, G(r)
res : int
The clustering resolution, i.e., the number of points another point has to
be away from the center of an existing cluster before a new cluster is
formed. A value of zero allows every point to be a cluster.
"""
# Test for error conditions
# 1) Length mismatch
# 2) Bound errors for res
# 3) r isn't sorted?
if len(x) != len(y):
raise ValueError("Sequences x and y must have the same length.")
if res < 0:
raise ValueError("Resolution res must be non-negative.")
# Test for sorting?
raise ValueError(
"Value of resolution parameter is less than zero. Please rerun specifying a non-negative res"
)
self.x = x
self.y = y
self.res = res
# If x sequence size is empty, set the object into Initialized state.
if x.size == 0 and res == 0:
if x.size == 0:
self.data_order = np.array([])
self.clusters = np.array([[]])
self.current_idx = 0
self.lastpoint_idx = None
self.status = self.INIT
else:
self.data_order = self.y.argsort() # Defines order of clustering
self.data_order = self.y.argsort()
self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]])
self.current_idx = len(self.data_order) - 1
self.lastpoint_idx = self.data_order[-1]
Expand Down
50 changes: 48 additions & 2 deletions src/diffpy/srmise/tests/test_dataclusters.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from copy import copy

import numpy as np
import pytest

from diffpy.srmise.dataclusters import DataClusters

Expand All @@ -15,8 +16,8 @@ def test_clear():


def test___eq__():
actual = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 0)
expected = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 0)
actual = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 1)
expected = DataClusters(np.array([1, 2, 3]), np.array([3, 2, 1]), 1)
assert expected == actual
attributes = vars(actual)
for attr_key, attr_val in attributes.items():
Expand All @@ -32,3 +33,48 @@ def test___eq__():
print(f"not-equal test failed on {attr_key}")
assert not expected == actual
attributes.update({attr_key: reset})


@pytest.mark.parametrize(
    "inputs, expected",
    [
        (
            {
                "x": np.array([1, 2, 3]),
                "y": np.array([3, 2, 1]),
                "res": 4,
            },
            DataClusters(x=np.array([1, 2, 3]), y=np.array([3, 2, 1]), res=4),
        ),
    ],
)
def test_set_data(inputs, expected):
    """Check that setdata() on an empty instance equals direct construction.

    An instance is first built from empty arrays with res=0, then loaded
    through setdata(); the result must compare equal to an instance that was
    constructed with the same x, y and res from the start.
    """
    actual = DataClusters(x=np.array([]), y=np.array([]), res=0)
    # The parametrized dict carries exactly the keyword names setdata() takes.
    actual.setdata(**inputs)
    assert expected == actual


@pytest.mark.parametrize(
"inputs, msg",
[
(
{
"x": np.array([1]),
"y": np.array([3, 2]),
"res": 4,
},
"Sequences x and y must have the same length.",
),
(
{
"x": np.array([1]),
"y": np.array([3]),
"res": -1,
},
"Value of resolution parameter is less than zero. Please rerun specifying a non-negative res",
),
],
)
def test_set_data_order_bad(inputs, msg):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a good test, nice job.

with pytest.raises(ValueError, match=msg):
DataClusters(x=inputs["x"], y=inputs["y"], res=inputs["res"])
Loading