-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpam.py
45 lines (33 loc) · 1.54 KB
/
pam.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env python
import numpy as np
import random
def cluster(distances, k=3):
m = distances.shape[0] # number of points
# Pick k random medoids.
curr_medoids = np.array([-1] * k)
while not len(np.unique(curr_medoids)) == k:
curr_medoids = np.array([random.randint(0, m - 1) for _ in range(k)])
old_medoids = np.array([-1] * k) # Doesn't matter what we initialize these to.
new_medoids = np.array([-1] * k)
# Until the medoids stop updating, do the following:
while not ((old_medoids == curr_medoids).all()):
# Assign each point to cluster with closest medoid.
clusters = assign_points_to_clusters(curr_medoids, distances)
# Update cluster medoids to be lowest cost point.
for curr_medoid in curr_medoids:
cluster = np.where(clusters == curr_medoid)[0]
new_medoids[curr_medoids == curr_medoid] = compute_new_medoid(cluster, distances)
old_medoids[:] = curr_medoids[:]
curr_medoids[:] = new_medoids[:]
return clusters, curr_medoids
def assign_points_to_clusters(medoids, distances):
distances_to_medoids = distances[:, medoids]
clusters = medoids[np.argmin(distances_to_medoids, axis=1)]
clusters[medoids] = medoids
return clusters
def compute_new_medoid(cluster, distances):
mask = np.ones(distances.shape)
mask[np.ix_(cluster, cluster)] = 0.
cluster_distances = np.ma.masked_array(data=distances, mask=mask, fill_value=10e9)
costs = cluster_distances.sum(axis=1)
return costs.argmin(axis=0, fill_value=10e9)