Skip to content

Commit 3eab606

Browse files
authored
Merge pull request deutranium#67 from blackfly19/master
KMeans in python
2 parents a573f11 + ed24920 commit 3eab606

File tree

1 file changed

+64
-0
lines changed

1 file changed

+64
-0
lines changed
Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
import random
2+
import itertools
3+
import numpy as np
4+
from sklearn.datasets import make_blobs
5+
6+
class KMeans:
    """K-means clustering by alternating assignment and mean updates.

    Points are first assigned to clusters at random; the model then
    repeatedly recomputes each cluster's mean and reassigns every point to
    its nearest mean (Euclidean distance) until no assignment changes.
    """

    def __init__(self, k):
        """Create an unfitted model.

        Args:
            k: Number of clusters to form.
        """
        self.k = k
        # Cluster centres computed by ``fit``; ``None`` until then.
        self.means = None

    def form_clusters(self, data, indexes):
        """Group points by cluster index and return one centre per cluster.

        Args:
            data: Indexable, sized collection of points (for example a list
                of coordinate lists or a 2-D NumPy array).
            indexes: Cluster index (``0 <= index < k``) for each point, in
                the same order as ``data``.

        Returns:
            A list of ``k`` centres. Each centre is the element-wise mean of
            the points assigned to that cluster, converted to plain Python
            values with ``tolist()``.
        """
        clusters = [[] for _ in range(self.k)]
        for point, index in zip(data, indexes):
            clusters[index].append(point)

        means = []
        for cluster in clusters:
            if cluster:
                means.append(np.mean(cluster, axis=0).tolist())
            else:
                # An empty cluster has no mean, so re-seed it with a random
                # data point. Index explicitly (instead of random.choice) and
                # convert the same way as the branch above so every centre
                # has the same type regardless of how it was produced.
                replacement = data[random.randrange(len(data))]
                means.append(np.asarray(replacement, dtype=float).tolist())
        return means

    def classify(self, vector):
        """Return the index of the centre closest to ``vector``.

        Args:
            vector: A single point with the same number of features as the
                fitted centres.

        Returns:
            The index (as ``int``) of the nearest centre by Euclidean
            distance; the lowest index wins ties.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.means is None:
            raise RuntimeError("KMeans.fit must be called before classify")

        # Converting both operands lets plain lists work too; flattening
        # keeps scalar (single-feature) points valid for the axis-wise norm.
        centers = np.asarray(self.means, dtype=float)
        centers = centers.reshape(len(self.means), -1)
        point = np.asarray(vector, dtype=float).reshape(-1)
        distances = np.linalg.norm(centers - point, axis=1)
        return int(np.argmin(distances))

    @property
    def centers(self):
        """The centres computed by ``fit`` (``None`` before fitting)."""
        return self.means

    def fit(self, data, max_iter=None):
        """Compute cluster centres for ``data`` and store them on the model.

        Args:
            data: Indexable, sized collection of points.
            max_iter: Optional cap on the number of update/reassign rounds.
                ``None`` (the default) iterates until the assignment stops
                changing.

        Raises:
            ValueError: If ``k`` is smaller than 1 or ``data`` is empty.
        """
        if self.k < 1:
            raise ValueError("k must be a positive integer")
        # len() rather than truthiness: a NumPy array's truth value is
        # ambiguous and would raise here.
        if len(data) == 0:
            raise ValueError("cannot fit KMeans on an empty dataset")

        # Random initial assignment of every point to one of the k clusters.
        cluster_index = [random.randrange(self.k) for _ in data]

        rounds = 0
        while True:
            # Update step: one centre per cluster from the current assignment.
            self.means = self.form_clusters(data, cluster_index)
            # Assignment step: move every point to its nearest centre.
            new_cluster_index = [self.classify(val) for val in data]
            rounds += 1

            # Converged once no point changed cluster.
            if new_cluster_index == cluster_index:
                return
            if max_iter is not None and rounds >= max_iter:
                return

            # The centres are recomputed at the top of the next round, so
            # there is no need to compute them a second time here.
            cluster_index = new_cluster_index
58+
59+
# Demo: draw 150 two-feature samples around 3 blob centres, cluster them
# with k=3 and print the centres the model settled on.
X, y = make_blobs(
    n_samples=150,
    n_features=2,
    centers=3,
)
clustering = KMeans(k=3)
clustering.fit(X)
print(clustering.centers)
64+

0 commit comments

Comments
 (0)