Skip to content

Commit 99b7a18

Browse files
committed
add clustering algos tutorial
1 parent c03bf85 commit 99b7a18

15 files changed

+251
-0
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
100100
- [Dimensionality Reduction: Using Feature Selection in Python](https://www.thepythoncode.com/article/dimensionality-reduction-feature-selection). ([code](machine-learning/dimensionality-reduction-feature-selection))
101101
- [A Guide to Explainable AI Using Python](https://www.thepythoncode.com/article/explainable-ai-model-python). ([code](machine-learning/explainable-ai))
102102
- [Autoencoders for Dimensionality Reduction using TensorFlow in Python](https://www.thepythoncode.com/article/feature-extraction-dimensionality-reduction-autoencoders-python-keras). ([code](machine-learning/feature-extraction-autoencoders))
103+
- [Exploring the Different Types of Clustering Algorithms in Machine Learning with Python](https://www.thepythoncode.com/article/clustering-algorithms-in-machine-learning-with-python). ([code](machine-learning/clustering-algorithms))
103104

104105
- ### [General Python Topics](https://www.thepythoncode.com/topic/general-python-topics)
105106
- [How to Make Facebook Messenger bot in Python](https://www.thepythoncode.com/article/make-bot-fbchat-python). ([code](general/messenger-bot))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# [Exploring the Different Types of Clustering Algorithms in Machine Learning with Python](https://www.thepythoncode.com/article/clustering-algorithms-in-machine-learning-with-python)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AffinityPropagation
from matplotlib import pyplot

# synthetic 2-feature dataset (1000 samples, fixed seed for the data)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# build the model, fit it, then assign every sample to a cluster
model = AffinityPropagation(damping=0.9)
labels = model.fit(X).predict(X)

# the title is the same for every cluster, so set it once up front
pyplot.title('Affinity Propagation Clustering')
# one scatter call per cluster label (only the data points are drawn)
for label in np.unique(labels):
    members = labels == label
    pyplot.scatter(X[members, 0], X[members, 1])
# show the plot
pyplot.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AgglomerativeClustering
from matplotlib import pyplot

# synthetic 2-feature dataset (1000 samples, fixed seed for the data)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# model configured for 3 clusters; fit and label the samples in one step
model = AgglomerativeClustering(n_clusters=3)
labels = model.fit_predict(X)

# the title is the same for every cluster, so set it once up front
pyplot.title('Agglomerative Clustering')
# one scatter call per cluster label (only the data points are drawn)
for label in np.unique(labels):
    members = labels == label
    pyplot.scatter(X[members, 0], X[members, 1])
# show the plot
pyplot.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import Birch
from matplotlib import pyplot

# synthetic 2-feature dataset (1000 samples, fixed seed for the data)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# model configured for 2 clusters; fit and label the samples in one step
model = Birch(threshold=0.05, n_clusters=2)
labels = model.fit_predict(X)

# the title is the same for every cluster, so set it once up front
pyplot.title('Birch Clustering')
# one scatter call per cluster label (only the data points are drawn)
for label in np.unique(labels):
    members = labels == label
    pyplot.scatter(X[members, 0], X[members, 1])
# show the plot
pyplot.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import DBSCAN
from matplotlib import pyplot

# synthetic 2-feature dataset (1000 samples, fixed seed for the data)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# build the model, then fit and label the samples in one step
model = DBSCAN(eps=0.05, min_samples=10)
labels = model.fit_predict(X)

# the title is the same for every label, so set it once up front
pyplot.title('DBSCAN Clustering')
# one scatter call per distinct label returned by the model
for label in np.unique(labels):
    members = labels == label
    pyplot.scatter(X[members, 0], X[members, 1])
# show the plot
pyplot.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.mixture import GaussianMixture
from matplotlib import pyplot

# synthetic 2-feature dataset (1000 samples, fixed seed for the data)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# mixture with 2 components; fit and label the samples in one step
model = GaussianMixture(n_components=2)
labels = model.fit_predict(X)

# the title is the same for every cluster, so set it once up front
pyplot.title('Gaussian Mixture Clustering')
# one scatter call per cluster label (only the data points are drawn)
for label in np.unique(labels):
    members = labels == label
    pyplot.scatter(X[members, 0], X[members, 1])
# show the plot
pyplot.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import KMeans
from matplotlib import pyplot

# Single source of truth for the cluster count: it feeds both the model and
# the plot title, so the two cannot drift apart (the title used to claim 3
# clusters while the model was fitted with 2).
N_CLUSTERS = 2

# 2 features, 2 informative, 0 redundant, 1 cluster per class
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# build the model, fit it, then assign every sample to a cluster
model = KMeans(n_clusters=N_CLUSTERS)
labels = model.fit(X).predict(X)

# the title is the same for every cluster, so set it once up front
pyplot.title(f'K-means (No. of Clusters = {N_CLUSTERS})')
# one scatter call per cluster label (only the data points are drawn)
for label in np.unique(labels):
    members = labels == label
    pyplot.scatter(X[members, 0], X[members, 1])
# show the plot
pyplot.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MeanShift
from matplotlib import pyplot

# synthetic 2-feature dataset (1000 samples, fixed seed for the data)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# default-configured model; fit and label the samples in one step
model = MeanShift()
labels = model.fit_predict(X)

# the title is the same for every cluster, so set it once up front
pyplot.title('Mean Shift Clustering')
# one scatter call per cluster label (only the data points are drawn)
for label in np.unique(labels):
    members = labels == label
    pyplot.scatter(X[members, 0], X[members, 1])
# show the plot
pyplot.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
from sklearn import metrics

# reference class labels and the cluster labels being evaluated
labels_true = [5, 3, 5, 4, 4, 5]
labels_pred = [3, 5, 5, 4, 3, 4]

# Scores are printed in this order, one per line:
#   homogeneity  - each cluster contains only members of a single class
#   completeness - all members of a given class are assigned to the same cluster
#   v-measure    - harmonic mean of homogeneity and completeness
scorers = (
    metrics.homogeneity_score,
    metrics.completeness_score,
    metrics.v_measure_score,
)
for scorer in scorers:
    print(scorer(labels_true, labels_pred))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from matplotlib import pyplot

# synthetic 2-feature dataset (1000 samples, fixed seed for the data)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# 3 clusters: fit the model, then assign every sample to a cluster
model = MiniBatchKMeans(n_clusters=3)
labels = model.fit(X).predict(X)

# the title is the same for every cluster, so set it once up front
pyplot.title('Mini Batch K-means')
# one scatter call per cluster label (only the data points are drawn)
for label in np.unique(labels):
    members = labels == label
    pyplot.scatter(X[members, 0], X[members, 1])
# show the plot
pyplot.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import OPTICS
from matplotlib import pyplot

# synthetic 2-feature dataset (1000 samples, fixed seed for the data)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# build the model, then fit and label the samples in one step
# NOTE(review): per the scikit-learn docs, `eps` appears to be honoured only
# when cluster_method='dbscan'; with the default method it may have no
# effect here - confirm which behaviour is intended.
model = OPTICS(eps=0.5, min_samples=10)
labels = model.fit_predict(X)

# the title is the same for every label, so set it once up front
pyplot.title('OPTICS Clustering')
# one scatter call per distinct label returned by the model
for label in np.unique(labels):
    members = labels == label
    pyplot.scatter(X[members, 0], X[members, 1])
# show the plot
pyplot.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
scikit-learn
2+
numpy
3+
matplotlib
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import SpectralClustering
from matplotlib import pyplot

# synthetic 2-feature dataset (1000 samples, fixed seed for the data)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# model configured for 3 clusters; fit and label the samples in one step
model = SpectralClustering(n_clusters=3)
labels = model.fit_predict(X)

# the title is the same for every cluster, so set it once up front
pyplot.title('Spectral Clustering')
# one scatter call per cluster label (only the data points are drawn)
for label in np.unique(labels):
    members = labels == label
    pyplot.scatter(X[members, 0], X[members, 1])
# show the plot
pyplot.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans
from matplotlib import pyplot
import timeit

# synthetic 2-feature dataset (1000 samples, fixed seed for the data)
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=10,
)

# Mini Batch K-Means: time construction + fit + predict as a single run
minibatch_start = timeit.default_timer()
minibatch_labels = MiniBatchKMeans(n_clusters=2).fit(X).predict(X)
minibatch_elapsed = timeit.default_timer() - minibatch_start

# K-Means: time the same three steps
kmeans_start = timeit.default_timer()
kmeans_labels = KMeans(n_clusters=2).fit(X).predict(X)
kmeans_elapsed = timeit.default_timer() - kmeans_start

# a positive value means K-Means took longer than Mini Batch K-Means
print("Time difference between Mini Batch K-Means and K-Means = ",
      kmeans_elapsed - minibatch_elapsed)

0 commit comments

Comments
 (0)