Skip to content

Commit 02a58af

Browse files
optimize code
2 parents fc63509 + 2069c15 commit 02a58af

39 files changed

+1492
-1492
lines changed

agnes.py

+33-33
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,34 @@
1-
import numpy as np
2-
import distance
3-
4-
class Agnes:
5-
def fit(self, X, cluster_number):
6-
'''
7-
Parameters
8-
----------
9-
X : shape (data_number, feature_number)
10-
Training data
11-
cluster_number : The number of clusters
12-
13-
Returns
14-
-------
15-
y : shape (data_number,)
16-
Predicted cluster label per sample.
17-
'''
18-
data_number = X.shape[0]
19-
20-
clusters = [[i] for i in range(data_number)]
21-
for j in reversed(range(cluster_number, data_number)):
22-
centers = np.array([np.mean(X[cluster], axis=0).ravel() for cluster in clusters])
23-
distances = np.apply_along_axis(distance.euclidean_distance, 1, centers, centers)
24-
near_indexes = np.unravel_index(np.argmin(distances + np.diag(np.full(j + 1, np.inf))), distances.shape)
25-
26-
clusters[near_indexes[0]].extend(clusters[near_indexes[1]])
27-
28-
del clusters[near_indexes[1]]
29-
30-
y = np.zeros(data_number)
31-
for i in range(len(clusters)):
32-
y[clusters[i]] = i
33-
1+
import numpy as np
2+
import distance
3+
4+
class Agnes:
    '''AGNES agglomerative clustering via repeated nearest-centroid merges.'''

    def fit(self, X, n_clusters):
        '''
        Start with one singleton cluster per sample and repeatedly merge
        the two clusters whose centroids are closest, until only
        n_clusters clusters remain.

        Parameters
        ----------
        X : shape (n_samples, n_features)
            Training data
        n_clusters : The number of clusters

        Returns
        -------
        y : shape (n_samples,)
            Predicted cluster label per sample.
        '''
        n_samples = X.shape[0]

        # Every sample starts in its own cluster.
        clusters = [[i] for i in range(n_samples)]
        while len(clusters) > n_clusters:
            centers = np.array([np.mean(X[cluster], axis=0).ravel() for cluster in clusters])

            # Pairwise centroid distances via broadcasting: one vectorized
            # pass instead of np.apply_along_axis with a per-row Python
            # callback (also removes the project-local `distance` helper).
            distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=-1)

            # Mask the diagonal so a cluster is never merged with itself.
            np.fill_diagonal(distances, np.inf)
            i, j = np.unravel_index(np.argmin(distances), distances.shape)

            # Merge cluster j into cluster i, then drop j.
            clusters[i].extend(clusters[j])
            del clusters[j]

        y = np.zeros(n_samples)
        for label, cluster in enumerate(clusters):
            y[cluster] = label

        return y

bisecting_kmeans.py

+39-39
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,40 @@
1-
import numpy as np
2-
import k_means
3-
4-
class BisectingKMeans:
5-
def fit(self, X, cluster_number):
6-
'''
7-
Parameters
8-
----------
9-
X : shape (data_number, feature_number)
10-
Training data
11-
cluster_number : The number of clusters
12-
13-
Returns
14-
-------
15-
y : shape (data_number,)
16-
Predicted cluster label per sample.
17-
'''
18-
data_number = X.shape[0]
19-
20-
data = X
21-
clusters = []
22-
while True:
23-
model = k_means.KMeans()
24-
label = model.fit(data, 2, 100)
25-
26-
clusters.append(np.flatnonzero(label == 0))
27-
clusters.append(np.flatnonzero(label == 1))
28-
29-
if len(clusters) == cluster_number:
30-
break
31-
32-
sse = [np.var(data[cluster]) for cluster in clusters]
33-
data = data[clusters[np.argmax(sse)]]
34-
del clusters[np.argmax(sse)]
35-
36-
y = np.zeros(data_number)
37-
for i in range(len(clusters)):
38-
y[clusters[i]] = i
39-
1+
import numpy as np
2+
import k_means
3+
4+
class BisectingKMeans:
    '''Bisecting k-means: grow clusters by repeatedly 2-splitting the worst one.'''

    def fit(self, X, n_clusters):
        '''
        Repeatedly split the cluster with the largest variance in two with
        2-means until n_clusters clusters exist.

        Parameters
        ----------
        X : shape (n_samples, n_features)
            Training data
        n_clusters : The number of clusters

        Returns
        -------
        y : shape (n_samples,)
            Predicted cluster label per sample.
        '''
        n_samples = X.shape[0]

        # Degenerate request: everything is one cluster. Without this guard
        # the loop below can never satisfy len(clusters) == n_clusters
        # (it always holds at least 2 after the first split) and spins forever.
        if n_clusters <= 1:
            return np.zeros(n_samples)

        # `indices` maps rows of the working subset `data` back to rows of X.
        # BUG FIX: the original stored np.flatnonzero positions inside the
        # shrinking `data` array, so every cluster found after the first
        # split indexed the wrong samples of X; tracking original indices
        # makes the final labelling correct.
        indices = np.arange(n_samples)
        data = X
        clusters = []
        while True:
            model = k_means.KMeans()
            label = model.fit(data, 2, 100)

            # Record ORIGINAL sample indices, not positions in `data`.
            clusters.append(indices[np.flatnonzero(label == 0)])
            clusters.append(indices[np.flatnonzero(label == 1)])

            if len(clusters) == n_clusters:
                break

            # Choose the cluster with the largest variance to split next.
            sse = [np.var(X[cluster]) for cluster in clusters]
            worst = np.argmax(sse)  # hoisted: original evaluated argmax twice
            indices = clusters[worst]
            data = X[indices]
            del clusters[worst]

        y = np.zeros(n_samples)
        for i, cluster in enumerate(clusters):
            y[cluster] = i

        return y

collaborative_filtering.py

+74-74
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,75 @@
1-
import numpy as np
2-
import matplotlib.pyplot as plt
3-
4-
class CollaborativeFiltering:
5-
def fit(self, X, y, dimension, learning_rate, epochs):
6-
'''
7-
Parameters
8-
----------
9-
X : shape (data_number, 2)
10-
Training data, column 1 is user id, column 2 is item id
11-
y : shape (data_number, 1)
12-
Rating
13-
learning_rate : learning rate
14-
epochs : The number of epochs
15-
'''
16-
data_number = X.shape[0]
17-
user_id = X[:, 0]
18-
item_id = X[:, 1]
19-
20-
self.__user_items = np.unique(user_id)
21-
self.__item_items = np.unique(item_id)
22-
23-
user_number = len(self.__user_items)
24-
item_number = len(self.__item_items)
25-
26-
self.__user_vector = np.random.uniform(size=(user_number, dimension))
27-
self.__user_bias = np.zeros((user_number, 1))
28-
self.__item_vector = np.random.uniform(size=(item_number, dimension))
29-
self.__item_bias = np.zeros((item_number, 1))
30-
31-
loss = []
32-
for _ in range(epochs):
33-
index = np.random.randint(0, data_number)
34-
35-
user_index = np.flatnonzero(self.__user_items == user_id[index])
36-
item_index = np.flatnonzero(self.__item_items == item_id[index])
37-
38-
r = (self.__user_vector[user_index].dot(self.__item_vector[item_index].T) + self.__user_bias[user_index] + self.__item_bias[item_index] - y[index])
39-
40-
loss.append(r.ravel() ** 2)
41-
42-
user_vector_new = self.__user_vector[user_index] - learning_rate * r * self.__item_vector[item_index]
43-
self.__user_bias[user_index] -= learning_rate * r
44-
item_vector_new = self.__item_vector[item_index] - learning_rate * r * self.__user_vector[user_index]
45-
self.__item_bias[item_index] -= learning_rate * r
46-
47-
self.__user_vector[user_index] = user_vector_new
48-
self.__item_vector[item_index] = item_vector_new
49-
50-
plt.plot(loss)
51-
plt.show()
52-
53-
def predict(self, X):
54-
'''
55-
Parameters
56-
----------
57-
X : shape (data_number, 2)
58-
Predicting data, column 1 is user id, column 2 is item id
59-
60-
Returns
61-
-------
62-
y : shape (data_number, 1)
63-
Predicted rating per sample.
64-
'''
65-
data_number = X.shape[0]
66-
user_id = X[:, 0]
67-
item_id = X[:, 1]
68-
69-
y = np.zeros((data_number, 1))
70-
for i in range(data_number):
71-
user_index = np.flatnonzero(self.__user_items == user_id[i])
72-
item_index = np.flatnonzero(self.__item_items == item_id[i])
73-
y[i] = self.__user_vector[user_index].dot(self.__item_vector[item_index].T) + self.__user_bias[user_index] + self.__item_bias[item_index]
74-
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
4+
class CollaborativeFiltering:
    '''Biased matrix-factorization recommender trained by per-sample SGD.'''

    def fit(self, X, y, dimension, learning_rate, epochs):
        '''
        Train user/item latent vectors and biases by stochastic gradient
        descent on one randomly drawn rating per step, then plot the
        per-step squared residuals.

        Parameters
        ----------
        X : shape (n_samples, 2)
            Training data, column 1 is user id, column 2 is item id
        y : shape (n_samples,)
            Rating
        dimension : size of the latent factor vectors
        learning_rate : learning rate
        epochs : The number of epochs (one random sample per epoch)
        '''
        n_samples = X.shape[0]
        user_id = X[:, 0]
        item_id = X[:, 1]

        # Lookup tables from raw ids to row indices of the factor matrices.
        self.__user_items = np.unique(user_id)
        self.__item_items = np.unique(item_id)

        n_users = len(self.__user_items)
        n_items = len(self.__item_items)

        self.__user_vector = np.random.uniform(size=(n_users, dimension))
        self.__user_bias = np.zeros((n_users, 1))
        self.__item_vector = np.random.uniform(size=(n_items, dimension))
        self.__item_bias = np.zeros((n_items, 1))

        loss = []
        for _ in range(epochs):
            # One SGD step on a single randomly chosen rating.
            index = np.random.randint(0, n_samples)

            user_index = np.flatnonzero(self.__user_items == user_id[index])
            item_index = np.flatnonzero(self.__item_items == item_id[index])

            # Residual of the current prediction for this (user, item) pair.
            r = (self.__user_vector[user_index].dot(self.__item_vector[item_index].T)
                 + self.__user_bias[user_index] + self.__item_bias[item_index] - y[index])

            loss.append(r.ravel() ** 2)

            # Both vector updates are computed from the PRE-update values and
            # only committed afterwards, so neither step sees the other's change.
            user_vector_new = self.__user_vector[user_index] - learning_rate * r * self.__item_vector[item_index]
            self.__user_bias[user_index] -= learning_rate * r
            item_vector_new = self.__item_vector[item_index] - learning_rate * r * self.__user_vector[user_index]
            self.__item_bias[item_index] -= learning_rate * r

            self.__user_vector[user_index] = user_vector_new
            self.__item_vector[item_index] = item_vector_new

        plt.plot(loss)
        plt.show()

    def predict(self, X):
        '''
        Predict ratings for (user, item) pairs seen during fit.

        Parameters
        ----------
        X : shape (n_samples, 2)
            Predicting data, column 1 is user id, column 2 is item id

        Returns
        -------
        y : shape (n_samples,)
            Predicted rating per sample.
        '''
        n_samples = X.shape[0]
        user_id = X[:, 0]
        item_id = X[:, 1]

        y = np.zeros(n_samples)
        for i in range(n_samples):
            user_index = np.flatnonzero(self.__user_items == user_id[i])
            item_index = np.flatnonzero(self.__item_items == item_id[i])
            y[i] = (self.__user_vector[user_index].dot(self.__item_vector[item_index].T)
                    + self.__user_bias[user_index] + self.__item_bias[item_index])

        return y

0 commit comments

Comments
 (0)