Skip to content

Commit c243310

Browse files
optimize code
2 parents e551219 + 0fc64d9 commit c243310

File tree

4 files changed

+60
-52
lines changed

4 files changed

+60
-52
lines changed

README.md

+3
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@
7272
### [random_forest.py](random_forest.py)
7373
random_forest algorithm including bagging, random features, oob verification, feature selection
7474

75+
### [gbdt.py](gbdt.py)
76+
gradient boosted decision tree (GBDT) algorithm
77+
7578
## Dimensionality Reduction
7679

7780
### [linear_discriminant_analysis.py](linear_discriminant_analysis.py)

gbdt.py

+53-48
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,54 @@
1-
import numpy as np
2-
import decision_tree
3-
4-
class GBDT:
    '''Gradient boosted decision trees: an additive ensemble of CART
    regression trees, each one fit to the residual left by the trees
    built before it.'''

    def __sigmoid(self, x):
        # Logistic link applied to the raw score for 'binary_crossentropy'.
        return 1 / (1 + np.exp(-x))

    def __softmax(self, x):
        # Row-wise softmax link applied to the raw score for
        # 'categorical_crossentropy'.
        return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

    def __init__(self, loss):
        # loss selects the link/decision rule:
        # 'mse', 'binary_crossentropy', or 'categorical_crossentropy'.
        self.__models = []
        self.__alpha = []
        self.__loss = loss

    def fit(self, X, y, epochs, learning_rate):
        '''Grow `epochs` regression trees; each new tree targets the
        residual y - score(X) of the ensemble built so far.'''
        self.__learning_rate = learning_rate

        residual = y
        for _ in range(epochs):
            tree = decision_tree.Cart('regression')
            tree.fit(X, residual)
            self.__models.append(tree)

            # Per-tree weight: mean ratio of residual to the tree's output
            # (the epsilon keeps the division finite near zero predictions).
            weight = np.mean(residual / (tree.predict(X) + 1e-8), axis=0)
            self.__alpha.append(weight)

            residual = y - self.score(X)

    def predict(self, X, classes=None):
        '''Turn the ensemble score into a prediction according to the loss;
        `classes` is consulted only for 'categorical_crossentropy'.'''
        if self.__loss == 'mse':
            return self.score(X)
        if self.__loss == 'binary_crossentropy':
            return np.around(self.score(X))
        if self.__loss == 'categorical_crossentropy':
            return classes[np.argmax(self.score(X), axis=1)].reshape((-1, 1))

    def score(self, X):
        '''Weighted, learning-rate-scaled sum of tree predictions, passed
        through the link function matching the configured loss.'''
        h = sum(self.__learning_rate * tree.predict(X) * weight
                for weight, tree in zip(self.__alpha, self.__models))

        if self.__loss == 'mse':
            return h
        if self.__loss == 'binary_crossentropy':
            return self.__sigmoid(h)
        if self.__loss == 'categorical_crossentropy':
            return self.__softmax(h)
1+
import numpy as np
2+
import decision_tree_cart
3+
import scipy
4+
5+
class GBDT:
    '''Gradient boosted decision trees: an additive ensemble of CART
    regression trees, each fit to the residual of the ensemble so far.'''

    def __init__(self, loss):
        '''
        Parameters
        ----------
        loss : 'regression' to return the raw ensemble score,
               'classification' to return the rounded score
        '''
        # Fail fast here; previously an unrecognized loss made predict()
        # silently return None.
        if loss not in ('regression', 'classification'):
            raise ValueError("loss must be 'regression' or 'classification'")
        self.__models = []
        self.__alpha = []
        self.__loss = loss

    def fit(self, X, y, epochs, learning_rate):
        '''
        Parameters
        ----------
        X : shape (n_samples, n_features)
            Training data
        y : shape (n_samples,)
            Target values
        epochs : The number of boosting rounds (one tree per round)
        learning_rate : Shrinkage applied to every tree's contribution
        '''
        self.__learning_rate = learning_rate

        residual = y
        for _ in range(epochs):
            model = decision_tree_cart.CART('regression')
            model.fit(X, residual)
            self.__models.append(model)

            # Per-tree weight: mean ratio of residual to the tree's output;
            # the epsilon keeps the division finite when a tree predicts ~0.
            alpha = np.mean(residual / (model.predict(X) + 1e-8), axis=0)
            self.__alpha.append(alpha)

            # The next tree targets what the current ensemble still misses.
            residual = y - self.score(X)

    def predict(self, X):
        '''
        Parameters
        ----------
        X : shape (n_samples, n_features)
            Predicting data

        Returns
        -------
        y : shape (n_samples,)
            Predicted value per sample.
        '''
        if self.__loss == 'regression':
            return self.score(X)
        # __init__ validated the loss, so this branch is 'classification';
        # note np.around rounds halves to even.
        return np.around(self.score(X))

    def score(self, X):
        '''Raw additive ensemble output: learning_rate times the sum of
        weighted tree predictions (0 when no trees have been fit).'''
        # Generator instead of a throwaway list inside sum().
        return self.__learning_rate * sum(
            model.predict(X) * alpha
            for alpha, model in zip(self.__alpha, self.__models))
4954

support_vector_data_description.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,10 @@ def __qp(self, X, kernel, C):
3434
self.__a_support = alpha[support_items]
3535

3636
free_items = np.flatnonzero(self.__a_support < C)
37-
self.__X_free = self.__X_support[free_items]
37+
X_free = self.__X_support[free_items]
38+
39+
self.__center = self.__a_support.dot(self.__X_support)
40+
self.__radius = np.mean(distance.euclidean_distance(self.__center, X_free))
3841

3942
def fit(self, X, kernel_func, C, sigma=1):
4043
'''
@@ -51,8 +54,6 @@ def fit(self, X, kernel_func, C, sigma=1):
5154

5255
kernel = self.__kernel_func(X, X, self.__sigma)
5356
self.__qp(X, kernel, C)
54-
self.__center = self.__a_support.dot(self.__X_support)
55-
self.__radius = np.mean(distance.euclidean_distance(self.__center, self.__X_free))
5657

5758
def predict(self, X):
5859
'''

svm.py

-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ def __qp(self, X, y, kernel, C):
2626
self.__a_support = alpha[support_items]
2727

2828
free_items = np.flatnonzero(self.__a_support < C)
29-
X_free = X[free_items]
3029
y_free = y[free_items]
3130

3231
self.__bias = y_free[0] - (self.__a_support * self.__y_support).T.dot(kernel[support_items, free_items[0]])

0 commit comments

Comments
 (0)