Skip to content

Commit e551219

Browse files
add svdd algo
2 parents 02a58af + 240a689 commit e551219

File tree

4 files changed

+98
-13
lines changed

4 files changed

+98
-13
lines changed

README.md

+7-4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
- [Unsupervised Learning](#unsupervised-learning)
88
- [Clustering](#clustering)
99
- [Dimensionality Reduction](#dimensionality-reduction)
10+
- [Abnormal Detection](#abnormal-detection)
1011
- [Others](#others)
1112
- [Tools](#tools)
1213

@@ -43,9 +44,6 @@
4344
### [naive_bayesian_for_text.py](naive_bayesian_for_text.py)
4445
naive bayesian algorithm for text classification
4546

46-
### [decision_tree_id3.py](decision_tree_id3.py)
47-
decision tree id3 algorithm
48-
4947
### [decision_tree_id3.py](decision_tree_id3.py)
5048
decision tree id3 algorithm
5149

@@ -123,7 +121,12 @@
123121

124122
### [locally_linear_embedding.py](locally_linear_embedding.py)
125123
locally linear embedding algorithm
126-
124+
125+
## Abnormal Detection
126+
127+
### [support_vector_data_description.py](support_vector_data_description.py)
128+
support vector data description algorithm
129+
127130
## Others
128131

129132
### [ica.py](ica.py)

rbf_network.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def fit(self, X, y, units, epochs, optimizer):
1515
----------
1616
X : shape (n_samples, n_features)
1717
Training data
18-
y : shape (n_samples, 1)
18+
y : shape (n_samples,)
1919
Target values, 1 or 0
2020
epochs : The number of epochs
2121
optimizer : Optimize algorithm, see also optimizer.py
@@ -29,8 +29,8 @@ def fit(self, X, y, units, epochs, optimizer):
2929
model.fit(X, self.__units, 10)
3030
self.__centers = model.centers
3131

32-
self.__sigmas = np.ones((self.__units, 1))
33-
self.__weights = np.random.randn(self.__units, 1)
32+
self.__sigmas = np.ones(self.__units)
33+
self.__weights = np.random.randn(self.__units)
3434

3535
if self.__debug:
3636
accuracy = []
@@ -47,9 +47,9 @@ def fit(self, X, y, units, epochs, optimizer):
4747
g_sigmas = np.zeros_like(self.__sigmas)
4848
g_weights = np.zeros_like(self.__weights)
4949
for i in range(self.__units):
50-
g_centers[i] = self.__weights[i] * np.mean(residual * outs[:, i].reshape((-1, 1)) * (X - self.__centers[i]), axis=0) / (self.__sigmas[i] ** 2)
51-
g_sigmas[i] = self.__weights[i] * np.mean(residual * outs[:, i].reshape((-1, 1)) * (np.linalg.norm(X - self.__centers[i], axis=1).reshape((-1, 1)) ** 2), axis=0) / (self.__sigmas[i] ** 3)
52-
g_weights[i] = np.mean(residual * outs[:, i].reshape((-1, 1)), axis=0)
50+
g_centers[i] = self.__weights[i] * np.mean((residual * outs[:, i]).reshape((-1, 1)) * (X - self.__centers[i]), axis=0) / (self.__sigmas[i] ** 2)
51+
g_sigmas[i] = self.__weights[i] * np.mean(residual * outs[:, i] * (np.linalg.norm(X - self.__centers[i], axis=1) ** 2), axis=0) / (self.__sigmas[i] ** 3)
52+
g_weights[i] = np.mean(residual * outs[:, i], axis=0)
5353

5454
g_centers, g_sigmas, g_weights = optimizer.optimize([g_centers, g_sigmas, g_weights])
5555
self.__centers -= g_centers
@@ -87,7 +87,7 @@ def predict(self, X):
8787
8888
Returns
8989
-------
90-
y : shape (n_samples, 1)
90+
y : shape (n_samples,)
9191
Predicted class label per sample, 1 or 0
9292
'''
9393
if self.__mode == 'classification':

support_vector_data_description.py

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import numpy as np
2+
import cvxopt
3+
import distance
4+
5+
class SVDD:
    '''
    Support vector data description.

    Fits a hypersphere around the training data by solving the SVDD dual
    problem with cvxopt, then classifies samples by comparing their
    kernel-induced distance to the sphere center against the sphere radius.
    '''

    @property
    def center(self):
        '''Weighted sum of the support vectors in input space (available after fit).'''
        return self.__center

    @property
    def radius(self):
        '''Radius of the fitted sphere in the kernel-induced distance (available after fit).'''
        return self.__radius

    def __qp(self, X, kernel, C):
        '''
        Solve the SVDD dual and store the support vectors.

        The dual is
            minimize    alpha^T K alpha - sum_i alpha_i * K_ii
            subject to  0 <= alpha_i <= C,  sum_i alpha_i = 1

        Parameters
        ----------
        X : shape (n_samples, n_features)
            Training data
        kernel : shape (n_samples, n_samples)
            Kernel matrix of X with itself
        C : Upper bound of every multiplier alpha_i
        '''
        n_samples = X.shape[0]
        # cvxopt only accepts double precision matrices.
        kernel = np.asarray(kernel, dtype=float)

        # cvxopt.solvers.qp minimizes 1/2 x^T P x + q^T x, hence the factor 2.
        P = 2 * kernel
        q = -np.diag(kernel).reshape(-1, 1)

        # G x <= h encodes -alpha <= 0 and alpha <= C.
        G = np.vstack((-np.eye(n_samples), np.eye(n_samples)))
        h = np.hstack((np.zeros(n_samples), np.full(n_samples, C)))

        # A x = b encodes sum(alpha) = 1.
        A = np.full((1, n_samples), 1.0)
        b = np.ones(1)

        res = cvxopt.solvers.qp(cvxopt.matrix(P), cvxopt.matrix(q), cvxopt.matrix(G), cvxopt.matrix(h), cvxopt.matrix(A), cvxopt.matrix(b))
        alpha = np.array(res['x']).ravel()

        # Support vectors are the samples with a non-zero multiplier.
        is_support = ~np.isclose(alpha, 0)
        self.__X_support = X[is_support]
        self.__a_support = alpha[is_support]

        # Free support vectors have a multiplier strictly inside (0, C). The
        # solver only reaches the bound C approximately, so compare with a
        # tolerance, the same way the lower bound 0 is handled above.
        is_free = (self.__a_support < C) & ~np.isclose(self.__a_support, C)
        self.__X_free = self.__X_support[is_free]

    def fit(self, X, kernel_func, C, sigma=1):
        '''
        Parameters
        ----------
        X : shape (n_samples, n_features)
            Training data
        kernel_func : kernel algorithm see also kernel.py
            Called as kernel_func(X1, X2, sigma)
        C : Penalty parameter C of the error term
        sigma : Parameter for rbf kernel
        '''
        self.__sigma = sigma
        self.__kernel_func = kernel_func

        kernel = self.__kernel_func(X, X, self.__sigma)
        self.__qp(X, kernel, C)
        self.__center = self.__a_support.dot(self.__X_support)

        # The radius must be measured with the same kernel-induced distance
        # that predict() compares against; an input-space Euclidean distance
        # only agrees with it for a linear kernel.
        # If every support vector sits on the bound C, fall back to all
        # support vectors instead of averaging an empty set (which gives nan).
        boundary = self.__X_free if len(self.__X_free) > 0 else self.__X_support
        self.__radius = np.mean(self.__score(boundary))

    def predict(self, X):
        '''
        Parameters
        ----------
        X : shape (n_samples, n_features)
            Predicting data

        Returns
        -------
        y : shape (n_samples,)
            True when the sample lies inside or on the fitted sphere (normal),
            False when it lies outside (abnormal)
        '''
        return self.__score(X) <= self.__radius

    def __score(self, X):
        '''
        Kernel-induced distance of every sample to the sphere center.

        For a sample x the squared distance is
            k(x, x) - 2 * sum_i a_i k(x, x_i) + sum_ij a_i a_j k(x_i, x_j)
        where x_i are the support vectors and a_i their multipliers.

        Parameters
        ----------
        X : shape (n_samples, n_features)

        Returns
        -------
        scores : shape (n_samples,)
        '''
        n_samples = X.shape[0]

        # The support-vector term does not depend on the sample: compute it once.
        support_kernel = self.__kernel_func(self.__X_support, self.__X_support, self.__sigma)
        support_term = self.__a_support.dot(support_kernel).dot(self.__a_support.T)

        scores = np.zeros(n_samples)
        for i in range(n_samples):
            x = X[i].reshape((1, -1))
            self_term = self.__kernel_func(x, x, self.__sigma)
            cross_term = self.__a_support.dot(self.__kernel_func(x, self.__X_support, self.__sigma))
            scores[i] = float(np.squeeze(self_term - 2 * cross_term + support_term))

        # Rounding can push a squared distance slightly below zero for samples
        # at the center; clamp so the square root never yields nan.
        return np.sqrt(np.maximum(scores, 0))

svm.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ def __qp(self, X, y, kernel, C):
2020
res = cvxopt.solvers.qp(cvxopt.matrix(P), cvxopt.matrix(q), cvxopt.matrix(G), cvxopt.matrix(h), cvxopt.matrix(A), cvxopt.matrix(b))
2121
alpha = np.array(res['x']).ravel()
2222

23-
support_items = np.flatnonzero(alpha > 1e-6)
23+
support_items = np.flatnonzero(np.isclose(alpha, 0) == False)
2424
self.__X_support = X[support_items]
2525
self.__y_support = y[support_items]
2626
self.__a_support = alpha[support_items]
2727

28-
free_items = np.flatnonzero(self.__a_support < (C - 1e-6))
28+
free_items = np.flatnonzero(self.__a_support < C)
2929
X_free = X[free_items]
3030
y_free = y[free_items]
3131

0 commit comments

Comments
 (0)