add svdd algo

zhaoyichanghong · zhaoyichanghong · commit e55121922e26 · 2019-03-22T14:32:36.000+08:00
diff --git a/README.md b/README.md
@@ -7,6 +7,7 @@
 - [Unsupervised Learning](#unsupervised-learning)
   - [Clustering](#clustering)
   - [Dimensionality Reduction](#dimensionality-reduction) 
+  - [Abnormal Detection](#abnormal-detection)
   - [Others](#others)
  - [Tools](#tools)
  
@@ -43,9 +44,6 @@
 ### [naive_bayesian_for_text.py](naive_bayesian_for_text.py)
     naive bayesian algorithm for text classification
 
-### [decision_tree_id3.py](decision_tree_id3.py)
-    decision tree id3 algorithm
-    
 ### [decision_tree_id3.py](decision_tree_id3.py)
     decision tree id3 algorithm
 
@@ -123,7 +121,12 @@
     
 ### [locally_linear_embedding.py](locally_linear_embedding.py)
     locally linear embedding algorithm
-    
+
+## Abnormal Detection
+
+### [support_vector_data_description.py](support_vector_data_description.py)
+    support vector data description algorithm
+
 ## Others
 
 ### [ica.py](ica.py)
diff --git a/rbf_network.py b/rbf_network.py
@@ -15,7 +15,7 @@ def fit(self, X, y, units, epochs, optimizer):
         ----------
         X : shape (n_samples, n_features)
             Training data
-        y : shape (n_samples, 1)
+        y : shape (n_samples,)
             Target values, 1 or 0
         epochs : The number of epochs
         optimizer : Optimize algorithm, see also optimizer.py
@@ -29,8 +29,8 @@ def fit(self, X, y, units, epochs, optimizer):
         model.fit(X, self.__units, 10)        
         self.__centers = model.centers
         
-        self.__sigmas = np.ones((self.__units, 1))
-        self.__weights = np.random.randn(self.__units, 1)
+        self.__sigmas = np.ones(self.__units)
+        self.__weights = np.random.randn(self.__units)
         
         if self.__debug:
             accuracy = []
@@ -47,9 +47,9 @@ def fit(self, X, y, units, epochs, optimizer):
             g_sigmas = np.zeros_like(self.__sigmas)
             g_weights = np.zeros_like(self.__weights)
             for i in range(self.__units):
-                g_centers[i] = self.__weights[i] * np.mean(residual * outs[:, i].reshape((-1, 1)) * (X - self.__centers[i]), axis=0) / (self.__sigmas[i] ** 2)
-                g_sigmas[i] = self.__weights[i] * np.mean(residual * outs[:, i].reshape((-1, 1)) * (np.linalg.norm(X - self.__centers[i], axis=1).reshape((-1, 1)) ** 2), axis=0) / (self.__sigmas[i] ** 3)
-                g_weights[i] = np.mean(residual * outs[:, i].reshape((-1, 1)), axis=0)
+                g_centers[i] = self.__weights[i] * np.mean((residual * outs[:, i]).reshape((-1, 1)) * (X - self.__centers[i]), axis=0) / (self.__sigmas[i] ** 2)
+                g_sigmas[i] = self.__weights[i] * np.mean(residual * outs[:, i] * (np.linalg.norm(X - self.__centers[i], axis=1) ** 2), axis=0) / (self.__sigmas[i] ** 3)
+                g_weights[i] = np.mean(residual * outs[:, i], axis=0)
             
             g_centers, g_sigmas, g_weights = optimizer.optimize([g_centers, g_sigmas, g_weights])
             self.__centers -= g_centers
@@ -87,7 +87,7 @@ def predict(self, X):
 
         Returns
         -------
-        y : shape (n_samples, 1)
+        y : shape (n_samples,)
             Predicted class label per sample, 1 or 0
         '''
         if self.__mode == 'classification':
diff --git a/support_vector_data_description.py b/support_vector_data_description.py
@@ -0,0 +1,82 @@
+import numpy as np
+import cvxopt
+import distance
+
+class SVDD:
+    '''
+    Support vector data description: a one-class model that encloses the
+    training data in a minimum-volume hypersphere in kernel feature space.
+    Samples whose kernel distance to the sphere center does not exceed the
+    radius are treated as normal.
+
+    kernel_func(A, B, sigma) is expected to return an array of shape
+    (len(B), len(A)); the dot products below rely on that orientation.
+    '''
+    @property
+    def center(self):
+        '''
+        Weighted sum of the support vectors in input space. This is the
+        sphere center only for a linear kernel; for other kernels the
+        center lives in feature space and is never formed explicitly.
+        '''
+        return self.__center
+
+    @property
+    def radius(self):
+        '''
+        Sphere radius, measured with the same kernel distance as predict
+        '''
+        return self.__radius
+
+    def __qp(self, X, kernel, C):
+        '''
+        Solve the SVDD dual problem
+
+            min  sum_ij a_i a_j K_ij - sum_i a_i K_ii
+            s.t. 0 <= a_i <= C,  sum_i a_i = 1
+
+        and store the support vectors, their multipliers and the boundary
+        (free) support vectors on the instance.
+
+        Parameters
+        ----------
+        X : shape (n_samples, n_features)
+            Training data
+        kernel : shape (n_samples, n_samples)
+            Gram matrix of X
+        C : Upper bound of every multiplier
+        '''
+        n_samples = X.shape[0]
+
+        # cvxopt minimizes 1/2 x'Px + q'x, hence the factor 2 on the Gram matrix
+        P = 2 * kernel
+        q = -np.diag(kernel).reshape(-1, 1)
+
+        # -a <= 0 and a <= C stacked into a single G a <= h system
+        G = np.vstack((-np.eye(n_samples), np.eye(n_samples)))
+        h = np.hstack((np.zeros(n_samples), np.full(n_samples, C)))
+
+        # sum(a) == 1
+        A = np.ones((1, n_samples))
+        b = np.ones(1)
+
+        res = cvxopt.solvers.qp(cvxopt.matrix(P), cvxopt.matrix(q), cvxopt.matrix(G), cvxopt.matrix(h), cvxopt.matrix(A), cvxopt.matrix(b))
+        alpha = np.array(res['x']).ravel()
+
+        support_items = np.flatnonzero(~np.isclose(alpha, 0))
+        self.__X_support = X[support_items]
+        self.__a_support = alpha[support_items]
+
+        # The solver returns multipliers a hair below C for bounded vectors,
+        # so a bare "< C" would treat outliers as boundary points.
+        is_free = (self.__a_support < C) & ~np.isclose(self.__a_support, C)
+        self.__X_free = self.__X_support[np.flatnonzero(is_free)]
+
+    def fit(self, X, kernel_func, C, sigma=1):
+        '''
+        Parameters
+        ----------
+        X : shape (n_samples, n_features)
+            Training data
+        kernel_func : kernel algorithm, see also kernel.py
+        C : Penalty parameter C of the error term
+        sigma : Parameter for rbf kernel
+        '''
+        self.__sigma = sigma
+        self.__kernel_func = kernel_func
+
+        kernel = self.__kernel_func(X, X, self.__sigma)
+        self.__qp(X, kernel, C)
+        self.__center = self.__a_support.dot(self.__X_support)
+
+        # The radius has to be measured with the same kernel distance that
+        # predict compares against; an input-space Euclidean distance only
+        # matches it for a linear kernel.
+        if self.__X_free.shape[0] > 0:
+            self.__radius = np.mean(self.__score(self.__X_free))
+        else:
+            # No vector lies strictly between the bounds. Bounded support
+            # vectors sit on or outside the sphere, so the smallest of their
+            # distances is the tightest available bound on the radius.
+            self.__radius = np.min(self.__score(self.__X_support))
+
+    def predict(self, X):
+        '''
+        Parameters
+        ----------
+        X : shape (n_samples, n_features)
+            Predicting data
+
+        Returns
+        -------
+        y : shape (n_samples,)
+            True when the sample lies inside or on the sphere (normal),
+            False when it lies outside (abnormal)
+        '''
+        return self.__score(X) <= self.__radius
+
+    def __score(self, X):
+        '''
+        Kernel distance from every sample to the sphere center
+
+            sqrt(K(x, x) - 2 * sum_i a_i K(x, s_i) + sum_ij a_i a_j K(s_i, s_j))
+
+        Parameters
+        ----------
+        X : shape (n_samples, n_features)
+
+        Returns
+        -------
+        scores : shape (n_samples,)
+        '''
+        n_samples = X.shape[0]
+
+        # K(x, x) one row at a time, so the full n x n Gram matrix is never built
+        self_kernel = np.array([
+            np.ravel(self.__kernel_func(X[i:i + 1], X[i:i + 1], self.__sigma))[0]
+            for i in range(n_samples)
+        ], dtype=float)
+
+        # shape (n_support, n_samples), reduced over the support vectors
+        cross_kernel = self.__kernel_func(X, self.__X_support, self.__sigma)
+        cross = np.ravel(self.__a_support.dot(cross_kernel))
+
+        # Independent of X, so it is evaluated once instead of once per sample
+        support_kernel = self.__kernel_func(self.__X_support, self.__X_support, self.__sigma)
+        center_norm = self.__a_support.dot(support_kernel).dot(self.__a_support)
+
+        scores = self_kernel - 2 * cross + center_norm
+
+        # Rounding can push a zero distance slightly negative; clip before sqrt
+        return np.sqrt(np.maximum(scores, 0))
diff --git a/svm.py b/svm.py
@@ -20,12 +20,12 @@ def __qp(self, X, y, kernel, C):
         res = cvxopt.solvers.qp(cvxopt.matrix(P), cvxopt.matrix(q), cvxopt.matrix(G), cvxopt.matrix(h), cvxopt.matrix(A), cvxopt.matrix(b))
         alpha = np.array(res['x']).ravel()
 
-        support_items = np.flatnonzero(alpha > 1e-6)
+        support_items = np.flatnonzero(np.isclose(alpha, 0) == False)
         self.__X_support = X[support_items]
         self.__y_support = y[support_items]
         self.__a_support = alpha[support_items]
 
-        free_items = np.flatnonzero(self.__a_support < (C - 1e-6))
+        free_items = np.flatnonzero(self.__a_support < C)
         X_free = X[free_items]
         y_free = y[free_items]