# Author: Abhinav Dhere (abhitechnical41[at]gmail.com)
# Originally written as { Problem 1, Assignment 1, SM in AI (CSE 471) - 2017 IIIT Hyderabad }
# Perceptron-based classification for the MNIST database. Options: single-sample update or batch update, each with or without a margin.
# Coded from scratch; the only dependency is NumPy.
# Expects data in CSV files.
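# Example invocation (a sketch; the script and CSV file names are placeholders, not files that ship with this code):
#   python perceptron.py mnist_train.csv mnist_test.csv
# Each training row is expected to hold the binary class label in column 0 followed by 784 pixel values;
# each test row holds only the 784 pixel values.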

import numpy as np
import sys
import math
import time

def getConfMatrix(pLabels, labels):
    '''
    Build the 2x2 confusion matrix for the classification performed on the test data.
    '''
    TN = 0; FN = 0; FP = 0; TP = 0
    for id in pLabels.keys():
        if pLabels[id] == 0 and labels[id] == 0:
            TN += 1
        elif pLabels[id] == 0 and labels[id] == 1:
            FN += 1
        elif pLabels[id] == 1 and labels[id] == 0:
            FP += 1
        elif pLabels[id] == 1 and labels[id] == 1:
            TP += 1
    confMat = np.array([[TN, FP], [FN, TP]])
    return confMat

def getStats(labelsP, labels):
    '''
    Print accuracy and recall computed from the confusion matrix.
    '''
    c = getConfMatrix(labelsP, labels)
    acc = (c[1, 1] + c[0, 0]) / float(c[0, 0] + c[0, 1] + c[1, 0] + c[1, 1])
    recall = c[1, 1] / float(c[1, 1] + c[1, 0])
    print("Accuracy: " + str(acc * 100))
    print("Recall: " + str(recall * 100))

def readFile(filename, datType):
    '''
    Read the CSV file given by filename. For training data (datType 0), return two
    NumPy arrays: the pixel features (columns 1-784) and the labels (column 0).
    For test data (datType 1), return a single array of features.
    '''
    if datType == 0:
        colNos = range(1, 785)
        data = np.genfromtxt(filename, dtype=int, delimiter=',', autostrip=True, usecols=colNos)
        labels = np.genfromtxt(filename, dtype=int, delimiter=',', autostrip=True, usecols=[0])
        return data, labels
    elif datType == 1:
        data = np.genfromtxt(filename, dtype=int, delimiter=',', autostrip=True)
        return data

def predict(w, x, i, margin):
    # Predict 1 if the activation w.x[i] clears the margin, else 0.
    if np.dot(np.transpose(w), x[i]) >= margin:
        pred = 1
    else:
        pred = 0
    return pred

def augment(data):
    # Prepend a column of ones so the bias term is absorbed into the weight vector.
    aug = np.ones((data.shape[0], 1))
    x = np.concatenate((aug, data), axis=1)
    return x

def train(data_train_file, method, margin):
    [data, labels] = readFile(data_train_file, 0)
    x = augment(data)
    w = np.random.rand(x.shape[1])
    eta = 1
    if method == 'single':
        # Single-sample perceptron: one pass over the data, updating w on every misclassified sample.
        for i in range(x.shape[0]):
            predVal = predict(w, x, i, margin)
            err = labels[i] - predVal
            if err != 0:
                w = w + (err * eta) * x[i, :]
    elif method == 'batch':
        # Batch perceptron: map labels to {+1, -1}, take an initial step with the sum of all
        # samples, then repeatedly re-check the previously misclassified samples and add the
        # sum of those still misclassified, until at most one remains.
        # (The margin is applied only at prediction time, not inside this update test.)
        z = [label if label == 1 else -1 for label in labels]
        w = w + sum([z[num] * x[num] for num in range(x.shape[0])])
        lenValue = x.shape[0]
        oldDefaulters = range(x.shape[0])
        while lenValue > 1:
            defaulters = []
            for i in oldDefaulters:
                if np.dot(np.transpose(w), z[i] * x[i]) <= 0:
                    defaulters.append(i)
            x_sum = sum([z[j] * x[j] for j in defaulters])
            w = w + eta * x_sum
            lenValue = len(defaulters)
            oldDefaulters = defaulters
    return w

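# A minimal sanity check (a sketch, not part of the original assignment code): the helper name
# _toy_single_sample_demo is hypothetical; it mirrors the 'single' update rule above
# (w <- w + eta*(label - prediction)*x on each mistake) on a tiny linearly separable AND-style set.
def _toy_single_sample_demo():
    pts = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # four 2-D points
    y = np.array([0, 0, 0, 1])                        # only (1, 1) belongs to the positive class
    x = augment(pts)                                  # prepend the bias column, as train() does
    w = np.zeros(x.shape[1])
    for _ in range(10):                               # a few passes suffice on this toy set
        for i in range(x.shape[0]):
            err = y[i] - predict(w, x, i, 0)          # margin 0, as in the 'without margin' runs
            if err != 0:
                w = w + err * x[i, :]
    return w
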
def test(w, data_test_file, margin):
    data_test = readFile(data_test_file, 1)
    x = augment(data_test)
    predVal = {}
    for i in range(data_test.shape[0]):
        predVal[i] = predict(w, x, i, margin)
    return predVal


def classify(trainFile, testFile, method, margin):
    w = train(trainFile, method, margin)
    labels_pred = test(w, testFile, margin)
    #getStats(labels_pred, labels)
    for id in labels_pred.keys():
        print(labels_pred[id])

if __name__ == "__main__":
# start_time = time.time()
    data_train_file = sys.argv[1]
    data_test_file = sys.argv[2]

# Single-sample perceptron ==>
    # Without margin
    classify(data_train_file, data_test_file, 'single', 0)
    # With margin
    classify(data_train_file, data_test_file, 'single', 6)
# Batch perceptron ==>
    # Without margin
    classify(data_train_file, data_test_file, 'batch', 0)
    # With margin
    classify(data_train_file, data_test_file, 'batch', 6)
# print("--- %s seconds ---" % (time.time() - start_time))