1
+ import numpy as np
2
+ import pandas as pd
3
+ import matplotlib .pyplot as plt
4
+
5
# Logistic (sigmoid) activation applied to the linear scores X @ theta.
def sigmoid(theta, X):
    """Return elementwise 1 / (1 + e^(-X @ theta)) for parameter vector theta."""
    scores = np.dot(X, theta.T)
    return 1.0 / (1.0 + np.exp(-scores))
8
+
9
# Cross-entropy cost of the current model; training minimizes this value.
# NOTE: despite the name, this is the cost function itself, not its
# derivative — see log_gradient for the actual gradient.
def gradient(X, theta, y):
    """Return the mean negative log-likelihood of theta over (X, y)."""
    m = len(y)
    h = sigmoid(theta, X)
    log_likelihood = y * np.log(h) + (1 - y) * np.log(1 - h)
    return (-1 / m) * np.sum(log_likelihood)
15
+
16
# Partial derivatives of the cross-entropy cost with respect to theta.
def log_gradient(theta, X, y):
    """Return the gradient (1/m) * (h - y)^T X of the cost at theta."""
    m = len(y)
    error = sigmoid(theta, X) - y
    return (1 / m) * np.dot(error.T, X)
22
+
23
# Batch gradient descent that minimizes the cross-entropy cost (gradient).
def gradientDescent(X, theta, y, learning_rate=.01, converge_change=.00001,
                    verbose=False):
    """Minimize the cost function and return the fitted parameters.

    Parameters
    ----------
    X : array-like, shape (m, n) -- design matrix (bias column included).
    theta : ndarray, shape (n,) -- initial parameter vector.
    y : array-like, shape (m,) -- binary labels (0/1).
    learning_rate : step size for each parameter update.
    converge_change : stop once the cost decreases by less than this amount
        between two consecutive iterations.
    verbose : when True, print the iteration count (this replaces an
        unconditional debug print in the original code; it is now opt-in
        so library callers are not spammed on stdout).

    Returns
    -------
    ndarray -- the parameter vector after convergence.
    """
    cost = gradient(X, theta, y)
    change_cost = 1
    n = 1
    while change_cost > converge_change:
        old_cost = cost
        # Step opposite the gradient direction to reduce the cost.
        theta = theta - learning_rate * log_gradient(theta, X, y)
        cost = gradient(X, theta, y)
        change_cost = old_cost - cost
        n += 1
    if verbose:
        print(n)
    return theta
36
+
37
# Train the model on the training data.
def fit(X_train, y_train):
    """Return logistic-regression parameters fitted via gradient descent.

    Starts from an all-zero parameter vector with one entry per column
    of X_train.
    """
    initial_theta = np.zeros(X_train.shape[1])
    return gradientDescent(X_train, initial_theta, y_train)
42
+
43
# Predict hard class labels for new or test data.
def predict(X_test, final_theta):
    """Return 1 where the predicted probability is at least 0.5, else 0."""
    probabilities = sigmoid(final_theta, X_test)
    return np.where(probabilities >= .5, 1, 0)
48
+
49
# Visualize the labelled samples together with the learned decision boundary.
def plot_reg(X, y, theta):
    '''
    function to plot decision boundary

    X     : array-like, shape (m, 3) -- bias column plus two features.
    y     : array-like, shape (m,)  -- binary labels (0/1).
    theta : fitted parameters [theta0, theta1, theta2].

    Shows a scatter of both classes and the line theta0 + theta1*x1 +
    theta2*x2 = 0. (A stray debug print of theta was removed.)
    '''
    # labelled observations, split by class
    X = np.array(X)
    x_0 = pd.DataFrame(X[np.where(y == 0)])
    x_1 = pd.DataFrame(X[np.where(y == 1)])

    # plotting points with a different color per label
    # (column 0 is the bias term, so the features are columns 1 and 2)
    plt.scatter(x_0.iloc[:, 1], x_0.iloc[:, 2], c='b', label='y = 0')
    plt.scatter(x_1.iloc[:, 1], x_1.iloc[:, 2], c='r', label='y = 1')

    # decision boundary: solve theta0 + theta1*x1 + theta2*x2 = 0 for x2
    # NOTE(review): x1 range 0..10 is hard-coded — assumes features fall in
    # that interval; verify against the data set.
    x1 = np.arange(0, 10, 1)
    x2 = -(theta[0] + theta[1] * x1) / theta[2]
    plt.plot(x1, x2, c='k', label='reg line')

    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()
72
+
73
+
74
+ if __name__ == "__main__" :
75
+ df = pd .read_csv ("data.csv" )
76
+ df .insert (0 , 'x0' , 1.0 )
77
+ X_train = df .iloc [:,0 :3 ]
78
+ y_train = df ['label' ]
79
+ parameters = fit (X_train , y_train )
80
+ X_test = np .array ([[1 , 5.123 , 6.872 ], [1 , 1.239 , 6.165 ], [1 , 8.6254 , 7.829 ], [1 , 2.382 , 7.525 ], [1 , 9.282 , 1.626 ], [1 , 3.272 , 5.737 ], [1 , 6.345 , 4.276 ], [1 , 3.372 , 8.238 ]])
81
+ result = predict (X_test , parameters )
82
+ print (result )
83
+ plot_reg (X_train , y_train , parameters )
0 commit comments