1+ import numpy as np
2+ import pandas as pd
3+ import matplotlib .pyplot as plt
4+
5+ # fuction that returns sigmoid value of x
6+ def sigmoid (theta , X ):
7+ return 1 / (1 + np .exp (- np .dot (X , theta .T )))
8+
# Cost of the current model parameters (binary cross-entropy / log-loss).
# Training minimizes this value.
def gradient (X , theta , y ):
    """Return the mean negative log-likelihood of labels y under theta."""
    m = len(y)
    probabilities = sigmoid(theta, X)
    log_likelihood = y * np.log(probabilities) + (1 - y) * np.log(1 - probabilities)
    return (-1 / m) * np.sum(log_likelihood)
15+
# Partial derivatives of the log-loss cost with respect to theta
def log_gradient (theta , X , y ):
    """Return the gradient vector of the cost function at theta."""
    m = len(y)
    residuals = sigmoid(theta, X) - y
    return (1 / m) * np.dot(residuals.T, X)
22+
# Batch gradient descent: repeatedly steps theta opposite the cost gradient
def gradientDescent (X , theta , y , learning_rate = .01 , converge_change = .00001 , max_iterations = None ):
    """Minimize the log-loss cost via batch gradient descent.

    Parameters
    ----------
    X : array-like, shape (m, n) — design matrix (bias column included).
    theta : array-like, shape (n,) — initial parameter vector.
    y : array-like, shape (m,) — binary labels {0, 1}.
    learning_rate : float — step size for each update.
    converge_change : float — stop once the per-step cost decrease drops
        below this threshold.
    max_iterations : int or None — optional hard cap on iterations; None
        (the default) preserves the original unbounded behavior.

    Returns
    -------
    The fitted parameter vector theta.
    """
    cost = gradient(X, theta, y)
    change_cost = 1
    iterations = 0
    while change_cost > converge_change:
        old_cost = cost
        theta = theta - learning_rate * log_gradient(theta, X, y)
        cost = gradient(X, theta, y)
        # a negative change (cost went up) also terminates the loop
        change_cost = old_cost - cost
        iterations += 1
        if max_iterations is not None and iterations >= max_iterations:
            break
    return theta
36+
# Fit the model to the training data
def fit (X_train , y_train ):
    """Train logistic regression and return the learned parameters."""
    initial_theta = np.zeros(X_train.shape[1])
    return gradientDescent(X_train, initial_theta, y_train)
42+
# Predict class labels for unseen samples
def predict (X_test , final_theta ):
    """Return 0/1 labels: probability >= 0.5 maps to 1, otherwise 0."""
    probabilities = sigmoid(final_theta, X_test)
    return np.where(probabilities >= .5, 1, 0)
48+
# Visualize the fitted model: all samples plus the decision boundary
def plot_reg (X , y , theta ):
    '''
    Plot the labelled observations (colored by class) and the linear
    decision boundary implied by theta, then show the figure.

    Assumes column 0 of X is the bias term and columns 1-2 are the two
    plotted features; theta must have 3 components.
    '''
    # split observations by class label
    X = np.array(X)
    x_0 = pd.DataFrame(X[np.where(y == 0)])
    x_1 = pd.DataFrame(X[np.where(y == 1)])

    # plot each class with its own color
    plt.scatter(x_0.iloc[:, 1], x_0.iloc[:, 2], c='b', label='y = 0')
    plt.scatter(x_1.iloc[:, 1], x_1.iloc[:, 2], c='r', label='y = 1')

    # decision boundary: theta0 + theta1*x1 + theta2*x2 = 0, solved for x2
    x1 = np.arange(0, 10, 1)
    x2 = -(theta[0] + theta[1] * x1) / theta[2]
    plt.plot(x1, x2, c='k', label='reg line')

    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()
72+
73+
if __name__ == "__main__":
    # Load the training data and prepend a bias column of ones (x0 = 1)
    df = pd.read_csv("data.csv")
    df.insert(0, 'x0', 1.0)
    X_train = df.iloc[:, 0:3]
    y_train = df['label']

    # Train the model
    parameters = fit(X_train, y_train)

    # Score a handful of new samples; each row is [bias, feature1, feature2]
    X_test = np.array([
        [1, 5.123, 6.872],
        [1, 1.239, 6.165],
        [1, 8.6254, 7.829],
        [1, 2.382, 7.525],
        [1, 9.282, 1.626],
        [1, 3.272, 5.737],
        [1, 6.345, 4.276],
        [1, 3.372, 8.238],
    ])
    result = predict(X_test, parameters)
    print(result)

    # Visualize the training data and the learned decision boundary
    plot_reg(X_train, y_train, parameters)
0 commit comments