1
+ import numpy as np
2
+ import pandas as pd
3
+ import matplotlib .pyplot as plt
4
+
5
# Logistic (sigmoid) activation applied to the linear scores X @ theta.
def sigmoid(theta, X):
    """Return elementwise 1 / (1 + e^(-X @ theta)) for parameter vector theta."""
    scores = np.dot(X, theta.T)
    return 1.0 / (1.0 + np.exp(-scores))
8
+
9
# Cross-entropy cost of the current model; training minimizes this value.
# NOTE: despite the name, this is the cost function itself, not its
# derivative — see log_gradient for the actual gradient.
def gradient(X, theta, y):
    """Return the mean negative log-likelihood of theta over (X, y)."""
    m = len(y)
    h = sigmoid(theta, X)
    log_likelihood = y * np.log(h) + (1 - y) * np.log(1 - h)
    return (-1 / m) * np.sum(log_likelihood)
15
+
16
# Partial derivatives of the cross-entropy cost with respect to theta.
def log_gradient(theta, X, y):
    """Return the gradient (1/m) * (h - y)^T X of the cost at theta."""
    m = len(y)
    error = sigmoid(theta, X) - y
    return (1 / m) * np.dot(error.T, X)
22
+
23
# Batch gradient descent that minimizes the cross-entropy cost (gradient).
def gradientDescent(X, theta, y, learning_rate=.01, converge_change=.00001,
                    verbose=False):
    """Minimize the cost function and return the fitted parameters.

    Parameters
    ----------
    X : array-like, shape (m, n) -- design matrix (bias column included).
    theta : ndarray, shape (n,) -- initial parameter vector.
    y : array-like, shape (m,) -- binary labels (0/1).
    learning_rate : step size for each parameter update.
    converge_change : stop once the cost decreases by less than this amount
        between two consecutive iterations.
    verbose : when True, print the iteration count (this replaces an
        unconditional debug print in the original code; it is now opt-in
        so library callers are not spammed on stdout).

    Returns
    -------
    ndarray -- the parameter vector after convergence.
    """
    cost = gradient(X, theta, y)
    change_cost = 1
    n = 1
    while change_cost > converge_change:
        old_cost = cost
        # Step opposite the gradient direction to reduce the cost.
        theta = theta - learning_rate * log_gradient(theta, X, y)
        cost = gradient(X, theta, y)
        change_cost = old_cost - cost
        n += 1
    if verbose:
        print(n)
    return theta
36
+
37
# Train the model on the training data.
def fit(X_train, y_train):
    """Return logistic-regression parameters fitted via gradient descent.

    Starts from an all-zero parameter vector with one entry per column
    of X_train.
    """
    initial_theta = np.zeros(X_train.shape[1])
    return gradientDescent(X_train, initial_theta, y_train)
42
+
43
# Predict hard class labels for new or test data.
def predict(X_test, final_theta):
    """Return 1 where the predicted probability is at least 0.5, else 0."""
    probabilities = sigmoid(final_theta, X_test)
    return np.where(probabilities >= .5, 1, 0)
48
+
49
# Visualize the labelled samples together with the learned decision boundary.
def plot_reg(X, y, theta):
    '''
    function to plot decision boundary

    X     : array-like, shape (m, 3) -- bias column plus two features.
    y     : array-like, shape (m,)  -- binary labels (0/1).
    theta : fitted parameters [theta0, theta1, theta2].

    Shows a scatter of both classes and the line theta0 + theta1*x1 +
    theta2*x2 = 0. (A stray debug print of theta was removed.)
    '''
    # labelled observations, split by class
    X = np.array(X)
    x_0 = pd.DataFrame(X[np.where(y == 0)])
    x_1 = pd.DataFrame(X[np.where(y == 1)])

    # plotting points with a different color per label
    # (column 0 is the bias term, so the features are columns 1 and 2)
    plt.scatter(x_0.iloc[:, 1], x_0.iloc[:, 2], c='b', label='y = 0')
    plt.scatter(x_1.iloc[:, 1], x_1.iloc[:, 2], c='r', label='y = 1')

    # decision boundary: solve theta0 + theta1*x1 + theta2*x2 = 0 for x2
    # NOTE(review): x1 range 0..10 is hard-coded — assumes features fall in
    # that interval; verify against the data set.
    x1 = np.arange(0, 10, 1)
    x2 = -(theta[0] + theta[1] * x1) / theta[2]
    plt.plot(x1, x2, c='k', label='reg line')

    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()
72
+
73
+
74
+ if __name__ == "__main__" :
75
+ df = pd .read_csv ("data.csv" )
76
+ df .insert (0 , 'x0' , 1.0 )
77
+ X_train = df .iloc [:,0 :3 ]
78
+ y_train = df ['label' ]
79
+ parameters = fit (X_train , y_train )
80
+ X_test = np .array ([[1 , 5.123 , 6.872 ], [1 , 1.239 , 6.165 ], [1 , 8.6254 , 7.829 ], [1 , 2.382 , 7.525 ], [1 , 9.282 , 1.626 ], [1 , 3.272 , 5.737 ], [1 , 6.345 , 4.276 ], [1 , 3.372 , 8.238 ]])
81
+ result = predict (X_test , parameters )
82
+ print (result )
83
+ plot_reg (X_train , y_train , parameters )
0 commit comments