
Commit 08c4def

Added a logistic regression algorithm for machine learning

1 parent c12243c commit 08c4def

2 files changed, +184 -0 lines changed
data.csv
@@ -0,0 +1,101 @@
x1,x2,label
4.5192,2.6487,1
0.9008,1.169,1
0.9008,3.4379,0
0.9008,5.7857,0
1.331,6.5355,0
1.4069,4.9571,0
1.6852,2.9841,0
1.7358,5.4503,0
1.7864,4.1876,0
1.9383,3.6549,0
1.9636,6.3382,0
2.1154,1.7411,1
2.3684,5.0163,0
2.3937,7.2063,0
2.419,6.8511,0
2.4443,1.5438,1
2.4443,1.2676,1
2.4443,5.8449,0
2.7227,4.5822,0
2.8745,6.0817,0
2.9757,2.0568,1
3.1781,4.8979,0
3.2034,4.4244,0
3.2794,1.386,1
3.33,2.7868,1
3.4312,3.7536,0
3.4818,5.8055,0
4.0132,7.1668,0
4.0891,7.5417,0
4.1144,4.8387,0
4.1144,5.3911,0
4.165,1.5636,1
4.2156,6.496,0
4.2409,1.899,1
4.3168,4.4244,0
4.4686,3.0236,1
4.4939,1.4057,1
4.6457,8.5676,0
4.6457,8.1676,0
4.6711,7.226,0
4.6711,5.8055,0
4.6711,6.2592,0
4.8482,1.8793,1
4.8735,2.6093,1
4.8988,5.5687,0
4.9241,2.6882,1
5.1012,6.0817,0
5.1012,7.6009,0
5.1518,3.5563,1
5.1771,8.1533,0
5.3543,3.9903,1
5.4555,7.0484,0
5.4808,5.2728,0
5.5061,2.9052,1
5.5314,3.0828,1
5.7338,6.6538,0
5.7591,6.0028,0
5.8097,2.4711,1
5.8097,3.2406,1
5.9615,6.4565,0
5.9868,8.5084,0
5.9868,7.3641,0
6.0121,3.9311,1
6.0374,4.7598,1
6.1387,1.7806,1
6.2146,7.4825,0
6.2652,4.0693,1
6.2652,4.3849,1
6.3917,3.8128,1
6.4423,3.359,1
6.5688,4.9571,1
6.5941,5.2333,1
6.6194,7.1471,0
6.6447,3.8325,1
6.6953,6.7722,0
6.7966,3.6747,1
6.8725,4.4441,1
6.999,3.2406,1
7.1508,4.7598,1
7.2014,1.5438,1
7.2014,7.5219,0
7.2014,6.8314,0
7.4038,3.8917,1
7.581,8.3703,0
7.6316,4.602,1
7.6569,2.412,1
7.6822,4.5428,1
7.7075,5.3122,1
7.7581,5.7265,1
7.7581,7.1865,0
7.7581,7.7784,0
7.8593,3.8128,1
7.8593,5.253,1
8.0364,5.7857,1
8.163,4.7401,1
8.3148,5.7068,1
8.5172,5.1149,1
8.5931,7.6206,0
8.7449,5.4109,1
8.9221,6.5552,1
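
The dataset has two real-valued features, x1 and x2, and a binary label. A quick sketch for inspecting it before training (assuming the file is saved as data.csv, the name the training script below reads):

    import pandas as pd

    # Load the dataset committed above
    df = pd.read_csv("data.csv")

    # Feature ranges and class balance between labels 0 and 1
    print(df.describe())
    print(df['label'].value_counts())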
@@ -0,0 +1,83 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Function that returns the sigmoid of the linear combination X . theta
def sigmoid(theta, X):
    return 1 / (1 + np.exp(-np.dot(X, theta.T)))


# Calculates the cross-entropy cost of the model. Training minimizes this cost.
def cost(X, theta, y):
    m = len(y)
    sigmoid_result = sigmoid(theta, X)
    value = (-1/m) * np.sum(y*np.log(sigmoid_result) + (1-y)*np.log(1-sigmoid_result))
    return value


# Returns the partial derivatives (gradient) of the cost with respect to theta
def log_gradient(theta, X, y):
    m = len(y)
    h = sigmoid(theta, X) - y
    result = (1/m) * np.dot(h.T, X)
    return result


# Batch gradient descent: step against the gradient until the cost stops
# improving by more than converge_change
def gradientDescent(X, theta, y, learning_rate=.01, converge_change=.00001):
    cost_value = cost(X, theta, y)
    change_cost = 1
    n = 0
    while change_cost > converge_change:
        old_cost = cost_value
        theta = theta - learning_rate*log_gradient(theta, X, y)
        cost_value = cost(X, theta, y)
        change_cost = old_cost - cost_value
        n += 1
    print(f"Converged after {n} iterations")
    return theta


# Trains the model on the training data, starting from all-zero parameters
def fit(X_train, y_train):
    theta = np.zeros(X_train.shape[1])
    updated_parameters = gradientDescent(X_train, theta, y_train)
    return updated_parameters


# Predicts labels for new or test data: probability >= 0.5 maps to class 1
def predict(X_test, final_theta):
    predicted_probabilities = sigmoid(final_theta, X_test)
    predicted_value = np.where(predicted_probabilities >= .5, 1, 0)
    return predicted_value


# Visualizes the logistic regression: all samples plus the decision boundary
def plot_reg(X, y, theta):
    '''
    Function to plot the decision boundary
    '''
    # labelled observations
    X = np.array(X)
    x_0 = pd.DataFrame(X[np.where(y == 0)])
    x_1 = pd.DataFrame(X[np.where(y == 1)])

    # plot the points, one color per label
    plt.scatter(x_0.iloc[:, 1], x_0.iloc[:, 2], c='b', label='y = 0')
    plt.scatter(x_1.iloc[:, 1], x_1.iloc[:, 2], c='r', label='y = 1')

    # plot the decision boundary theta0 + theta1*x1 + theta2*x2 = 0
    x1 = np.arange(0, 10, 1)
    x2 = -(theta[0] + theta[1]*x1) / theta[2]
    plt.plot(x1, x2, c='k', label='decision boundary')

    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()


if __name__ == "__main__":
    df = pd.read_csv("data.csv")
    df.insert(0, 'x0', 1.0)  # bias column of ones
    X_train = df.iloc[:, 0:3].values
    y_train = df['label'].values
    parameters = fit(X_train, y_train)
    X_test = np.array([[1, 5.123, 6.872], [1, 1.239, 6.165], [1, 8.6254, 7.829], [1, 2.382, 7.525], [1, 9.282, 1.626], [1, 3.272, 5.737], [1, 6.345, 4.276], [1, 3.372, 8.238]])
    result = predict(X_test, parameters)
    print(result)
    plot_reg(X_train, y_train, parameters)
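
As an optional sanity check (not part of this commit; assumes scikit-learn is installed and the data file is saved as data.csv), one could compare the learned boundary against scikit-learn's LogisticRegression on the same two features:

    import pandas as pd
    from sklearn.linear_model import LogisticRegression

    # Fit scikit-learn's logistic regression on the same data
    df = pd.read_csv("data.csv")
    X = df[['x1', 'x2']].values
    y = df['label'].values

    clf = LogisticRegression().fit(X, y)

    # clf.intercept_ plays the role of theta[0]; clf.coef_ of theta[1:].
    # The boundaries won't match exactly, since scikit-learn applies L2
    # regularization by default, but they should be close in direction.
    print("intercept:", clf.intercept_)
    print("coefficients:", clf.coef_)
    print("training accuracy:", clf.score(X, y))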

0 commit comments
