|
| 1 | +# Logistic regression implemented from Scratch in Python |
| 2 | +import sys |
| 3 | +import numpy as np |
| 4 | +import matplotlib.pyplot as plt |
| 5 | + |
| 6 | + |
def sigmoid(scores):
    """Element-wise logistic function: 1 / (1 + exp(-scores)).

    Evaluated in a numerically stable form so that large-magnitude
    scores (positive or negative) cannot overflow ``np.exp``.
    Returns values in (0, 1) with the same shape as ``scores``.
    """
    # exp(-|s|) always lies in (0, 1], so it never overflows.
    z = np.exp(-np.abs(scores))
    # For s >= 0: 1 / (1 + exp(-s)); for s < 0: exp(s) / (1 + exp(s)).
    # Both branches are mathematically identical to the naive formula.
    return np.where(scores >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
| 9 | + |
| 10 | + |
def log_likelihood(features, target, weights):
    """Bernoulli log-likelihood of ``target`` under the logistic model.

    Parameters
    ----------
    features : (n, d) array of inputs.
    target : (n,) array of 0/1 labels.
    weights : (d,) array of model coefficients.

    Returns
    -------
    float : sum over samples of ``t * s - log(1 + exp(s))`` where
    ``s = features @ weights``.
    """
    scores = np.dot(features, weights)
    # logaddexp(0, s) == log(1 + exp(s)) but never overflows for large s,
    # where the naive np.log(1 + np.exp(s)) would return inf.
    return np.sum(target * scores - np.logaddexp(0, scores))
| 15 | + |
| 16 | + |
def logistic_regression(features, target, num_steps, learning_rate,
                        add_intercept=False):
    """Fit logistic-regression weights by full-batch gradient ascent.

    Parameters
    ----------
    features : (n, d) array of inputs.
    target : (n,) array of 0/1 labels.
    num_steps : number of gradient-ascent iterations to run.
    learning_rate : step size for each weight update.
    add_intercept : if True, prepend a constant-1 column so the first
        returned weight is the intercept.

    Returns
    -------
    (d,) or (d+1,) weight vector (larger when ``add_intercept``).
    """
    if add_intercept:
        intercept = np.ones((features.shape[0], 1))
        features = np.hstack((intercept, features))

    weights = np.zeros(features.shape[1])

    # Off-by-one fix: range(1, num_steps) performed only num_steps - 1
    # updates; range(num_steps) performs exactly num_steps of them.
    for step in range(num_steps):
        scores = np.dot(features, weights)
        predictions = sigmoid(scores)

        # Gradient of the log-likelihood: X^T (t - p); ascend it.
        output_error_signal = target - predictions
        gradient = np.dot(features.T, output_error_signal)
        weights += learning_rate * gradient

        # Report progress every 10000 steps (skip the untrained step 0,
        # matching the original print cadence).
        if step % 10000 == 0 and step > 0:
            print(log_likelihood(features, target, weights))

    return weights
| 40 | + |
| 41 | + |
# Simulate two correlated Gaussian clusters and fit the logistic model.
np.random.seed(12)
num_observations = 5000

# Both classes share the same covariance; only the means differ.
cov = [[1, .75], [.75, 1]]
x1 = np.random.multivariate_normal([0, 0], cov, num_observations)
x2 = np.random.multivariate_normal([1, 4], cov, num_observations)

simulated_separableish_features = np.vstack((x1, x2)).astype(np.float32)
simulated_labels = np.hstack((np.zeros(num_observations),
                              np.ones(num_observations)))

# Visualize the two classes before fitting.
plt.figure(figsize=(12, 8))
plt.scatter(simulated_separableish_features[:, 0],
            simulated_separableish_features[:, 1],
            c=simulated_labels,
            alpha=.4)
plt.show()

# Fit: many small gradient-ascent steps with an intercept column added.
weights = logistic_regression(simulated_separableish_features,
                              simulated_labels,
                              num_steps=300000,
                              learning_rate=5e-5,
                              add_intercept=True)
print(weights)
0 commit comments