|
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
| 3 | + |
| 4 | + |
class RidgeRegression:
    """
    Ridge (L2-regularised) linear regression trained with batch gradient descent.

    Features are standardised using the mean and standard deviation learned in
    ``fit``; ``predict`` and ``compute_cost`` reuse those stored statistics so
    new data is mapped into the same feature space the weights were trained in.

    The model has no separate intercept term. Because every column is centred,
    a constant column (e.g. a column of ones) is scaled to all zeros and its
    weight never moves away from 0, so targets should be centred by the caller.
    """

    def __init__(self, alpha=0.001, lambda_=0.1, iterations=1000):
        """
        Ridge Regression Constructor
        :param alpha: Learning rate for gradient descent
        :param lambda_: Regularization parameter (L2 regularization)
        :param iterations: Number of iterations for gradient descent
        """
        self.alpha = alpha
        self.lambda_ = lambda_
        self.iterations = iterations
        self.theta = None
        # Per-feature statistics learned by fit(); None until the model is fitted.
        self.mean_ = None
        self.std_ = None

    def feature_scaling(self, X):
        """
        Normalize features to have mean 0 and standard deviation 1,
        using the statistics of ``X`` itself
        :param X: Input features, shape (m, n)
        :return: Scaled features, mean, and std for each feature
        """
        X = np.asarray(X, dtype=float)
        mean = np.mean(X, axis=0)
        std = np.std(X, axis=0)

        # Constant features have std == 0; divide by 1 instead so they become
        # zeros rather than NaN.
        std = np.where(std == 0, 1.0, std)

        return (X - mean) / std, mean, std

    def _transform(self, X):
        """
        Scale ``X`` with the statistics stored by ``fit``
        :param X: Input features, shape (m, n)
        :return: Scaled features, shape (m, n)
        :raises ValueError: If the model has not been fitted yet
        """
        mean = getattr(self, "mean_", None)
        std = getattr(self, "std_", None)
        if self.theta is None or mean is None or std is None:
            raise ValueError(
                "This RidgeRegression instance is not fitted yet; call fit() first."
            )
        return (np.asarray(X, dtype=float) - mean) / std

    def fit(self, X, y):
        """
        Fit the Ridge Regression model to the training data
        :param X: Input features, shape (m, n)
        :param y: Target values, shape (m,)
        :raises ValueError: If X is not a non-empty 2-D array or y has a
            different number of samples
        """
        X_scaled, mean, std = self.feature_scaling(X)
        if X_scaled.ndim != 2 or X_scaled.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array of shape (m, n)")

        # Flatten y so an (m, 1) column vector cannot broadcast the error
        # into an (m, m) matrix.
        y = np.asarray(y, dtype=float).ravel()
        m, n = X_scaled.shape
        if y.shape[0] != m:
            raise ValueError("X and y must contain the same number of samples")

        # Remember the training statistics for predict() / compute_cost().
        self.mean_, self.std_ = mean, std
        self.theta = np.zeros(n)  # Initialize weights to zeros

        for _ in range(self.iterations):
            error = X_scaled.dot(self.theta) - y

            # Gradient of the squared-error cost plus the L2 penalty
            gradient = (X_scaled.T.dot(error) + self.lambda_ * self.theta) / m
            self.theta -= self.alpha * gradient  # Update weights

    def predict(self, X):
        """
        Predict values using the trained model
        :param X: Input features, shape (m, n)
        :return: Predicted values, shape (m,)
        :raises ValueError: If the model has not been fitted yet
        """
        # Scale with the training mean/std, not the statistics of X itself
        return self._transform(X).dot(self.theta)

    def compute_cost(self, X, y):
        """
        Compute the cost function with regularization
        :param X: Input features, shape (m, n)
        :param y: Target values, shape (m,)
        :return: Computed cost
        :raises ValueError: If the model has not been fitted yet
        """
        X_scaled = self._transform(X)
        y = np.asarray(y, dtype=float).ravel()
        m = y.shape[0]
        residuals = X_scaled.dot(self.theta) - y
        data_term = np.sum(residuals**2) / (2 * m)
        penalty_term = (self.lambda_ / (2 * m)) * np.sum(self.theta**2)
        return data_term + penalty_term

    def mean_absolute_error(self, y_true, y_pred):
        """
        Compute Mean Absolute Error (MAE) between true and predicted values
        :param y_true: Actual target values, shape (m,)
        :param y_pred: Predicted target values, shape (m,)
        :return: MAE
        """
        return np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred)))
| 83 | + |
| 84 | + |
# Example usage
if __name__ == "__main__":
    # Download the sample ADR-vs-Rating dataset (needs network access)
    frame = pd.read_csv(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/master/Week1/ADRvsRating.csv"
    )

    # Single feature column (Rating) and the target column (ADR)
    ratings = frame[["Rating"]].values
    adr = frame["ADR"].values

    # Standardise the target to zero mean and unit variance
    y = (adr - np.mean(adr)) / np.std(adr)

    # Prepend a column of ones to the feature matrix.
    # NOTE: RidgeRegression.feature_scaling centres every column, so this
    # constant column is scaled to all zeros and its weight stays at 0.
    ones_column = np.ones(ratings.shape[0])
    X = np.column_stack((ones_column, ratings))

    # Build the model and run gradient descent on the full dataset
    ridge = RidgeRegression(alpha=0.01, lambda_=0.1, iterations=1000)
    ridge.fit(X, y)

    # In-sample predictions
    fitted_values = ridge.predict(X)

    # Report the learned weights and the fit quality
    print("Optimized Weights:", ridge.theta)
    print("Cost:", ridge.compute_cost(X, y))
    print("Mean Absolute Error:", ridge.mean_absolute_error(y, fitted_values))
0 commit comments