|
| 1 | +# Required imports to run this file |
| 2 | +import matplotlib.pyplot as plt |
| 3 | +import numpy as np |
| 4 | + |
| 5 | + |
| 6 | +# weighted matrix |
def weighted_matrix(
    point: np.matrix, training_data_x: np.matrix, bandwidth: float
) -> np.matrix:
    """
    Build the diagonal Gaussian-kernel weight matrix for one query point.

    Entry ``[j, j]`` of the result is
    ``exp(-||point - x_j||^2 / (2 * bandwidth ** 2))`` where ``x_j`` is row
    ``j`` of ``training_data_x``; every off-diagonal entry is zero.

    point           --> 1 x n row vector at which a prediction is wanted
    training_data_x --> m x n training inputs, one sample per row
    bandwidth       --> kernel width (tau); smaller values make the weights
                        fall off faster with distance

    Returns an m x m ``np.matrix``.
    """
    # Work on plain float arrays so the arithmetic below is element-wise.
    samples = np.asarray(training_data_x, dtype=float)
    query = np.asarray(point, dtype=float).reshape(1, -1)

    # Squared Euclidean distance from the query point to every sample.
    deltas = samples - query
    squared_dist = (deltas * deltas).sum(axis=1)

    # np.asmatrix instead of the np.mat alias, which NumPy 2.0 removed.
    return np.asmatrix(np.diag(np.exp(squared_dist / (-2.0 * bandwidth ** 2))))
| 25 | + |
| 26 | + |
def local_weight(
    point: np.matrix,
    training_data_x: np.matrix,
    training_data_y: np.matrix,
    bandwidth: float,
) -> np.matrix:
    """
    Solve the weighted least-squares problem around one query point.

    The kernel weights W come from ``weighted_matrix``; the returned
    parameter vector is ``(X^T W X)^-1 (X^T W y^T)``.

    point           --> 1 x n row vector at which a prediction is wanted
    training_data_x --> m x n training inputs, one sample per row
    training_data_y --> 1 x m row vector of training targets
    bandwidth       --> kernel width passed through to ``weighted_matrix``

    Returns an n x 1 ``np.matrix`` of regression parameters.
    Raises ``numpy.linalg.LinAlgError`` if ``X^T W X`` is singular.
    """
    # Use the function's own arguments (not a module-level name), coerced to
    # matrices so that ``*`` below is a matrix product.
    x = np.asmatrix(training_data_x)
    y = np.asmatrix(training_data_y)

    weight = weighted_matrix(point, x, bandwidth)
    weighted_x_t = x.T * weight
    return (weighted_x_t * x).I * (weighted_x_t * y.T)
| 39 | + |
| 40 | + |
def local_weight_regression(
    training_data_x: np.matrix, training_data_y: np.matrix, bandwidth: float
) -> np.ndarray:
    """
    Predict the target at every training input with locally weighted regression.

    For each row of ``training_data_x`` a separate parameter vector is fitted
    with ``local_weight`` and applied to that same row.

    training_data_x --> m x n training inputs, one sample per row
    training_data_y --> 1 x m row vector of training targets
    bandwidth       --> kernel width passed through to ``local_weight``

    Returns a 1-D ``np.ndarray`` of length m with one prediction per row.
    """
    x = np.asmatrix(training_data_x)
    ypred = np.zeros(x.shape[0])

    # Iterating a matrix yields 1 x n row matrices.
    for i, row in enumerate(x):
        theta = local_weight(row, x, training_data_y, bandwidth)
        # row * theta is a 1 x 1 matrix; take the scalar explicitly because
        # implicit size-1-array-to-scalar conversion is deprecated in NumPy.
        ypred[i] = (row * theta)[0, 0]

    return ypred
| 56 | + |
| 57 | + |
def load_data(dataset_name: str, cola_name: str, colb_name: str) -> tuple:
    """
    Load two columns of a seaborn dataset and prepare them for regression.

    dataset_name --> name handed to ``seaborn.load_dataset``
    cola_name    --> column used as the input (x) values
    colb_name    --> column used as the target (y) values

    Returns a 4-tuple ``(training_data, mcol_b, col_a, col_b)``:
    training_data --> m x 2 matrix whose first column is all ones (intercept
                      term) and whose second column holds the x values
    mcol_b        --> 1 x m matrix of the y values
    col_a, col_b  --> the same two columns as plain 1-D arrays
    """
    # Imported here so seaborn is only required when data is actually loaded.
    import seaborn as sns

    data = sns.load_dataset(dataset_name)
    col_a = np.array(data[cola_name])
    col_b = np.array(data[colb_name])

    # np.asmatrix instead of the np.mat alias, which NumPy 2.0 removed.
    mcol_a = np.asmatrix(col_a)
    mcol_b = np.asmatrix(col_b)

    m = np.shape(mcol_b)[1]
    one = np.ones((1, m), dtype=int)

    # Stack the intercept column and the x column side by side.
    training_data = np.asmatrix(np.hstack((one.T, mcol_a.T)))

    return training_data, mcol_b, col_a, col_b
| 78 | + |
| 79 | + |
def get_preds(training_data: np.matrix, mcol_b: np.matrix, tau: float) -> np.ndarray:
    """
    Return the locally weighted regression prediction for every training row.

    Thin wrapper around ``local_weight_regression``.

    training_data --> m x n training inputs, one sample per row
    mcol_b        --> 1 x m row vector of training targets
    tau           --> kernel bandwidth
    """
    return local_weight_regression(training_data, mcol_b, tau)
| 86 | + |
| 87 | + |
def plot_preds(
    training_data: np.matrix,
    predictions: np.ndarray,
    col_x: np.ndarray,
    col_y: np.ndarray,
    cola_name: str,
    colb_name: str,
) -> None:
    """
    Scatter the raw data, overlay the fitted curve and show the figure.

    training_data --> matrix whose column 1 holds the x value of each sample
    predictions   --> one predicted y value per row of ``training_data``
    col_x, col_y  --> raw x and y values drawn as blue scatter points
    cola_name     --> label for the x axis
    colb_name     --> label for the y axis

    Returns nothing; the figure is displayed with ``plt.show()``.
    """
    # Sort by x once and reuse the same order for x and for the predictions,
    # so the line is drawn left to right with matching pairs.
    x_values = np.asarray(training_data)[:, 1]
    order = x_values.argsort()

    plt.scatter(col_x, col_y, color="blue")
    plt.plot(
        x_values[order],
        np.asarray(predictions)[order],
        color="yellow",
        linewidth=5,
    )
    plt.title("Local Weighted Regression")
    plt.xlabel(cola_name)
    plt.ylabel(colb_name)
    plt.show()
| 112 | + |
| 113 | + |
if __name__ == "__main__":
    # Demo run: use the "total_bill" column of the seaborn "tips" dataset as
    # the input and the "tip" column as the target.
    training_data, mcol_b, col_a, col_b = load_data("tips", "total_bill", "tip")
    # 0.5 is the kernel bandwidth (tau) passed to get_preds.
    predictions = get_preds(training_data, mcol_b, 0.5)
    # Draw the raw points together with the fitted curve.
    plot_preds(training_data, predictions, col_a, col_b, "total_bill", "tip")
0 commit comments