|
| 1 | +""" |
| 2 | +Polynomial regression is a type of regression analysis that models the relationship |
| 3 | +between a predictor x and the response y as an mth-degree polynomial: |
| 4 | +
|
| 5 | +y = β₀ + β₁x + β₂x² + ... + βₘxᵐ + ε |
| 6 | +
|
| 7 | +By treating x, x², ..., xᵐ as distinct variables, we see that polynomial regression is a |
| 8 | +special case of multiple linear regression. Therefore, we can use ordinary least squares |
| 9 | +(OLS) estimation to estimate the vector of model parameters β = (β₀, β₁, β₂, ..., βₘ) |
| 10 | +for polynomial regression: |
| 11 | +
|
| 12 | +β = (XᵀX)⁻¹Xᵀy = X⁺y |
| 13 | +
|
| 14 | +where X is the design matrix, y is the response vector, and X⁺ denotes the Moore–Penrose |
| 15 | +pseudoinverse of X. In the case of polynomial regression, the design matrix is |
| 16 | +
|
| 17 | + |1 x₁ x₁² ⋯ x₁ᵐ| |
| 18 | +X = |1 x₂ x₂² ⋯ x₂ᵐ| |
| 19 | + |⋮ ⋮ ⋮ ⋱ ⋮ | |
| 20 | + |1 xₙ xₙ² ⋯ xₙᵐ| |
| 21 | +
|
| 22 | +In OLS estimation, inverting XᵀX to compute X⁺ can be very numerically unstable. This |
| 23 | +implementation sidesteps this need to invert XᵀX by computing X⁺ using singular value |
| 24 | +decomposition (SVD): |
| 25 | +
|
| 26 | +β = VΣ⁺Uᵀy |
| 27 | +
|
| 28 | +where UΣVᵀ is an SVD of X. |
| 29 | +
|
| 30 | +References: |
| 31 | + - https://en.wikipedia.org/wiki/Polynomial_regression |
| 32 | + - https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse |
| 33 | + - https://en.wikipedia.org/wiki/Numerical_methods_for_linear_least_squares |
| 34 | + - https://en.wikipedia.org/wiki/Singular_value_decomposition |
| 35 | +""" |
| 36 | + |
| 37 | +import matplotlib.pyplot as plt |
| 38 | +import numpy as np |
| 39 | + |
| 40 | + |
| 41 | +class PolynomialRegression: |
| 42 | + __slots__ = "degree", "params" |
| 43 | + |
| 44 | + def __init__(self, degree: int) -> None: |
| 45 | + """ |
| 46 | + @raises ValueError: if the polynomial degree is negative |
| 47 | + """ |
| 48 | + if degree < 0: |
| 49 | + raise ValueError("Polynomial degree must be non-negative") |
| 50 | + |
| 51 | + self.degree = degree |
| 52 | + self.params = None |
| 53 | + |
| 54 | + @staticmethod |
| 55 | + def _design_matrix(data: np.ndarray, degree: int) -> np.ndarray: |
| 56 | + """ |
| 57 | + Constructs a polynomial regression design matrix for the given input data. For |
| 58 | + input data x = (x₁, x₂, ..., xₙ) and polynomial degree m, the design matrix is |
| 59 | + the Vandermonde matrix |
| 60 | +
|
| 61 | + |1 x₁ x₁² ⋯ x₁ᵐ| |
| 62 | + X = |1 x₂ x₂² ⋯ x₂ᵐ| |
| 63 | + |⋮ ⋮ ⋮ ⋱ ⋮ | |
| 64 | + |1 xₙ xₙ² ⋯ xₙᵐ| |
| 65 | +
|
| 66 | + Reference: https://en.wikipedia.org/wiki/Vandermonde_matrix |
| 67 | +
|
| 68 | + @param data: the input predictor values x, either for model fitting or for |
| 69 | + prediction |
| 70 | + @param degree: the polynomial degree m |
| 71 | + @returns: the Vandermonde matrix X (see above) |
| 72 | + @raises ValueError: if input data is not N x 1 |
| 73 | +
|
| 74 | + >>> x = np.array([0, 1, 2]) |
| 75 | + >>> PolynomialRegression._design_matrix(x, degree=0) |
| 76 | + array([[1], |
| 77 | + [1], |
| 78 | + [1]]) |
| 79 | + >>> PolynomialRegression._design_matrix(x, degree=1) |
| 80 | + array([[1, 0], |
| 81 | + [1, 1], |
| 82 | + [1, 2]]) |
| 83 | + >>> PolynomialRegression._design_matrix(x, degree=2) |
| 84 | + array([[1, 0, 0], |
| 85 | + [1, 1, 1], |
| 86 | + [1, 2, 4]]) |
| 87 | + >>> PolynomialRegression._design_matrix(x, degree=3) |
| 88 | + array([[1, 0, 0, 0], |
| 89 | + [1, 1, 1, 1], |
| 90 | + [1, 2, 4, 8]]) |
| 91 | + >>> PolynomialRegression._design_matrix(np.array([[0, 0], [0 , 0]]), degree=3) |
| 92 | + Traceback (most recent call last): |
| 93 | + ... |
| 94 | + ValueError: Data must have dimensions N x 1 |
| 95 | + """ |
| 96 | + rows, *remaining = data.shape |
| 97 | + if remaining: |
| 98 | + raise ValueError("Data must have dimensions N x 1") |
| 99 | + |
| 100 | + return np.vander(data, N=degree + 1, increasing=True) |
| 101 | + |
| 102 | + def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None: |
| 103 | + """ |
| 104 | + Computes the polynomial regression model parameters using ordinary least squares |
| 105 | + (OLS) estimation: |
| 106 | +
|
| 107 | + β = (XᵀX)⁻¹Xᵀy = X⁺y |
| 108 | +
|
| 109 | + where X⁺ denotes the Moore–Penrose pseudoinverse of the design matrix X. This |
| 110 | + function computes X⁺ using singular value decomposition (SVD). |
| 111 | +
|
| 112 | + References: |
| 113 | + - https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse |
| 114 | + - https://en.wikipedia.org/wiki/Singular_value_decomposition |
| 115 | + - https://en.wikipedia.org/wiki/Multicollinearity |
| 116 | +
|
| 117 | + @param x_train: the predictor values x for model fitting |
| 118 | + @param y_train: the response values y for model fitting |
| 119 | + @raises ArithmeticError: if X isn't full rank, then XᵀX is singular and β |
| 120 | + doesn't exist |
| 121 | +
|
| 122 | + >>> x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) |
| 123 | + >>> y = x**3 - 2 * x**2 + 3 * x - 5 |
| 124 | + >>> poly_reg = PolynomialRegression(degree=3) |
| 125 | + >>> poly_reg.fit(x, y) |
| 126 | + >>> poly_reg.params |
| 127 | + array([-5., 3., -2., 1.]) |
| 128 | + >>> poly_reg = PolynomialRegression(degree=20) |
| 129 | + >>> poly_reg.fit(x, y) |
| 130 | + Traceback (most recent call last): |
| 131 | + ... |
| 132 | + ArithmeticError: Design matrix is not full rank, can't compute coefficients |
| 133 | +
|
| 134 | + Make sure errors don't grow too large: |
| 135 | + >>> coefs = np.array([-250, 50, -2, 36, 20, -12, 10, 2, -1, -15, 1]) |
| 136 | + >>> y = PolynomialRegression._design_matrix(x, len(coefs) - 1) @ coefs |
| 137 | + >>> poly_reg = PolynomialRegression(degree=len(coefs) - 1) |
| 138 | + >>> poly_reg.fit(x, y) |
| 139 | + >>> np.allclose(poly_reg.params, coefs, atol=10e-3) |
| 140 | + True |
| 141 | + """ |
| 142 | + X = PolynomialRegression._design_matrix(x_train, self.degree) # noqa: N806 |
| 143 | + _, cols = X.shape |
| 144 | + if np.linalg.matrix_rank(X) < cols: |
| 145 | + raise ArithmeticError( |
| 146 | + "Design matrix is not full rank, can't compute coefficients" |
| 147 | + ) |
| 148 | + |
| 149 | + # np.linalg.pinv() computes the Moore–Penrose pseudoinverse using SVD |
| 150 | + self.params = np.linalg.pinv(X) @ y_train |
| 151 | + |
| 152 | + def predict(self, data: np.ndarray) -> np.ndarray: |
| 153 | + """ |
| 154 | + Computes the predicted response values y for the given input data by |
| 155 | + constructing the design matrix X and evaluating y = Xβ. |
| 156 | +
|
| 157 | + @param data: the predictor values x for prediction |
| 158 | + @returns: the predicted response values y = Xβ |
| 159 | + @raises ArithmeticError: if this function is called before the model |
| 160 | + parameters are fit |
| 161 | +
|
| 162 | + >>> x = np.array([0, 1, 2, 3, 4]) |
| 163 | + >>> y = x**3 - 2 * x**2 + 3 * x - 5 |
| 164 | + >>> poly_reg = PolynomialRegression(degree=3) |
| 165 | + >>> poly_reg.fit(x, y) |
| 166 | + >>> poly_reg.predict(np.array([-1])) |
| 167 | + array([-11.]) |
| 168 | + >>> poly_reg.predict(np.array([-2])) |
| 169 | + array([-27.]) |
| 170 | + >>> poly_reg.predict(np.array([6])) |
| 171 | + array([157.]) |
| 172 | + >>> PolynomialRegression(degree=3).predict(x) |
| 173 | + Traceback (most recent call last): |
| 174 | + ... |
| 175 | + ArithmeticError: Predictor hasn't been fit yet |
| 176 | + """ |
| 177 | + if self.params is None: |
| 178 | + raise ArithmeticError("Predictor hasn't been fit yet") |
| 179 | + |
| 180 | + return PolynomialRegression._design_matrix(data, self.degree) @ self.params |
| 181 | + |
| 182 | + |
| 183 | +def main() -> None: |
| 184 | + """ |
| 185 | + Fit a polynomial regression model to predict fuel efficiency using seaborn's mpg |
| 186 | + dataset |
| 187 | +
|
| 188 | + >>> pass # Placeholder, function is only for demo purposes |
| 189 | + """ |
| 190 | + import seaborn as sns |
| 191 | + |
| 192 | + mpg_data = sns.load_dataset("mpg") |
| 193 | + |
| 194 | + poly_reg = PolynomialRegression(degree=2) |
| 195 | + poly_reg.fit(mpg_data.weight, mpg_data.mpg) |
| 196 | + |
| 197 | + weight_sorted = np.sort(mpg_data.weight) |
| 198 | + predictions = poly_reg.predict(weight_sorted) |
| 199 | + |
| 200 | + plt.scatter(mpg_data.weight, mpg_data.mpg, color="gray", alpha=0.5) |
| 201 | + plt.plot(weight_sorted, predictions, color="red", linewidth=3) |
| 202 | + plt.title("Predicting Fuel Efficiency Using Polynomial Regression") |
| 203 | + plt.xlabel("Weight (lbs)") |
| 204 | + plt.ylabel("Fuel Efficiency (mpg)") |
| 205 | + plt.show() |
| 206 | + |
| 207 | + |
| 208 | +if __name__ == "__main__": |
| 209 | + import doctest |
| 210 | + |
| 211 | + doctest.testmod() |
| 212 | + |
| 213 | + main() |
0 commit comments