Skip to content

Commit 5c4aa05

Browse files
authored
Added AIC, BIC, fit_dataframe
1 parent de880d1 commit 5c4aa05

File tree

1 file changed

+181
-13
lines changed

1 file changed

+181
-13
lines changed

OOP_in_ML/Class_MyLinearRegression.py

Lines changed: 181 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@ class Metrics:
1212
r_squared: Regression coefficient (R^2)
1313
adj_r_squared: Adjusted R^2
1414
mse: Mean sum of squared errors
15+
AIC: Akaike information criterion
16+
BIC: Bayesian information criterion
1517
"""
1618

1719
def sse(self):
18-
"""Returns sum of squared errors (model vs actual)"""
20+
"""Returns sum of squared errors (model vs. actual)"""
1921
if not self.is_fitted:
2022
print("Model not fitted yet!")
2123
return None
@@ -57,8 +59,28 @@ def mse(self):
5759
self.mse_ = np.mean((self.predict(self.features_) - self.target_) ** 2)
5860
return self.mse_
5961

60-
def pretty_print_stats(self):
61-
"""Returns report of statistics for a given model object"""
62+
def aic(self):
    """Return the Akaike information criterion (AIC) for the fitted model.

    The value is taken from a statsmodels OLS fit of ``target_`` on
    ``features_`` (with an intercept column added).  Returns None and
    prints a message if the model has not been fitted yet.
    """
    if not self.is_fitted:
        print("Model not fitted yet!")
        return None
    # Refit with statsmodels purely to read off its information criterion.
    design = sm.add_constant(self.features_)
    ols_result = sm.OLS(self.target_, design).fit()
    return ols_result.aic
71+
72+
def bic(self):
    """Return the Bayesian information criterion (BIC) for the fitted model.

    The value is taken from a statsmodels OLS fit of ``target_`` on
    ``features_`` (with an intercept column added).  Returns None and
    prints a message if the model has not been fitted yet.
    """
    if not self.is_fitted:
        print("Model not fitted yet!")
        return None
    # Refit with statsmodels purely to read off its information criterion.
    design = sm.add_constant(self.features_)
    ols_result = sm.OLS(self.target_, design).fit()
    return ols_result.bic
81+
82+
def print_metrics(self):
83+
"""Prints a report of the useful metrics for a given model object"""
6284
if not self.is_fitted:
6385
print("Model not fitted yet!")
6486
return None
@@ -68,19 +90,54 @@ def pretty_print_stats(self):
6890
("mse:", self.mse()),
6991
("r^2:", self.r_squared()),
7092
("adj_r^2:", self.adj_r_squared()),
93+
("AIC:", self.aic()),
94+
("BIC:", self.bic()),
7195
)
7296
for item in items:
7397
print("{0:8} {1:.4f}".format(item[0], item[1]))
7498

99+
def summary_metrics(self):
    """Return a dictionary of goodness-of-fit metrics for the fitted model.

    Returns:
        dict mapping metric name to value, with keys "sse", "sst",
        "mse", "r^2", "adj_r^2", "AIC" and "BIC"; or None (after
        printing a message) if the model has not been fitted yet.
    """
    if not self.is_fitted:
        print("Model not fitted yet!")
        return None
    # Keys are bare metric names.  The trailing ':' that some keys
    # carried ("adj_r^2:", "AIC:", "BIC:") was a copy-paste artifact
    # from the printing helper and made key lookup inconsistent.
    return {
        "sse": self.sse(),
        "sst": self.sst(),
        "mse": self.mse(),
        "r^2": self.r_squared(),
        "adj_r^2": self.adj_r_squared(),
        "AIC": self.aic(),
        "BIC": self.bic(),
    }
117+
75118

76119
class Inference:
77120
"""
78-
Inferential statistics: standard error, p-values, etc.
121+
Inferential statistics:
122+
standard error,
123+
p-values
124+
t-test statistics
125+
F-statistics and p-value of F-test
79126
"""
80127

81128
def __init__(self):
    """Initialize the inference mixin (no state of its own).

    The original signature ``def __init__():`` omitted ``self``, so any
    instantiation (or an explicit ``super().__init__()`` call) raised
    TypeError.  The methods of this class operate on attributes
    (``is_fitted``, ``features_``, ``target_``) set by the host class.
    """
    pass
83130

131+
def std_err(self):
    """Return the standard errors of the feature coefficients.

    Values come from a statsmodels OLS fit (``bse`` attribute) of
    ``target_`` on ``features_`` with an intercept column added.
    Returns None and prints a message if the model is not fitted.
    """
    if not self.is_fitted:
        print("Model not fitted yet!")
        return None
    # Delegate the standard-error computation to statsmodels.
    design = sm.add_constant(self.features_)
    ols_result = sm.OLS(self.target_, design).fit()
    return ols_result.bse
140+
84141
def pvalues(self):
85142
"""
86143
Returns p-values of the features
@@ -90,7 +147,7 @@ def pvalues(self):
90147
return None
91148
lm = sm.OLS(self.target_, sm.add_constant(self.features_)).fit()
92149
return lm.pvalues
93-
150+
94151
def tvalues(self):
95152
"""
96153
Returns t-test values of the features
@@ -100,16 +157,16 @@ def tvalues(self):
100157
return None
101158
lm = sm.OLS(self.target_, sm.add_constant(self.features_)).fit()
102159
return lm.tvalues
103-
104-
def std_err(self):
160+
161+
def ftest(self):
    """Return the overall-regression F-statistic and its p-value.

    Returns:
        tuple ``(fvalue, f_pvalue)`` from a statsmodels OLS fit of
        ``target_`` on ``features_`` (with an intercept column added),
        or None (after printing a message) if the model is not fitted.
    """
    if not self.is_fitted:
        print("Model not fitted yet!")
        return None
    # Delegate the F-test to statsmodels and return both pieces together.
    design = sm.add_constant(self.features_)
    ols_result = sm.OLS(self.target_, design).fit()
    return (ols_result.fvalue, ols_result.f_pvalue)
113170

114171

115172
class Diagnostics_plots:
@@ -364,7 +421,7 @@ class MyLinearRegression(
364421
def __init__(self, fit_intercept=True):
365422
self.coef_ = None
366423
self.intercept_ = None
367-
self._fit_intercept = fit_intercept
424+
self.fit_intercept_ = fit_intercept
368425
self.is_fitted = False
369426
self.features_ = None
370427
self.target_ = None
@@ -388,7 +445,7 @@ def ingest_data(self, X, y):
388445
self.features_ = X
389446
self.target_ = y
390447

391-
def fit(self, X=None, y=None, _fit_intercept=True):
448+
def fit(self, X=None, y=None, fit_intercept_=True):
392449
"""
393450
Fit model coefficients.
394451
Arguments:
@@ -409,7 +466,118 @@ def fit(self, X=None, y=None, _fit_intercept=True):
409466
self.dfe_ = self.features_.shape[0] - self.features_.shape[1] - 1
410467

411468
# add bias if fit_intercept is True
412-
if self._fit_intercept:
469+
if self.fit_intercept_:
470+
X_biased = np.c_[np.ones(self.features_.shape[0]), self.features_]
471+
else:
472+
X_biased = self.features_
473+
# Assign target_ to a local variable y
474+
y = self.target_
475+
476+
# closed form solution
477+
xTx = np.dot(X_biased.T, X_biased)
478+
inverse_xTx = np.linalg.inv(xTx)
479+
xTy = np.dot(X_biased.T, y)
480+
coef = np.dot(inverse_xTx, xTy)
481+
482+
# set attributes
483+
if self.fit_intercept_:
484+
self.intercept_ = coef[0]
485+
self.coef_ = coef[1:]
486+
else:
487+
self.intercept_ = 0
488+
self.coef_ = coef
489+
490+
# Predicted/fitted y
491+
self.fitted_ = np.dot(self.features_, self.coef_) + self.intercept_
492+
493+
# Residuals
494+
residuals = self.target_ - self.fitted_
495+
self.resid_ = residuals
496+
497+
# Set is_fitted to True
498+
self.is_fitted = True
499+
500+
def fit(self, X=None, y=None, fit_intercept_=True):
    """
    Fit model coefficients via the closed-form normal-equation solution.

    Arguments:
        X: 1D or 2D numpy array of features.  A 1D array is promoted to
           a single-column matrix.  If None, the previously ingested
           ``self.features_`` is used.
        y: 1D numpy array target.  If None, the previously ingested
           ``self.target_`` is used.
        fit_intercept_: Boolean.  NOTE(review): currently unused — the
           intercept choice actually honored is ``self.fit_intercept_``
           set in ``__init__``; confirm whether this parameter was
           meant to override it.

    Side effects: sets ``coef_``, ``intercept_``, ``fitted_``,
    ``resid_``, ``dft_``, ``dfe_`` and flips ``is_fitted`` to True.
    """
    # Identity checks, not `!= None`: on an ndarray, `X != None` is an
    # elementwise comparison whose truth test raises ValueError for any
    # multi-element array, crashing fit() on real input.
    if X is not None:
        if len(X.shape) == 1:
            # Promote a 1-D feature vector to a single-column matrix.
            X = X.reshape(-1, 1)
        self.features_ = X
    if y is not None:
        self.target_ = y

    # degrees of freedom of population dependent variable variance
    self.dft_ = self.features_.shape[0] - 1
    # degrees of freedom of population error variance
    self.dfe_ = self.features_.shape[0] - self.features_.shape[1] - 1

    # add a bias (all-ones) column if an intercept is requested
    if self.fit_intercept_:
        X_biased = np.c_[np.ones(self.features_.shape[0]), self.features_]
    else:
        X_biased = self.features_
    # Assign target_ to a local variable y
    y = self.target_

    # closed form solution: coef = (X^T X)^{-1} X^T y
    xTx = np.dot(X_biased.T, X_biased)
    inverse_xTx = np.linalg.inv(xTx)
    xTy = np.dot(X_biased.T, y)
    coef = np.dot(inverse_xTx, xTy)

    # split the intercept off from the slope coefficients
    if self.fit_intercept_:
        self.intercept_ = coef[0]
        self.coef_ = coef[1:]
    else:
        self.intercept_ = 0
        self.coef_ = coef

    # Predicted/fitted y on the training data
    self.fitted_ = np.dot(self.features_, self.coef_) + self.intercept_

    # Residuals
    self.resid_ = self.target_ - self.fitted_

    # Mark fitted so the metric/inference methods can run
    self.is_fitted = True
553+
554+
def fit_dataframe(self, X, y, dataframe, fit_intercept_=True):
555+
"""
556+
Fit model coefficients from a Pandas DataFrame.
557+
558+
Arguments:
559+
X: A list of columns of the dataframe acting as features. Must be only numerical.
560+
y: Name of the column of the dataframe acting as the target
561+
fit_intercept: Boolean, whether an intercept term will be included in the fit
562+
"""
563+
564+
assert (
565+
type(X) == list
566+
), "X must be a list of the names of the numerical feature/predictor columns"
567+
assert (
568+
type(y) == str
569+
), "y must be a string - name of the column you want as target"
570+
571+
self.features_ = np.array(dataframe[X])
572+
self.target_ = np.array(dataframe[y])
573+
574+
# degrees of freedom of population dependent variable variance
575+
self.dft_ = self.features_.shape[0] - 1
576+
# degrees of freedom of population error variance
577+
self.dfe_ = self.features_.shape[0] - self.features_.shape[1] - 1
578+
579+
# add bias if fit_intercept is True
580+
if self.fit_intercept_:
413581
X_biased = np.c_[np.ones(self.features_.shape[0]), self.features_]
414582
else:
415583
X_biased = self.features_
@@ -423,7 +591,7 @@ def fit(self, X=None, y=None, _fit_intercept=True):
423591
coef = np.dot(inverse_xTx, xTy)
424592

425593
# set attributes
426-
if self._fit_intercept:
594+
if self.fit_intercept_:
427595
self.intercept_ = coef[0]
428596
self.coef_ = coef[1:]
429597
else:

0 commit comments

Comments
 (0)