@@ -12,10 +12,12 @@ class Metrics:
1212 r_squared: Regression coefficient (R^2)
1313 adj_r_squared: Adjusted R^2
1414 mse: Mean sum of squared errors
15+ AIC: Akaike information criterion
16+ BIC: Bayesian information criterion
1517 """
1618
1719 def sse (self ):
18- """Returns sum of squared errors (model vs actual)"""
20+ """Returns sum of squared errors (model vs. actual)"""
1921 if not self .is_fitted :
2022 print ("Model not fitted yet!" )
2123 return None
@@ -57,8 +59,28 @@ def mse(self):
5759 self .mse_ = np .mean ((self .predict (self .features_ ) - self .target_ ) ** 2 )
5860 return self .mse_
5961
60- def pretty_print_stats (self ):
61- """Returns report of statistics for a given model object"""
def aic(self):
    """
    Return the Akaike information criterion (AIC) of the model.

    The value is taken from a statsmodels OLS fit of ``self.target_`` on
    ``self.features_`` with a constant column added. If the model has not
    been fitted yet, a notice is printed and None is returned.
    """
    if self.is_fitted:
        # Refit with statsmodels and read the criterion off its results
        design = sm.add_constant(self.features_)
        return sm.OLS(self.target_, design).fit().aic
    print("Model not fitted yet!")
    return None
71+
def bic(self):
    """
    Return the Bayesian information criterion (BIC) of the model.

    The value is taken from a statsmodels OLS fit of ``self.target_`` on
    ``self.features_`` with a constant column added. If the model has not
    been fitted yet, a notice is printed and None is returned.
    """
    if self.is_fitted:
        # Refit with statsmodels and read the criterion off its results
        ols_results = sm.OLS(self.target_, sm.add_constant(self.features_)).fit()
        return ols_results.bic
    print("Model not fitted yet!")
    return None
81+
82+ def print_metrics (self ):
83+ """Prints a report of the useful metrics for a given model object"""
6284 if not self .is_fitted :
6385 print ("Model not fitted yet!" )
6486 return None
@@ -68,19 +90,54 @@ def pretty_print_stats(self):
6890 ("mse:" , self .mse ()),
6991 ("r^2:" , self .r_squared ()),
7092 ("adj_r^2:" , self .adj_r_squared ()),
93+ ("AIC:" , self .aic ()),
94+ ("BIC:" , self .bic ()),
7195 )
7296 for item in items :
7397 print ("{0:8} {1:.4f}" .format (item [0 ], item [1 ]))
7498
def summary_metrics(self):
    """
    Return a dictionary of the useful metrics.

    Keys are "sse", "sst", "mse", "r^2", "adj_r^2", "AIC" and "BIC"; each
    value is whatever the corresponding metric method of this object
    returns. If the model has not been fitted yet, a notice is printed and
    None is returned.
    """
    if not self.is_fitted:
        print("Model not fitted yet!")
        return None
    # Keys are plain metric names. The trailing colons that "adj_r^2:",
    # "AIC:" and "BIC:" used to carry were display-label residue and made
    # the keys inconsistent with "sse", "sst", "mse" and "r^2".
    return {
        "sse": self.sse(),
        "sst": self.sst(),
        "mse": self.mse(),
        "r^2": self.r_squared(),
        "adj_r^2": self.adj_r_squared(),
        "AIC": self.aic(),
        "BIC": self.bic(),
    }
117+
75118
76119class Inference :
77120 """
78- Inferential statistics: standard error, p-values, etc.
121+ Inferential statistics:
122+ standard error,
123+ p-values
124+ t-test statistics
125+ F-statistics and p-value of F-test
79126 """
80127
def __init__(self):
    """
    Initialise the object; there is no state to set up here.

    The method takes ``self`` so that the class can be instantiated:
    without it, Python passes the new instance to a zero-parameter function
    and raises TypeError.
    """
    pass
83130
def std_err(self):
    """
    Return the standard errors reported by a statsmodels OLS fit.

    The fit regresses ``self.target_`` on ``self.features_`` with a
    constant column added, and its ``bse`` attribute is returned. If the
    model has not been fitted yet, a notice is printed and None is
    returned.
    """
    if self.is_fitted:
        design = sm.add_constant(self.features_)
        ols_results = sm.OLS(self.target_, design).fit()
        return ols_results.bse
    print("Model not fitted yet!")
    return None
140+
84141 def pvalues (self ):
85142 """
86143 Returns p-values of the features
@@ -90,7 +147,7 @@ def pvalues(self):
90147 return None
91148 lm = sm .OLS (self .target_ , sm .add_constant (self .features_ )).fit ()
92149 return lm .pvalues
93-
150+
94151 def tvalues (self ):
95152 """
96153 Returns t-test values of the features
@@ -100,16 +157,16 @@ def tvalues(self):
100157 return None
101158 lm = sm .OLS (self .target_ , sm .add_constant (self .features_ )).fit ()
102159 return lm .tvalues
103-
104- def std_err (self ):
160+
def ftest(self):
    """
    Return the F-statistic of the overall regression and its p-value.

    Both numbers come from a statsmodels OLS fit of ``self.target_`` on
    ``self.features_`` with a constant column added, and are returned as
    the tuple ``(fvalue, f_pvalue)``. If the model has not been fitted yet,
    a notice is printed and None is returned.
    """
    if self.is_fitted:
        design = sm.add_constant(self.features_)
        ols_results = sm.OLS(self.target_, design).fit()
        f_stat = ols_results.fvalue
        f_prob = ols_results.f_pvalue
        return (f_stat, f_prob)
    print("Model not fitted yet!")
    return None
113170
114171
115172class Diagnostics_plots :
@@ -364,7 +421,7 @@ class MyLinearRegression(
364421 def __init__ (self , fit_intercept = True ):
365422 self .coef_ = None
366423 self .intercept_ = None
367- self ._fit_intercept = fit_intercept
424+ self .fit_intercept_ = fit_intercept
368425 self .is_fitted = False
369426 self .features_ = None
370427 self .target_ = None
@@ -388,7 +445,7 @@ def ingest_data(self, X, y):
388445 self .features_ = X
389446 self .target_ = y
390447
391- def fit (self , X = None , y = None , _fit_intercept = True ):
448+ def fit (self , X = None , y = None , fit_intercept_ = True ):
392449 """
393450 Fit model coefficients.
394451 Arguments:
@@ -409,7 +466,118 @@ def fit(self, X=None, y=None, _fit_intercept=True):
409466 self .dfe_ = self .features_ .shape [0 ] - self .features_ .shape [1 ] - 1
410467
411468 # add bias if fit_intercept is True
412- if self ._fit_intercept :
469+ if self .fit_intercept_ :
470+ X_biased = np .c_ [np .ones (self .features_ .shape [0 ]), self .features_ ]
471+ else :
472+ X_biased = self .features_
473+ # Assign target_ to a local variable y
474+ y = self .target_
475+
476+ # closed form solution
477+ xTx = np .dot (X_biased .T , X_biased )
478+ inverse_xTx = np .linalg .inv (xTx )
479+ xTy = np .dot (X_biased .T , y )
480+ coef = np .dot (inverse_xTx , xTy )
481+
482+ # set attributes
483+ if self .fit_intercept_ :
484+ self .intercept_ = coef [0 ]
485+ self .coef_ = coef [1 :]
486+ else :
487+ self .intercept_ = 0
488+ self .coef_ = coef
489+
490+ # Predicted/fitted y
491+ self .fitted_ = np .dot (self .features_ , self .coef_ ) + self .intercept_
492+
493+ # Residuals
494+ residuals = self .target_ - self .fitted_
495+ self .resid_ = residuals
496+
497+ # Set is_fitted to True
498+ self .is_fitted = True
499+
def fit(self, X=None, y=None, fit_intercept_=True):
    """
    Fit the model coefficients with the closed-form least-squares solution.

    Arguments:
    X: 1D or 2D numpy array of features. A 1D array is reshaped into a
       single column. When None, the already stored self.features_ is used.
    y: 1D numpy array of targets. When None, the already stored
       self.target_ is used.
    fit_intercept_: Accepted for backward compatibility but not read here;
       whether an intercept is fitted is decided by the instance attribute
       self.fit_intercept_.

    Sets features_/target_ (when given), dft_, dfe_, intercept_, coef_,
    fitted_, resid_ and is_fitted on the object. Returns None.
    np.linalg.inv raises numpy.linalg.LinAlgError when X^T X is singular.
    """
    # NOTE(review): the enclosing class appears to define `fit` twice with
    # the same signature; the later definition is the one Python keeps.

    # Identity checks are required here: `X != None` on a numpy array is an
    # elementwise comparison, and using that array as an `if` condition
    # raises ValueError for arrays with more than one element.
    if X is not None:
        if len(X.shape) == 1:
            X = X.reshape(-1, 1)
        self.features_ = X
    if y is not None:
        self.target_ = y

    n_samples = self.features_.shape[0]
    n_features = self.features_.shape[1]

    # degrees of freedom of population dependent variable variance
    self.dft_ = n_samples - 1
    # degrees of freedom of population error variance
    self.dfe_ = n_samples - n_features - 1

    # prepend a column of ones when an intercept is requested
    if self.fit_intercept_:
        X_biased = np.c_[np.ones(n_samples), self.features_]
    else:
        X_biased = self.features_
    y = self.target_

    # closed form solution: (X^T X)^-1 X^T y
    xTx = np.dot(X_biased.T, X_biased)
    inverse_xTx = np.linalg.inv(xTx)
    xTy = np.dot(X_biased.T, y)
    coef = np.dot(inverse_xTx, xTy)

    # split the solution into intercept and feature coefficients
    if self.fit_intercept_:
        self.intercept_ = coef[0]
        self.coef_ = coef[1:]
    else:
        self.intercept_ = 0
        self.coef_ = coef

    # Predicted/fitted y
    self.fitted_ = np.dot(self.features_, self.coef_) + self.intercept_

    # Residuals
    self.resid_ = self.target_ - self.fitted_

    self.is_fitted = True
553+
554+ def fit_dataframe (self , X , y , dataframe , fit_intercept_ = True ):
555+ """
556+ Fit model coefficients from a Pandas DataFrame.
557+
558+ Arguments:
559+ X: A list of columns of the dataframe acting as features. Must be only numerical.
560+ y: Name of the column of the dataframe acting as the target
561+ fit_intercept: Boolean, whether an intercept term will be included in the fit
562+ """
563+
564+ assert (
565+ type (X ) == list
566+ ), "X must be a list of the names of the numerical feature/predictor columns"
567+ assert (
568+ type (y ) == str
569+ ), "y must be a string - name of the column you want as target"
570+
571+ self .features_ = np .array (dataframe [X ])
572+ self .target_ = np .array (dataframe [y ])
573+
574+ # degrees of freedom of population dependent variable variance
575+ self .dft_ = self .features_ .shape [0 ] - 1
576+ # degrees of freedom of population error variance
577+ self .dfe_ = self .features_ .shape [0 ] - self .features_ .shape [1 ] - 1
578+
579+ # add bias if fit_intercept is True
580+ if self .fit_intercept_ :
413581 X_biased = np .c_ [np .ones (self .features_ .shape [0 ]), self .features_ ]
414582 else :
415583 X_biased = self .features_
@@ -423,7 +591,7 @@ def fit(self, X=None, y=None, _fit_intercept=True):
423591 coef = np .dot (inverse_xTx , xTy )
424592
425593 # set attributes
426- if self ._fit_intercept :
594+ if self .fit_intercept_ :
427595 self .intercept_ = coef [0 ]
428596 self .coef_ = coef [1 :]
429597 else :
0 commit comments