|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 1, |
| 6 | + "metadata": {}, |
| 7 | + "outputs": [], |
| 8 | + "source": [ |
| 9 | + "import statsmodels.formula.api as sm\n", |
| 10 | + "import pandas as pd\n", |
| 11 | + "import numpy as np\n" |
| 12 | + ] |
| 13 | + }, |
| 14 | + { |
| 15 | + "cell_type": "code", |
| 16 | + "execution_count": 7, |
| 17 | + "metadata": {}, |
| 18 | + "outputs": [], |
| 19 | + "source": [ |
| 20 | + "# Data source: baseball.csv from the pandas repository (doc/data); a local copy is read from data/baseball.csv.\n", |
| 21 | + "bb = pd.read_csv('data/baseball.csv', index_col='id')\n" |
| 22 | + ] |
| 23 | + }, |
| 24 | + { |
| 25 | + "cell_type": "code", |
| 26 | + "execution_count": 5, |
| 27 | + "metadata": {}, |
| 28 | + "outputs": [ |
| 29 | + { |
| 30 | + "data": { |
| 31 | + "text/html": [ |
| 32 | + "<table class=\"simpletable\">\n", |
| 33 | + "<caption>OLS Regression Results</caption>\n", |
| 34 | + "<tr>\n", |
| 35 | + " <th>Dep. Variable:</th> <td>hr</td> <th> R-squared: </th> <td> 0.685</td>\n", |
| 36 | + "</tr>\n", |
| 37 | + "<tr>\n", |
| 38 | + " <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.665</td>\n", |
| 39 | + "</tr>\n", |
| 40 | + "<tr>\n", |
| 41 | + " <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 34.28</td>\n", |
| 42 | + "</tr>\n", |
| 43 | + "<tr>\n", |
| 44 | + " <th>Date:</th> <td>Mon, 27 Apr 2020</td> <th> Prob (F-statistic):</th> <td>3.48e-15</td>\n", |
| 45 | + "</tr>\n", |
| 46 | + "<tr>\n", |
| 47 | + " <th>Time:</th> <td>09:31:30</td> <th> Log-Likelihood: </th> <td> -205.92</td>\n", |
| 48 | + "</tr>\n", |
| 49 | + "<tr>\n", |
| 50 | + " <th>No. Observations:</th> <td> 68</td> <th> AIC: </th> <td> 421.8</td>\n", |
| 51 | + "</tr>\n", |
| 52 | + "<tr>\n", |
| 53 | + " <th>Df Residuals:</th> <td> 63</td> <th> BIC: </th> <td> 432.9</td>\n", |
| 54 | + "</tr>\n", |
| 55 | + "<tr>\n", |
| 56 | + " <th>Df Model:</th> <td> 4</td> <th> </th> <td> </td> \n", |
| 57 | + "</tr>\n", |
| 58 | + "<tr>\n", |
| 59 | + " <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n", |
| 60 | + "</tr>\n", |
| 61 | + "</table>\n", |
| 62 | + "<table class=\"simpletable\">\n", |
| 63 | + "<tr>\n", |
| 64 | + " <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n", |
| 65 | + "</tr>\n", |
| 66 | + "<tr>\n", |
| 67 | + " <th>Intercept</th> <td>-8484.7720</td> <td> 4664.146</td> <td> -1.819</td> <td> 0.074</td> <td>-1.78e+04</td> <td> 835.780</td>\n", |
| 68 | + "</tr>\n", |
| 69 | + "<tr>\n", |
| 70 | + " <th>C(lg)[T.NL]</th> <td> -2.2736</td> <td> 1.325</td> <td> -1.716</td> <td> 0.091</td> <td> -4.922</td> <td> 0.375</td>\n", |
| 71 | + "</tr>\n", |
| 72 | + "<tr>\n", |
| 73 | + " <th>ln_h</th> <td> -1.3542</td> <td> 0.875</td> <td> -1.547</td> <td> 0.127</td> <td> -3.103</td> <td> 0.395</td>\n", |
| 74 | + "</tr>\n", |
| 75 | + "<tr>\n", |
| 76 | + " <th>year</th> <td> 4.2277</td> <td> 2.324</td> <td> 1.819</td> <td> 0.074</td> <td> -0.417</td> <td> 8.872</td>\n", |
| 77 | + "</tr>\n", |
| 78 | + "<tr>\n", |
| 79 | + " <th>g</th> <td> 0.1841</td> <td> 0.029</td> <td> 6.258</td> <td> 0.000</td> <td> 0.125</td> <td> 0.243</td>\n", |
| 80 | + "</tr>\n", |
| 81 | + "</table>\n", |
| 82 | + "<table class=\"simpletable\">\n", |
| 83 | + "<tr>\n", |
| 84 | + " <th>Omnibus:</th> <td>10.875</td> <th> Durbin-Watson: </th> <td> 1.999</td>\n", |
| 85 | + "</tr>\n", |
| 86 | + "<tr>\n", |
| 87 | + " <th>Prob(Omnibus):</th> <td> 0.004</td> <th> Jarque-Bera (JB): </th> <td> 17.298</td>\n", |
| 88 | + "</tr>\n", |
| 89 | + "<tr>\n", |
| 90 | + " <th>Skew:</th> <td> 0.537</td> <th> Prob(JB): </th> <td>0.000175</td>\n", |
| 91 | + "</tr>\n", |
| 92 | + "<tr>\n", |
| 93 | + " <th>Kurtosis:</th> <td> 5.225</td> <th> Cond. No. </th> <td>1.49e+07</td>\n", |
| 94 | + "</tr>\n", |
| 95 | + "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 1.49e+07. This might indicate that there are<br/>strong multicollinearity or other numerical problems." |
| 96 | + ], |
| 97 | + "text/plain": [ |
| 98 | + "<class 'statsmodels.iolib.summary.Summary'>\n", |
| 99 | + "\"\"\"\n", |
| 100 | + " OLS Regression Results \n", |
| 101 | + "==============================================================================\n", |
| 102 | + "Dep. Variable: hr R-squared: 0.685\n", |
| 103 | + "Model: OLS Adj. R-squared: 0.665\n", |
| 104 | + "Method: Least Squares F-statistic: 34.28\n", |
| 105 | + "Date: Mon, 27 Apr 2020 Prob (F-statistic): 3.48e-15\n", |
| 106 | + "Time: 09:31:30 Log-Likelihood: -205.92\n", |
| 107 | + "No. Observations: 68 AIC: 421.8\n", |
| 108 | + "Df Residuals: 63 BIC: 432.9\n", |
| 109 | + "Df Model: 4 \n", |
| 110 | + "Covariance Type: nonrobust \n", |
| 111 | + "===============================================================================\n", |
| 112 | + " coef std err t P>|t| [0.025 0.975]\n", |
| 113 | + "-------------------------------------------------------------------------------\n", |
| 114 | + "Intercept -8484.7720 4664.146 -1.819 0.074 -1.78e+04 835.780\n", |
| 115 | + "C(lg)[T.NL] -2.2736 1.325 -1.716 0.091 -4.922 0.375\n", |
| 116 | + "ln_h -1.3542 0.875 -1.547 0.127 -3.103 0.395\n", |
| 117 | + "year 4.2277 2.324 1.819 0.074 -0.417 8.872\n", |
| 118 | + "g 0.1841 0.029 6.258 0.000 0.125 0.243\n", |
| 119 | + "==============================================================================\n", |
| 120 | + "Omnibus: 10.875 Durbin-Watson: 1.999\n", |
| 121 | + "Prob(Omnibus): 0.004 Jarque-Bera (JB): 17.298\n", |
| 122 | + "Skew: 0.537 Prob(JB): 0.000175\n", |
| 123 | + "Kurtosis: 5.225 Cond. No. 1.49e+07\n", |
| 124 | + "==============================================================================\n", |
| 125 | + "\n", |
| 126 | + "Warnings:\n", |
| 127 | + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", |
| 128 | + "[2] The condition number is large, 1.49e+07. This might indicate that there are\n", |
| 129 | + "strong multicollinearity or other numerical problems.\n", |
| 130 | + "\"\"\"" |
| 131 | + ] |
| 132 | + }, |
| 133 | + "execution_count": 5, |
| 134 | + "metadata": {}, |
| 135 | + "output_type": "execute_result" |
| 136 | + } |
| 137 | + ], |
| 138 | + "source": [ |
| 139 | + "# Keep rows with h > 0 so np.log(h) is finite, then fit hr ~ ln_h + year + g + C(lg) by OLS.\n", |
| 140 | + "(bb.query('h > 0')\n", |
| 141 | + "   .assign(ln_h=lambda df: np.log(df.h))  # ln_h: natural log of column h\n", |
| 142 | + "   .pipe((sm.ols, 'data'), 'hr ~ ln_h + year + g + C(lg)')  # tuple form: the frame is passed as data=\n", |
| 143 | + "   .fit()\n", |
| 144 | + "   .summary()\n", |
| 145 | + ")" |
| 146 | + ] |
| 147 | + }, |
| 148 | + { |
| 149 | + "cell_type": "code", |
| 150 | + "execution_count": null, |
| 151 | + "metadata": {}, |
| 152 | + "outputs": [], |
| 153 | + "source": [] |
| 154 | + } |
| 155 | + ], |
| 156 | + "metadata": { |
| 157 | + "kernelspec": { |
| 158 | + "display_name": "Python 3", |
| 159 | + "language": "python", |
| 160 | + "name": "python3" |
| 161 | + }, |
| 162 | + "language_info": { |
| 163 | + "codemirror_mode": { |
| 164 | + "name": "ipython", |
| 165 | + "version": 3 |
| 166 | + }, |
| 167 | + "file_extension": ".py", |
| 168 | + "mimetype": "text/x-python", |
| 169 | + "name": "python", |
| 170 | + "nbconvert_exporter": "python", |
| 171 | + "pygments_lexer": "ipython3", |
| 172 | + "version": "3.7.7" |
| 173 | + } |
| 174 | + }, |
| 175 | + "nbformat": 4, |
| 176 | + "nbformat_minor": 4 |
| 177 | +} |
0 commit comments