Added Baseball Score initial

jsutch · jsutch · commit 0581d5f08ca2 · 2020-04-27T13:52:04.000-07:00
diff --git a/Analyzing Baseball Scores - v2.ipynb b/Analyzing Baseball Scores - v2.ipynb
@@ -0,0 +1,177 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import statsmodels.formula.api as sm\n",
+    "import pandas as pd\n",
+    "import numpy as np\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the baseball sample CSV (source: pandas/doc/data), using the 'id' column as the index.\n",
+    "bb = pd.read_csv(filepath_or_buffer='data/baseball.csv', index_col='id')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table class=\"simpletable\">\n",
+       "<caption>OLS Regression Results</caption>\n",
+       "<tr>\n",
+       "  <th>Dep. Variable:</th>           <td>hr</td>        <th>  R-squared:         </th> <td>   0.685</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.665</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   34.28</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Date:</th>             <td>Mon, 27 Apr 2020</td> <th>  Prob (F-statistic):</th> <td>3.48e-15</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Time:</th>                 <td>09:31:30</td>     <th>  Log-Likelihood:    </th> <td> -205.92</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>No. Observations:</th>      <td>    68</td>      <th>  AIC:               </th> <td>   421.8</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Df Residuals:</th>          <td>    63</td>      <th>  BIC:               </th> <td>   432.9</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Df Model:</th>              <td>     4</td>      <th>                     </th>     <td> </td>   \n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
+       "</tr>\n",
+       "</table>\n",
+       "<table class=\"simpletable\">\n",
+       "<tr>\n",
+       "       <td></td>          <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Intercept</th>   <td>-8484.7720</td> <td> 4664.146</td> <td>   -1.819</td> <td> 0.074</td> <td>-1.78e+04</td> <td>  835.780</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>C(lg)[T.NL]</th> <td>   -2.2736</td> <td>    1.325</td> <td>   -1.716</td> <td> 0.091</td> <td>   -4.922</td> <td>    0.375</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>ln_h</th>        <td>   -1.3542</td> <td>    0.875</td> <td>   -1.547</td> <td> 0.127</td> <td>   -3.103</td> <td>    0.395</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>year</th>        <td>    4.2277</td> <td>    2.324</td> <td>    1.819</td> <td> 0.074</td> <td>   -0.417</td> <td>    8.872</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>g</th>           <td>    0.1841</td> <td>    0.029</td> <td>    6.258</td> <td> 0.000</td> <td>    0.125</td> <td>    0.243</td>\n",
+       "</tr>\n",
+       "</table>\n",
+       "<table class=\"simpletable\">\n",
+       "<tr>\n",
+       "  <th>Omnibus:</th>       <td>10.875</td> <th>  Durbin-Watson:     </th> <td>   1.999</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Prob(Omnibus):</th> <td> 0.004</td> <th>  Jarque-Bera (JB):  </th> <td>  17.298</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Skew:</th>          <td> 0.537</td> <th>  Prob(JB):          </th> <td>0.000175</td>\n",
+       "</tr>\n",
+       "<tr>\n",
+       "  <th>Kurtosis:</th>      <td> 5.225</td> <th>  Cond. No.          </th> <td>1.49e+07</td>\n",
+       "</tr>\n",
+       "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 1.49e+07. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
+      ],
+      "text/plain": [
+       "<class 'statsmodels.iolib.summary.Summary'>\n",
+       "\"\"\"\n",
+       "                            OLS Regression Results                            \n",
+       "==============================================================================\n",
+       "Dep. Variable:                     hr   R-squared:                       0.685\n",
+       "Model:                            OLS   Adj. R-squared:                  0.665\n",
+       "Method:                 Least Squares   F-statistic:                     34.28\n",
+       "Date:                Mon, 27 Apr 2020   Prob (F-statistic):           3.48e-15\n",
+       "Time:                        09:31:30   Log-Likelihood:                -205.92\n",
+       "No. Observations:                  68   AIC:                             421.8\n",
+       "Df Residuals:                      63   BIC:                             432.9\n",
+       "Df Model:                           4                                         \n",
+       "Covariance Type:            nonrobust                                         \n",
+       "===============================================================================\n",
+       "                  coef    std err          t      P>|t|      [0.025      0.975]\n",
+       "-------------------------------------------------------------------------------\n",
+       "Intercept   -8484.7720   4664.146     -1.819      0.074   -1.78e+04     835.780\n",
+       "C(lg)[T.NL]    -2.2736      1.325     -1.716      0.091      -4.922       0.375\n",
+       "ln_h           -1.3542      0.875     -1.547      0.127      -3.103       0.395\n",
+       "year            4.2277      2.324      1.819      0.074      -0.417       8.872\n",
+       "g               0.1841      0.029      6.258      0.000       0.125       0.243\n",
+       "==============================================================================\n",
+       "Omnibus:                       10.875   Durbin-Watson:                   1.999\n",
+       "Prob(Omnibus):                  0.004   Jarque-Bera (JB):               17.298\n",
+       "Skew:                           0.537   Prob(JB):                     0.000175\n",
+       "Kurtosis:                       5.225   Cond. No.                     1.49e+07\n",
+       "==============================================================================\n",
+       "\n",
+       "Warnings:\n",
+       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
+       "[2] The condition number is large, 1.49e+07. This might indicate that there are\n",
+       "strong multicollinearity or other numerical problems.\n",
+       "\"\"\""
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# OLS of hr on log(h), year, g and categorical lg; the h > 0 filter keeps the log defined.\n",
+    "(bb.query('h > 0')\n",
+    "   .assign(ln_h=lambda df: np.log(df.h))\n",
+    "   .pipe((sm.ols, 'data'), 'hr ~ ln_h + year + g + C(lg)')  # frame is passed as data=\n",
+    "   .fit()\n",
+    "   .summary()\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}