lizichen
diff --git a/‎ConceptExplain/Scipy_Linear_Regression.ipynb
Lines changed: 180 additions & 0 deletions b/‎ConceptExplain/Scipy_Linear_Regression.ipynb
Lines changed: 180 additions & 0 deletions
diff --git a/‎ConceptExplain/TensorFlow-Linear-Model-Sample.ipynb
Lines changed: 387 additions & 0 deletions b/‎ConceptExplain/TensorFlow-Linear-Model-Sample.ipynb
Lines changed: 387 additions & 0 deletions
@@ -0,0 +1,387 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "import tempfile\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import urllib.request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('/var/folders/vk/hdq2y0l55v72c696m28nyymr0000gp/T/tmpzrz55s82',\n",
+       " <http.client.HTTPMessage at 0x119553358>)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Temporary files that receive the downloaded adult.data / adult.test contents.\n",
+    "train_file = tempfile.NamedTemporaryFile()\n",
+    "test_file = tempfile.NamedTemporaryFile()\n",
+    "\n",
+    "# Download the training file, then the test file. The second call is the last\n",
+    "# expression of the cell, so its return value is what the cell displays.\n",
+    "urllib.request.urlretrieve(\n",
+    "    \"http://mlr.cs.umass.edu/ml/machine-learning-databases/adult/adult.data\",\n",
+    "    filename=train_file.name)\n",
+    "urllib.request.urlretrieve(\n",
+    "    \"http://mlr.cs.umass.edu/ml/machine-learning-databases/adult/adult.test\",\n",
+    "    filename=test_file.name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Column names handed to read_csv through names=, in file order.\n",
+    "COLUMNS = [\n",
+    "    \"age\", \"workclass\", \"fnlwgt\", \"education\", \"education_num\",\n",
+    "    \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\",\n",
+    "    \"capital_gain\", \"capital_loss\", \"hours_per_week\", \"native_country\",\n",
+    "    \"income_bracket\",\n",
+    "]\n",
+    "\n",
+    "df_train = pd.read_csv(\n",
+    "    train_file,\n",
+    "    names=COLUMNS,\n",
+    "    skipinitialspace=True,\n",
+    ")\n",
+    "# The first line of the test file is skipped before parsing.\n",
+    "df_test = pd.read_csv(\n",
+    "    test_file,\n",
+    "    names=COLUMNS,\n",
+    "    skipinitialspace=True,\n",
+    "    skiprows=1,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "LABEL_COLUMN = \"label\"\n",
+    "\n",
+    "# Binary target: 1 where the income bracket text contains \">50K\", otherwise 0.\n",
+    "df_train[LABEL_COLUMN] = (\n",
+    "    df_train[\"income_bracket\"]\n",
+    "    .apply(lambda bracket: \">50K\" in bracket)\n",
+    "    .astype(int)\n",
+    ")\n",
+    "df_test[LABEL_COLUMN] = (\n",
+    "    df_test[\"income_bracket\"]\n",
+    "    .apply(lambda bracket: \">50K\" in bracket)\n",
+    "    .astype(int)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "CATEGORICAL_COLUMNS = [\"workclass\", \"education\", \"marital_status\", \"occupation\",\n",
+    "                       \"relationship\", \"race\", \"gender\", \"native_country\"]\n",
+    "CONTINUOUS_COLUMNS = [\"age\", \"education_num\", \"capital_gain\", \"capital_loss\", \"hours_per_week\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>age</th>\n",
+       "      <th>workclass</th>\n",
+       "      <th>fnlwgt</th>\n",
+       "      <th>education</th>\n",
+       "      <th>education_num</th>\n",
+       "      <th>marital_status</th>\n",
+       "      <th>occupation</th>\n",
+       "      <th>relationship</th>\n",
+       "      <th>race</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>capital_gain</th>\n",
+       "      <th>capital_loss</th>\n",
+       "      <th>hours_per_week</th>\n",
+       "      <th>native_country</th>\n",
+       "      <th>income_bracket</th>\n",
+       "      <th>label</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>39</td>\n",
+       "      <td>State-gov</td>\n",
+       "      <td>77516</td>\n",
+       "      <td>Bachelors</td>\n",
+       "      <td>13</td>\n",
+       "      <td>Never-married</td>\n",
+       "      <td>Adm-clerical</td>\n",
+       "      <td>Not-in-family</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>2174</td>\n",
+       "      <td>0</td>\n",
+       "      <td>40</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&lt;=50K</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>50</td>\n",
+       "      <td>Self-emp-not-inc</td>\n",
+       "      <td>83311</td>\n",
+       "      <td>Bachelors</td>\n",
+       "      <td>13</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Exec-managerial</td>\n",
+       "      <td>Husband</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>13</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&lt;=50K</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>38</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>215646</td>\n",
+       "      <td>HS-grad</td>\n",
+       "      <td>9</td>\n",
+       "      <td>Divorced</td>\n",
+       "      <td>Handlers-cleaners</td>\n",
+       "      <td>Not-in-family</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>40</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&lt;=50K</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>53</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>234721</td>\n",
+       "      <td>11th</td>\n",
+       "      <td>7</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Handlers-cleaners</td>\n",
+       "      <td>Husband</td>\n",
+       "      <td>Black</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>40</td>\n",
+       "      <td>United-States</td>\n",
+       "      <td>&lt;=50K</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>28</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>338409</td>\n",
+       "      <td>Bachelors</td>\n",
+       "      <td>13</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Prof-specialty</td>\n",
+       "      <td>Wife</td>\n",
+       "      <td>Black</td>\n",
+       "      <td>Female</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>40</td>\n",
+       "      <td>Cuba</td>\n",
+       "      <td>&lt;=50K</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   age         workclass  fnlwgt  education  education_num  \\\n",
+       "0   39         State-gov   77516  Bachelors             13   \n",
+       "1   50  Self-emp-not-inc   83311  Bachelors             13   \n",
+       "2   38           Private  215646    HS-grad              9   \n",
+       "3   53           Private  234721       11th              7   \n",
+       "4   28           Private  338409  Bachelors             13   \n",
+       "\n",
+       "       marital_status         occupation   relationship   race  gender  \\\n",
+       "0       Never-married       Adm-clerical  Not-in-family  White    Male   \n",
+       "1  Married-civ-spouse    Exec-managerial        Husband  White    Male   \n",
+       "2            Divorced  Handlers-cleaners  Not-in-family  White    Male   \n",
+       "3  Married-civ-spouse  Handlers-cleaners        Husband  Black    Male   \n",
+       "4  Married-civ-spouse     Prof-specialty           Wife  Black  Female   \n",
+       "\n",
+       "   capital_gain  capital_loss  hours_per_week native_country income_bracket  \\\n",
+       "0          2174             0              40  United-States          <=50K   \n",
+       "1             0             0              13  United-States          <=50K   \n",
+       "2             0             0              40  United-States          <=50K   \n",
+       "3             0             0              40  United-States          <=50K   \n",
+       "4             0             0              40           Cuba          <=50K   \n",
+       "\n",
+       "   label  \n",
+       "0      0  \n",
+       "1      0  \n",
+       "2      0  \n",
+       "3      0  \n",
+       "4      0  "
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_train.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "\n",
+    "def input_fn(df):\n",
+    "  \"\"\"Builds the feature tensors and the label tensor for one DataFrame.\n",
+    "\n",
+    "  Args:\n",
+    "    df: DataFrame containing every column listed in CONTINUOUS_COLUMNS and\n",
+    "      CATEGORICAL_COLUMNS, plus the LABEL_COLUMN column.\n",
+    "\n",
+    "  Returns:\n",
+    "    A (feature_cols, label) tuple. feature_cols maps each column name to a\n",
+    "    tf.constant (continuous columns) or a tf.SparseTensor (categorical\n",
+    "    columns); label is a tf.constant of the LABEL_COLUMN values.\n",
+    "  \"\"\"\n",
+    "  # Creates a dictionary mapping from each continuous feature column name (k) to\n",
+    "  # the values of that column stored in a constant Tensor.\n",
+    "  continuous_cols = {k: tf.constant(df[k].values)\n",
+    "                     for k in CONTINUOUS_COLUMNS}\n",
+    "  # Creates a dictionary mapping from each categorical feature column name (k)\n",
+    "  # to the values of that column stored in a tf.SparseTensor.\n",
+    "  categorical_cols = {k: tf.SparseTensor(\n",
+    "      indices=[[i, 0] for i in range(df[k].size)],\n",
+    "      values=df[k].values,\n",
+    "      dense_shape=[df[k].size, 1])\n",
+    "                      for k in CATEGORICAL_COLUMNS}\n",
+    "  # Merges the two dictionaries into one. On Python 3, dict.items() returns\n",
+    "  # view objects that do not support `+` (it raises TypeError), so both\n",
+    "  # dictionaries are unpacked into a new dict instead of concatenating items.\n",
+    "  feature_cols = {**continuous_cols, **categorical_cols}\n",
+    "  # Converts the label column into a constant Tensor.\n",
+    "  label = tf.constant(df[LABEL_COLUMN].values)\n",
+    "  # Returns the feature columns and the label.\n",
+    "  return feature_cols, label\n",
+    "\n",
+    "def train_input_fn():\n",
+    "  \"\"\"Returns the (features, label) pair that input_fn builds from df_train.\"\"\"\n",
+    "  features, label = input_fn(df_train)\n",
+    "  return features, label\n",
+    "\n",
+    "def eval_input_fn():\n",
+    "  \"\"\"Returns the (features, label) pair that input_fn builds from df_test.\"\"\"\n",
+    "  features, label = input_fn(df_test)\n",
+    "  return features, label"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Sparse column for \"gender\" built from an explicit list of keys.\n",
+    "gender = tf.contrib.layers.sparse_column_with_keys(\n",
+    "    column_name=\"gender\",\n",
+    "    keys=[\"Female\", \"Male\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Sparse column for \"education\" created with a hash bucket of size 1000.\n",
+    "education = tf.contrib.layers.sparse_column_with_hash_bucket(\n",
+    "    column_name=\"education\",\n",
+    "    hash_bucket_size=1000,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Remaining categorical features, each created with sparse_column_with_hash_bucket.\n",
+    "# Bucket size 100:\n",
+    "race = tf.contrib.layers.sparse_column_with_hash_bucket(\n",
+    "    column_name=\"race\", hash_bucket_size=100)\n",
+    "marital_status = tf.contrib.layers.sparse_column_with_hash_bucket(\n",
+    "    column_name=\"marital_status\", hash_bucket_size=100)\n",
+    "relationship = tf.contrib.layers.sparse_column_with_hash_bucket(\n",
+    "    column_name=\"relationship\", hash_bucket_size=100)\n",
+    "workclass = tf.contrib.layers.sparse_column_with_hash_bucket(\n",
+    "    column_name=\"workclass\", hash_bucket_size=100)\n",
+    "# Bucket size 1000:\n",
+    "occupation = tf.contrib.layers.sparse_column_with_hash_bucket(\n",
+    "    column_name=\"occupation\", hash_bucket_size=1000)\n",
+    "native_country = tf.contrib.layers.sparse_column_with_hash_bucket(\n",
+    "    column_name=\"native_country\", hash_bucket_size=1000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "Python [conda env:snakes]",
+   "language": "python",
+   "name": "conda-env-snakes-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}