update notebooks

jlm429 · jlm429 · commit 5fa5b179745b · 2024-01-10T04:09:34.000-05:00
diff --git a/notebooks/blackjack.ipynb b/notebooks/blackjack.ipynb
@@ -25,22 +25,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 1,
    "id": "900376a8-792f-41bc-988c-2bc23ff2d7d4",
    "metadata": {},
    "outputs": [],
    "source": [
     "import gymnasium as gym\n",
-    "from utils.blackjack_wrapper import BlackjackWrapper\n",
-    "from utils.test_env import TestEnv\n",
-    "from algorithms.planner import Planner\n",
-    "from algorithms.rl import RL\n",
+    "from bettermdptools.utils.blackjack_wrapper import BlackjackWrapper\n",
+    "from bettermdptools.utils.test_env import TestEnv\n",
+    "from bettermdptools.algorithms.planner import Planner\n",
+    "from bettermdptools.algorithms.rl import RL\n",
     "import numpy as np"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 2,
    "id": "265aae24-a1b3-4400-8bcc-8da16e9a4612",
    "metadata": {},
    "outputs": [
@@ -49,22 +49,15 @@
      "output_type": "stream",
      "text": [
       "runtime = 0.03 seconds\n",
-      "0.2\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "                                                      "
+      "-0.03\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "runtime = 1.34 seconds\n",
-      "0.01\n"
+      "runtime = 1.30 seconds\n",
+      "0.1\n"
      ]
     },
     {
diff --git a/notebooks/frozen_lake.ipynb b/notebooks/frozen_lake.ipynb
@@ -28,8 +28,8 @@
    "outputs": [],
    "source": [
     "import gymnasium as gym\n",
-    "from algorithms.planner import Planner\n",
-    "from utils.plots import Plots"
+    "from bettermdptools.algorithms.planner import Planner\n",
+    "from bettermdptools.utils.plots import Plots"
    ]
   },
   {
@@ -42,7 +42,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "runtime = 0.51 seconds\n"
+      "runtime = 0.42 seconds\n"
      ]
     },
     {
diff --git a/notebooks/other_utilities.ipynb b/notebooks/other_utilities.ipynb
@@ -20,22 +20,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 1,
    "id": "4815068f-bb52-47fb-9d35-3a340663c758",
    "metadata": {},
    "outputs": [],
    "source": [
     "import gymnasium as gym\n",
-    "from utils.test_env import TestEnv\n",
-    "from utils.grid_search import GridSearch\n",
-    "from utils.blackjack_wrapper import BlackjackWrapper\n",
-    "from algorithms.rl import RL\n",
-    "from algorithms.planner import Planner"
+    "from bettermdptools.utils.test_env import TestEnv\n",
+    "from bettermdptools.utils.grid_search import GridSearch\n",
+    "from bettermdptools.utils.blackjack_wrapper import BlackjackWrapper\n",
+    "from bettermdptools.algorithms.rl import RL\n",
+    "from bettermdptools.algorithms.planner import Planner"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "id": "448dc393-ba4f-412d-ac89-6c71a22cfeaf",
    "metadata": {},
    "outputs": [
@@ -50,51 +50,30 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "runtime = 0.67 seconds\n",
+      "runtime = 0.64 seconds\n",
       "Avg. episode reward:  0.0\n",
       "###################\n",
       "running q_learning with gamma: 0.99 epsilon decay: 0.9  iterations: 5000\n"
      ]
     },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "                                                     \r"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "runtime = 6.56 seconds\n",
+      "runtime = 6.49 seconds\n",
       "Avg. episode reward:  0.0\n",
       "###################\n",
       "running q_learning with gamma: 0.99 epsilon decay: 0.9  iterations: 50000\n"
      ]
     },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "                                                       "
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "runtime = 53.30 seconds\n",
-      "Avg. episode reward:  0.83\n",
+      "runtime = 54.57 seconds\n",
+      "Avg. episode reward:  0.88\n",
       "###################\n"
      ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
     }
    ],
    "source": [
@@ -107,7 +86,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 3,
    "id": "60161c93-6183-4de6-a937-7c12b9742fb7",
    "metadata": {},
    "outputs": [
@@ -116,28 +95,28 @@
      "output_type": "stream",
      "text": [
       "running VI with gamma: 0.7  n_iters: 500  theta: 0.001\n",
-      "runtime = 0.02 seconds\n",
-      "Avg. episode reward:  0.01\n",
+      "runtime = 0.01 seconds\n",
+      "Avg. episode reward:  -0.16\n",
       "###################\n",
       "running VI with gamma: 0.7  n_iters: 500  theta: 1e-05\n",
       "runtime = 0.02 seconds\n",
-      "Avg. episode reward:  -0.07\n",
+      "Avg. episode reward:  0.18\n",
       "###################\n",
       "running VI with gamma: 0.9  n_iters: 500  theta: 0.001\n",
       "runtime = 0.01 seconds\n",
-      "Avg. episode reward:  -0.16\n",
+      "Avg. episode reward:  -0.06\n",
       "###################\n",
       "running VI with gamma: 0.9  n_iters: 500  theta: 1e-05\n",
-      "runtime = 0.02 seconds\n",
-      "Avg. episode reward:  0.01\n",
+      "runtime = 0.01 seconds\n",
+      "Avg. episode reward:  -0.1\n",
       "###################\n",
       "running VI with gamma: 0.99  n_iters: 500  theta: 0.001\n",
       "runtime = 0.01 seconds\n",
-      "Avg. episode reward:  0.13\n",
+      "Avg. episode reward:  -0.1\n",
       "###################\n",
       "running VI with gamma: 0.99  n_iters: 500  theta: 1e-05\n",
       "runtime = 0.01 seconds\n",
-      "Avg. episode reward:  0.07\n",
+      "Avg. episode reward:  -0.04\n",
       "###################\n"
      ]
     }
@@ -164,20 +143,20 @@
    "id": "c329484d-db1b-48fe-b7a0-b035ad5356bb",
    "metadata": {},
    "source": [
-    "RL algorithms SARSA and Q-learning have callback hooks for episode number, begin, end, and env. step.   To create a callback, override one of the callback functions in the child class MyCallbacks.  Or, you can use the add_to decorator and define the override outside of the class definition. For example, print the episode number every 1000 episodes."
+    "RL algorithms SARSA and Q-learning have callback hooks for episode number, begin, end, and env. step. To create a callback, override one of the callback functions in the child class MyCallbacks. Alternatively, you can use the add_to decorator to define the override outside of the class definition. For example, the following prints the episode number every 1000 episodes."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 4,
    "id": "787ce4d3-e2b6-459f-94a6-ae4201fa091f",
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      " 16%|█▌        | 1580/10000 [00:00<00:01, 7893.83it/s]"
+      " 17%|█▋        | 1661/10000 [00:00<00:01, 8314.69it/s]"
      ]
     },
     {
@@ -192,7 +171,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      " 32%|███▏      | 3177/10000 [00:00<00:00, 7868.57it/s]"
+      " 33%|███▎      | 3302/10000 [00:00<00:00, 8044.59it/s]"
      ]
     },
     {
@@ -207,7 +186,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      " 55%|█████▌    | 5530/10000 [00:00<00:00, 7772.50it/s]"
+      " 49%|████▉     | 4911/10000 [00:00<00:00, 7923.74it/s]"
      ]
     },
     {
@@ -222,7 +201,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      " 71%|███████   | 7053/10000 [00:00<00:00, 7329.64it/s]"
+      " 73%|███████▎  | 7295/10000 [00:00<00:00, 7878.84it/s]"
      ]
     },
     {
@@ -237,7 +216,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      " 93%|█████████▎| 9256/10000 [00:01<00:00, 7189.14it/s]"
+      " 88%|████████▊ | 8843/10000 [00:01<00:00, 7328.18it/s]"
      ]
     },
     {
@@ -259,7 +238,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "runtime = 1.35 seconds\n"
+      "runtime = 1.31 seconds\n"
      ]
     },
     {
@@ -271,10 +250,10 @@
     }
    ],
    "source": [
-    "from utils.decorators import add_to\n",
-    "from utils.callbacks import MyCallbacks\n",
-    "from algorithms.rl import RL\n",
-    "from utils.blackjack_wrapper import BlackjackWrapper\n",
+    "from bettermdptools.utils.decorators import add_to\n",
+    "from bettermdptools.utils.callbacks import MyCallbacks\n",
+    "from bettermdptools.algorithms.rl import RL\n",
+    "from bettermdptools.utils.blackjack_wrapper import BlackjackWrapper\n",
     "\n",
     "base_env = gym.make('Blackjack-v1', render_mode=None)\n",
     "blackjack = BlackjackWrapper(base_env)\n",
diff --git a/notebooks/plots.ipynb b/notebooks/plots.ipynb
@@ -26,11 +26,11 @@
    "outputs": [],
    "source": [
     "import gymnasium as gym\n",
-    "from utils.test_env import TestEnv\n",
-    "from algorithms.rl import RL\n",
-    "from algorithms.planner import Planner\n",
-    "from utils.plots import Plots\n",
-    "from utils.blackjack_wrapper import BlackjackWrapper\n",
+    "from bettermdptools.utils.test_env import TestEnv\n",
+    "from bettermdptools.algorithms.rl import RL\n",
+    "from bettermdptools.algorithms.planner import Planner\n",
+    "from bettermdptools.utils.plots import Plots\n",
+    "from bettermdptools.utils.blackjack_wrapper import BlackjackWrapper\n",
     "import numpy as np\n",
     "import seaborn as sns\n",
     "import matplotlib.pyplot as plt"
@@ -54,7 +54,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "runtime = 0.69 seconds\n"
+      "runtime = 0.57 seconds\n"
      ]
     },
     {
@@ -117,7 +117,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "id": "5a1642ef-4a93-4d08-96f7-d62b58a63e58",
    "metadata": {},
    "outputs": [
@@ -150,7 +150,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 5,
    "id": "8b7c01ed-cb3e-45d5-9f88-c8733ae3c6f0",
    "metadata": {},
    "outputs": [
@@ -197,12 +197,12 @@
    "id": "31f641c6-d465-4c49-a7bc-770a0aaebe12",
    "metadata": {},
    "source": [
-    "### Using the add_to decorator to make changes on the fly. "
+    "### Customized policy map using the add_to decorator"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 7,
    "id": "e4c8ae11",
    "metadata": {},
    "outputs": [
@@ -218,7 +218,7 @@
     }
    ],
    "source": [
-    "from utils.decorators import add_to\n",
+    "from bettermdptools.utils.decorators import add_to\n",
     "\n",
     "@add_to(Plots)\n",
     "@staticmethod\n",
diff --git a/notebooks/taxi.ipynb b/notebooks/taxi.ipynb
@@ -28,9 +28,9 @@
    "outputs": [],
    "source": [
     "import gymnasium as gym\n",
-    "from algorithms.planner import Planner\n",
-    "from utils.plots import Plots\n",
-    "from utils.test_env import TestEnv"
+    "from bettermdptools.algorithms.planner import Planner\n",
+    "from bettermdptools.utils.plots import Plots\n",
+    "from bettermdptools.utils.test_env import TestEnv"
    ]
   },
   {
@@ -44,7 +44,14 @@
      "output_type": "stream",
      "text": [
       "runtime = 0.04 seconds\n",
-      "[ 5. 10.  6.  4. 11.  9.  8.  6.  8.  6.]\n"
+      "[12.  8. 11.  8.  8.  6.  8.  7. 10.  8.]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ""
      ]
     }
    ],

Original file line number	Diff line number	Diff line change
`@@ -28,8 +28,8 @@`
`28`	`28`	`"outputs": [],`
`29`	`29`	`"source": [`
`30`	`30`	`"import gymnasium as gym\n",`
`31`		`- "from algorithms.planner import Planner\n",`
`32`		`- "from utils.plots import Plots"`
	`31`	`+ "from bettermdptools.algorithms.planner import Planner\n",`
	`32`	`+ "from bettermdptools.utils.plots import Plots"`
`33`	`33`	`]`
`34`	`34`	`},`
`35`	`35`	`{`
`@@ -42,7 +42,7 @@`
`42`	`42`	`"name": "stdout",`
`43`	`43`	`"output_type": "stream",`
`44`	`44`	`"text": [`
`45`		`- "runtime = 0.51 seconds\n"`
	`45`	`+ "runtime = 0.42 seconds\n"`
`46`	`46`	`]`
`47`	`47`	`},`
`48`	`48`	`{`