Commit 1abaae4

Q-Learning docstring improvements.
1 parent b7b4d3d commit 1abaae4

2 files changed: +4 −4 lines changed


TD/Q-Learning Solution.ipynb (+2 −2)

@@ -50,7 +50,7 @@
     " Args:\n",
     " Q: A dictionary that maps from state -> action-values.\n",
     " Each value is a numpy array of length nA (see below)\n",
-    " epsilon: The probability to select a random action . float between 0 and 1.\n",
+    " epsilon: The probability to select a random action. Float between 0 and 1.\n",
     " nA: Number of actions in the environment.\n",
     " \n",
     " Returns:\n",
@@ -82,7 +82,7 @@
     " num_episodes: Number of episodes to run for.\n",
     " discount_factor: Gamma discount factor.\n",
     " alpha: TD learning rate.\n",
-    " epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
+    " epsilon: Chance to sample a random action. Float between 0 and 1.\n",
     " \n",
     " Returns:\n",
     " A tuple (Q, episode_lengths).\n",
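The first corrected docstring describes an epsilon-greedy policy factory. A minimal sketch consistent with that docstring (the function name `make_epsilon_greedy_policy` and the returned-probabilities convention are assumptions here, since the diff shows only the docstring text) could look like:

```python
import numpy as np

def make_epsilon_greedy_policy(Q, epsilon, nA):
    """
    Creates an epsilon-greedy policy from a Q-function.

    Args:
        Q: A dictionary that maps from state -> action-values.
            Each value is a numpy array of length nA (see below)
        epsilon: The probability to select a random action. Float between 0 and 1.
        nA: Number of actions in the environment.

    Returns:
        A function that takes an observation and returns a numpy array
        of length nA with the probability of each action.
    """
    def policy_fn(observation):
        # Spread the exploration mass (epsilon) uniformly over all actions.
        A = np.ones(nA, dtype=float) * epsilon / nA
        # Put the remaining (1 - epsilon) mass on the current greedy action.
        best_action = int(np.argmax(Q[observation]))
        A[best_action] += 1.0 - epsilon
        return A
    return policy_fn
```

With `epsilon = 0.1` and `nA = 2`, the greedy action gets probability 0.95 and the other action 0.05, which is the split the docstring's "probability to select a random action" refers to.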

TD/Q-Learning.ipynb (+2 −2)

@@ -49,7 +49,7 @@
     " Args:\n",
     " Q: A dictionary that maps from state -> action-values.\n",
     " Each value is a numpy array of length nA (see below)\n",
-    " epsilon: The probability to select a random action . float between 0 and 1.\n",
+    " epsilon: The probability to select a random action. Float between 0 and 1.\n",
     " nA: Number of actions in the environment.\n",
     " \n",
     " Returns:\n",
@@ -81,7 +81,7 @@
     " num_episodes: Number of episodes to run for.\n",
     " discount_factor: Gamma discount factor.\n",
     " alpha: TD learning rate.\n",
-    " epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
+    " epsilon: Chance to sample a random action. Float between 0 and 1.\n",
     " \n",
     " Returns:\n",
     " A tuple (Q, episode_lengths).\n",
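The second docstring belongs to the Q-learning routine itself: off-policy TD control that updates toward the greedy (max) target while behaving epsilon-greedily. A self-contained sketch matching that signature follows; the minimal environment interface it assumes (`reset() -> state`, `step(action) -> (state, reward, done)`, and an `nA` attribute) is an illustration-only simplification, since real Gym-style environments return additional values from `step()`:

```python
import itertools
from collections import defaultdict
import numpy as np

def q_learning(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):
    """
    Q-Learning (off-policy TD control) sketch.

    Args:
        env: Environment with reset()/step() as described above (assumed interface).
        num_episodes: Number of episodes to run for.
        discount_factor: Gamma discount factor.
        alpha: TD learning rate.
        epsilon: Chance to sample a random action. Float between 0 and 1.

    Returns:
        A tuple (Q, episode_lengths).
    """
    Q = defaultdict(lambda: np.zeros(env.nA))
    episode_lengths = np.zeros(num_episodes)

    for i_episode in range(num_episodes):
        state = env.reset()
        for t in itertools.count():
            # Epsilon-greedy behaviour policy.
            if np.random.rand() < epsilon:
                action = np.random.randint(env.nA)
            else:
                action = int(np.argmax(Q[state]))
            next_state, reward, done = env.step(action)
            # TD update toward the greedy target -- the off-policy part:
            # the max is over actions, regardless of what the policy would do next.
            td_target = reward + discount_factor * np.max(Q[next_state])
            Q[state][action] += alpha * (td_target - Q[state][action])
            state = next_state
            if done:
                episode_lengths[i_episode] = t + 1
                break

    return Q, episode_lengths
```

The `alpha` and `epsilon` parameters are exactly the two whose docstrings this commit cleans up: `alpha` scales how far each TD error moves the estimate, and `epsilon` is the chance to sample a random action on each step.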

0 commit comments
