Skip to content

Commit 60013e5

Browse files
author
Alex
committed
Sync function descriptions. Lambda -> gamma (discount factor). Added description of env.nS and env.nA
1 parent 094ebf7 commit 60013e5

6 files changed

+90
-126
lines changed

DP/Policy Evaluation Solution.ipynb

+14-20
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 53,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"import numpy as np\n",
@@ -43,9 +41,11 @@
4341
" Args:\n",
4442
" policy: [S, A] shaped matrix representing the policy.\n",
4543
" env: OpenAI env. env.P represents the transition probabilities of the environment.\n",
46-
" env.P[s][a] is a (prob, next_state, reward, done) tuple.\n",
44+
" env.P[s][a] is a list of transition tuples (prob, next_state, reward, done).\n",
45+
"    env.nS is the number of available states.\n",
46+
"    env.nA is the number of available actions.\n",
4747
" theta: We stop evaluation once our value function change is less than theta for all states.\n",
48-
" discount_factor: lambda discount factor.\n",
48+
" discount_factor: gamma discount factor.\n",
4949
" \n",
5050
" Returns:\n",
5151
" Vector of length env.nS representing the value function.\n",
@@ -75,9 +75,7 @@
7575
{
7676
"cell_type": "code",
7777
"execution_count": 56,
78-
"metadata": {
79-
"collapsed": false
80-
},
78+
"metadata": {},
8179
"outputs": [],
8280
"source": [
8381
"random_policy = np.ones([env.nS, env.nA]) / env.nA\n",
@@ -87,9 +85,7 @@
8785
{
8886
"cell_type": "code",
8987
"execution_count": 57,
90-
"metadata": {
91-
"collapsed": false
92-
},
88+
"metadata": {},
9389
"outputs": [
9490
{
9591
"name": "stdout",
@@ -122,9 +118,7 @@
122118
{
123119
"cell_type": "code",
124120
"execution_count": 51,
125-
"metadata": {
126-
"collapsed": false
127-
},
121+
"metadata": {},
128122
"outputs": [],
129123
"source": [
130124
"# Test: Make sure the evaluated policy is what we expected\n",
@@ -144,23 +138,23 @@
144138
],
145139
"metadata": {
146140
"kernelspec": {
147-
"display_name": "Python 3",
141+
"display_name": "Python 2",
148142
"language": "python",
149-
"name": "python3"
143+
"name": "python2"
150144
},
151145
"language_info": {
152146
"codemirror_mode": {
153147
"name": "ipython",
154-
"version": 3
148+
"version": 2
155149
},
156150
"file_extension": ".py",
157151
"mimetype": "text/x-python",
158152
"name": "python",
159153
"nbconvert_exporter": "python",
160-
"pygments_lexer": "ipython3",
161-
"version": "3.5.1"
154+
"pygments_lexer": "ipython2",
155+
"version": "2.7.12"
162156
}
163157
},
164158
"nbformat": 4,
165-
"nbformat_minor": 0
159+
"nbformat_minor": 1
166160
}

DP/Policy Evaluation.ipynb

+12-18
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 23,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"import numpy as np\n",
@@ -29,9 +27,7 @@
2927
{
3028
"cell_type": "code",
3129
"execution_count": 25,
32-
"metadata": {
33-
"collapsed": false
34-
},
30+
"metadata": {},
3531
"outputs": [],
3632
"source": [
3733
"def policy_eval(policy, env, discount_factor=1.0, theta=0.00001):\n",
@@ -42,6 +38,8 @@
4238
" policy: [S, A] shaped matrix representing the policy.\n",
4339
" env: OpenAI env. env.P represents the transition probabilities of the environment.\n",
4440
" env.P[s][a] is a list of transition tuples (prob, next_state, reward, done).\n",
41+
"    env.nS is the number of available states.\n",
42+
"    env.nA is the number of available actions.\n",
4543
" theta: We stop evaluation once our value function change is less than theta for all states.\n",
4644
" discount_factor: gamma discount factor.\n",
4745
" \n",
@@ -59,9 +57,7 @@
5957
{
6058
"cell_type": "code",
6159
"execution_count": 26,
62-
"metadata": {
63-
"collapsed": false
64-
},
60+
"metadata": {},
6561
"outputs": [],
6662
"source": [
6763
"random_policy = np.ones([env.nS, env.nA]) / env.nA\n",
@@ -71,9 +67,7 @@
7167
{
7268
"cell_type": "code",
7369
"execution_count": 22,
74-
"metadata": {
75-
"collapsed": false
76-
},
70+
"metadata": {},
7771
"outputs": [
7872
{
7973
"ename": "AssertionError",
@@ -107,23 +101,23 @@
107101
],
108102
"metadata": {
109103
"kernelspec": {
110-
"display_name": "Python 3",
104+
"display_name": "Python 2",
111105
"language": "python",
112-
"name": "python3"
106+
"name": "python2"
113107
},
114108
"language_info": {
115109
"codemirror_mode": {
116110
"name": "ipython",
117-
"version": 3
111+
"version": 2
118112
},
119113
"file_extension": ".py",
120114
"mimetype": "text/x-python",
121115
"name": "python",
122116
"nbconvert_exporter": "python",
123-
"pygments_lexer": "ipython3",
124-
"version": "3.5.1"
117+
"pygments_lexer": "ipython2",
118+
"version": "2.7.12"
125119
}
126120
},
127121
"nbformat": 4,
128-
"nbformat_minor": 0
122+
"nbformat_minor": 1
129123
}

DP/Policy Iteration Solution.ipynb

+15-19
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 1,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"import numpy as np\n",
@@ -45,9 +43,11 @@
4543
" Args:\n",
4644
" policy: [S, A] shaped matrix representing the policy.\n",
4745
" env: OpenAI env. env.P represents the transition probabilities of the environment.\n",
48-
" env.P[s][a] is a (prob, next_state, reward, done) tuple.\n",
49-
" theta: We stop evaluation one our value function change is less than theta for all states.\n",
50-
" discount_factor: lambda discount factor.\n",
46+
" env.P[s][a] is a list of transition tuples (prob, next_state, reward, done).\n",
47+
"    env.nS is the number of available states.\n",
48+
"    env.nA is the number of available actions.\n",
49+
" theta: We stop evaluation once our value function change is less than theta for all states.\n",
50+
" discount_factor: gamma discount factor.\n",
5151
" \n",
5252
" Returns:\n",
5353
" Vector of length env.nS representing the value function.\n",
@@ -91,7 +91,7 @@
9191
"    env: The OpenAI environment.\n",
9292
" policy_eval_fn: Policy Evaluation function that takes 3 arguments:\n",
9393
" policy, env, discount_factor.\n",
94-
" discount_factor: Lambda discount factor.\n",
94+
" discount_factor: gamma discount factor.\n",
9595
" \n",
9696
" Returns:\n",
9797
" A tuple (policy, V). \n",
@@ -136,9 +136,7 @@
136136
{
137137
"cell_type": "code",
138138
"execution_count": 64,
139-
"metadata": {
140-
"collapsed": false
141-
},
139+
"metadata": {},
142140
"outputs": [
143141
{
144142
"name": "stdout",
@@ -203,9 +201,7 @@
203201
{
204202
"cell_type": "code",
205203
"execution_count": 59,
206-
"metadata": {
207-
"collapsed": false
208-
},
204+
"metadata": {},
209205
"outputs": [],
210206
"source": [
211207
"# Test the value function\n",
@@ -225,23 +221,23 @@
225221
],
226222
"metadata": {
227223
"kernelspec": {
228-
"display_name": "Python 3",
224+
"display_name": "Python 2",
229225
"language": "python",
230-
"name": "python3"
226+
"name": "python2"
231227
},
232228
"language_info": {
233229
"codemirror_mode": {
234230
"name": "ipython",
235-
"version": 3
231+
"version": 2
236232
},
237233
"file_extension": ".py",
238234
"mimetype": "text/x-python",
239235
"name": "python",
240236
"nbconvert_exporter": "python",
241-
"pygments_lexer": "ipython3",
242-
"version": "3.5.1"
237+
"pygments_lexer": "ipython2",
238+
"version": "2.7.12"
243239
}
244240
},
245241
"nbformat": 4,
246-
"nbformat_minor": 0
242+
"nbformat_minor": 1
247243
}

DP/Policy Iteration.ipynb

+16-22
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 5,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"import numpy as np\n",
@@ -45,9 +43,11 @@
4543
" Args:\n",
4644
" policy: [S, A] shaped matrix representing the policy.\n",
4745
" env: OpenAI env. env.P represents the transition probabilities of the environment.\n",
48-
" env.P[s][a] is a (prob, next_state, reward, done) tuple.\n",
49-
" theta: We stop evaluation one our value function change is less than theta for all states.\n",
50-
" discount_factor: lambda discount factor.\n",
46+
" env.P[s][a] is a list of transition tuples (prob, next_state, reward, done).\n",
47+
"    env.nS is the number of available states.\n",
48+
"    env.nA is the number of available actions.\n",
49+
" theta: We stop evaluation once our value function change is less than theta for all states.\n",
50+
" discount_factor: gamma discount factor.\n",
5151
" \n",
5252
" Returns:\n",
5353
" Vector of length env.nS representing the value function.\n",
@@ -77,9 +77,7 @@
7777
{
7878
"cell_type": "code",
7979
"execution_count": 13,
80-
"metadata": {
81-
"collapsed": false
82-
},
80+
"metadata": {},
8381
"outputs": [],
8482
"source": [
8583
"def policy_improvement(env, policy_eval_fn=policy_eval, discount_factor=1.0):\n",
@@ -91,7 +89,7 @@
9189
"    env: The OpenAI environment.\n",
9290
" policy_eval_fn: Policy Evaluation function that takes 3 arguments:\n",
9391
" policy, env, discount_factor.\n",
94-
" discount_factor: Lambda discount factor.\n",
92+
" discount_factor: gamma discount factor.\n",
9593
" \n",
9694
" Returns:\n",
9795
" A tuple (policy, V). \n",
@@ -113,9 +111,7 @@
113111
{
114112
"cell_type": "code",
115113
"execution_count": 14,
116-
"metadata": {
117-
"collapsed": false
118-
},
114+
"metadata": {},
119115
"outputs": [
120116
{
121117
"name": "stdout",
@@ -180,9 +176,7 @@
180176
{
181177
"cell_type": "code",
182178
"execution_count": 15,
183-
"metadata": {
184-
"collapsed": false
185-
},
179+
"metadata": {},
186180
"outputs": [
187181
{
188182
"ename": "AssertionError",
@@ -216,23 +210,23 @@
216210
],
217211
"metadata": {
218212
"kernelspec": {
219-
"display_name": "Python 3",
213+
"display_name": "Python 2",
220214
"language": "python",
221-
"name": "python3"
215+
"name": "python2"
222216
},
223217
"language_info": {
224218
"codemirror_mode": {
225219
"name": "ipython",
226-
"version": 3
220+
"version": 2
227221
},
228222
"file_extension": ".py",
229223
"mimetype": "text/x-python",
230224
"name": "python",
231225
"nbconvert_exporter": "python",
232-
"pygments_lexer": "ipython3",
233-
"version": "3.5.1"
226+
"pygments_lexer": "ipython2",
227+
"version": "2.7.12"
234228
}
235229
},
236230
"nbformat": 4,
237-
"nbformat_minor": 0
231+
"nbformat_minor": 1
238232
}

0 commit comments

Comments
 (0)