1
+ import io
1
2
import numpy as np
2
3
import sys
3
4
from gym .envs .toy_text import discrete
@@ -49,6 +50,7 @@ def __init__(self, shape=[4,4]):
49
50
s = it .iterindex
50
51
y , x = it .multi_index
51
52
53
+ # P[s][a] = (prob, next_state, reward, is_done)
52
54
P [s ] = {a : [] for a in range (nA )}
53
55
54
56
is_done = lambda s : s == 0 or s == (nS - 1 )
@@ -83,10 +85,19 @@ def __init__(self, shape=[4,4]):
83
85
super (GridworldEnv , self ).__init__ (nS , nA , P , isd )
84
86
85
87
def _render (self , mode = 'human' , close = False ):
88
+ """Render the current gridworld layout.
89
+
90
+ For example, a 4x4 grid rendered with mode="human" looks like:
91
+ T o o o
92
+ o x o o
93
+ o o o o
94
+ o o o T
95
+ where x marks the agent's current position and each T marks one of the two terminal states.
96
+ """
86
97
if close :
87
98
return
88
99
89
- outfile = StringIO () if mode == 'ansi' else sys .stdout
100
+ outfile = io . StringIO () if mode == 'ansi' else sys .stdout
90
101
91
102
grid = np .arange (self .nS ).reshape (self .shape )
92
103
it = np .nditer (grid , flags = ['multi_index' ])
@@ -102,7 +113,7 @@ def _render(self, mode='human', close=False):
102
113
output = " o "
103
114
104
115
if x == 0 :
105
- output = output .lstrip ()
116
+ output = output .lstrip ()
106
117
if x == self .shape [1 ] - 1 :
107
118
output = output .rstrip ()
108
119
@@ -111,4 +122,4 @@ def _render(self, mode='human', close=False):
111
122
if x == self .shape [1 ] - 1 :
112
123
outfile .write ("\n " )
113
124
114
- it .iternext ()
125
+ it .iternext ()
0 commit comments