@@ -28,7 +28,7 @@ def prepare_loss(self, entropy_beta):
       entropy = - tf.reduce_sum(self.pi * log_pi, reduction_indices=1)
 
       # policy loss (output)  (Adding minus, because the original paper's objective function is for gradient ascent, but we use gradient descent optimizer.)
-      policy_loss = - tf.reduce_sum( tf.reduce_sum( tf.mul( log_pi, self.a ), reduction_indices=1 ) * self.td + entropy * entropy_beta )
+      policy_loss = - tf.reduce_sum( tf.reduce_sum( tf.multiply( log_pi, self.a ), reduction_indices=1 ) * self.td + entropy * entropy_beta )
 
       # R (input for value)
       self.r = tf.placeholder("float", [None])
@@ -162,7 +162,7 @@ def __init__(self,
       self.W_fc1, self.b_fc1 = self._fc_variable([2592, 256])
 
       # lstm
-      self.lstm = tf.nn.rnn_cell.BasicLSTMCell(256, state_is_tuple=True)
+      self.lstm = tf.contrib.rnn.BasicLSTMCell(256, state_is_tuple=True)
 
       # weight for policy output layer
       self.W_fc2, self.b_fc2 = self._fc_variable([256, action_size])
@@ -188,7 +188,7 @@ def __init__(self,
 
       self.initial_lstm_state0 = tf.placeholder(tf.float32, [1, 256])
       self.initial_lstm_state1 = tf.placeholder(tf.float32, [1, 256])
-      self.initial_lstm_state = tf.nn.rnn_cell.LSTMStateTuple(self.initial_lstm_state0,
+      self.initial_lstm_state = tf.contrib.rnn.LSTMStateTuple(self.initial_lstm_state0,
                                                               self.initial_lstm_state1)
 
       # Unrolling LSTM up to LOCAL_T_MAX time steps. (= 5time steps.)
@@ -215,13 +215,13 @@ def __init__(self,
       self.v = tf.reshape( v_, [-1] )
 
       scope.reuse_variables()
-      self.W_lstm = tf.get_variable("BasicLSTMCell/Linear/Matrix")
-      self.b_lstm = tf.get_variable("BasicLSTMCell/Linear/Bias")
+      self.W_lstm = tf.get_variable("basic_lstm_cell/weights")
+      self.b_lstm = tf.get_variable("basic_lstm_cell/biases")
 
     self.reset_state()
 
   def reset_state(self):
-    self.lstm_state_out = tf.nn.rnn_cell.LSTMStateTuple(np.zeros([1, 256]),
+    self.lstm_state_out = tf.contrib.rnn.LSTMStateTuple(np.zeros([1, 256]),
                                                         np.zeros([1, 256]))
 
   def run_policy_and_value(self, sess, s_t):