Commit 169f52f

TensorFlow r1.0 upgrade
1 parent f7ed026 commit 169f52f

File tree

3 files changed: +8 −8 lines changed

README.md

+1 −1

@@ -68,7 +68,7 @@ Score plots of local threads of pong were like these. (with GTX980Ti)
 Scores are not averaged using global network unlike the original paper.
 
 ## Requirements
-- TensorFlow r0.12
+- TensorFlow r1.0
 - numpy
 - cv2
 - matplotlib

a3c_visualize.py

+1 −1

@@ -41,7 +41,7 @@
                                   device = device)
 
 sess = tf.Session()
-init = tf.initialize_all_variables()
+init = tf.global_variables_initializer()
 sess.run(init)
 
 saver = tf.train.Saver()
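
Note: a minimal sketch (not part of this repository) of the variable-initialization idiom the hunk above switches to; the variable `w` below is hypothetical, added only so the initializer has something to initialize.

import tensorflow as tf

# Hypothetical variable, just to give the initializer work to do.
w = tf.Variable(tf.zeros([256]), name="w")

sess = tf.Session()
# tf.initialize_all_variables() is deprecated; TensorFlow r1.0 code
# uses tf.global_variables_initializer() instead.
init = tf.global_variables_initializer()
sess.run(init)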

game_ac_network.py

+6 −6

@@ -28,7 +28,7 @@ def prepare_loss(self, entropy_beta):
       entropy = -tf.reduce_sum(self.pi * log_pi, reduction_indices=1)
 
       # policy loss (output) (Adding minus, because the original paper's objective function is for gradient ascent, but we use gradient descent optimizer.)
-      policy_loss = - tf.reduce_sum( tf.reduce_sum( tf.mul( log_pi, self.a ), reduction_indices=1 ) * self.td + entropy * entropy_beta )
+      policy_loss = - tf.reduce_sum( tf.reduce_sum( tf.multiply( log_pi, self.a ), reduction_indices=1 ) * self.td + entropy * entropy_beta )
 
       # R (input for value)
       self.r = tf.placeholder("float", [None])
@@ -162,7 +162,7 @@ def __init__(self,
     self.W_fc1, self.b_fc1 = self._fc_variable([2592, 256])
 
     # lstm
-    self.lstm = tf.nn.rnn_cell.BasicLSTMCell(256, state_is_tuple=True)
+    self.lstm = tf.contrib.rnn.BasicLSTMCell(256, state_is_tuple=True)
 
     # weight for policy output layer
     self.W_fc2, self.b_fc2 = self._fc_variable([256, action_size])
@@ -188,7 +188,7 @@ def __init__(self,
 
     self.initial_lstm_state0 = tf.placeholder(tf.float32, [1, 256])
     self.initial_lstm_state1 = tf.placeholder(tf.float32, [1, 256])
-    self.initial_lstm_state = tf.nn.rnn_cell.LSTMStateTuple(self.initial_lstm_state0,
+    self.initial_lstm_state = tf.contrib.rnn.LSTMStateTuple(self.initial_lstm_state0,
                                                             self.initial_lstm_state1)
 
     # Unrolling LSTM up to LOCAL_T_MAX time steps. (= 5time steps.)
@@ -215,13 +215,13 @@ def __init__(self,
       self.v = tf.reshape( v_, [-1] )
 
       scope.reuse_variables()
-      self.W_lstm = tf.get_variable("BasicLSTMCell/Linear/Matrix")
-      self.b_lstm = tf.get_variable("BasicLSTMCell/Linear/Bias")
+      self.W_lstm = tf.get_variable("basic_lstm_cell/weights")
+      self.b_lstm = tf.get_variable("basic_lstm_cell/biases")
 
       self.reset_state()
 
   def reset_state(self):
-    self.lstm_state_out = tf.nn.rnn_cell.LSTMStateTuple(np.zeros([1, 256]),
+    self.lstm_state_out = tf.contrib.rnn.LSTMStateTuple(np.zeros([1, 256]),
                                                         np.zeros([1, 256]))
 
   def run_policy_and_value(self, sess, s_t):
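
Note: a minimal sketch (independent of the repository code) of the r0.12 → r1.0 renames this file now relies on; the placeholder names and shapes below are hypothetical.

import numpy as np
import tensorflow as tf

# tf.mul was renamed in r1.0; tf.multiply is the replacement.
a      = tf.placeholder(tf.float32, [None, 4])   # hypothetical action one-hots
log_pi = tf.placeholder(tf.float32, [None, 4])   # hypothetical log-policy
weighted_sum = tf.reduce_sum(tf.multiply(log_pi, a), reduction_indices=1)

# The RNN cell classes moved from tf.nn.rnn_cell to tf.contrib.rnn in r1.0,
# and the LSTM's internal variables were renamed (per the diff above,
# "BasicLSTMCell/Linear/Matrix" became "basic_lstm_cell/weights"), which is
# why the tf.get_variable lookups had to change as well.
lstm  = tf.contrib.rnn.BasicLSTMCell(256, state_is_tuple=True)
state = tf.contrib.rnn.LSTMStateTuple(np.zeros([1, 256], dtype=np.float32),
                                      np.zeros([1, 256], dtype=np.float32))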
