@@ -28,7 +28,7 @@ def prepare_loss(self, entropy_beta):
       entropy = - tf.reduce_sum(self.pi * log_pi, reduction_indices=1)
 
       # policy loss (output)  (Adding minus, because the original paper's objective function is for gradient ascent, but we use gradient descent optimizer.)
-      policy_loss = - tf.reduce_sum( tf.reduce_sum( tf.mul( log_pi, self.a ), reduction_indices=1 ) * self.td + entropy * entropy_beta )
+      policy_loss = - tf.reduce_sum( tf.reduce_sum( tf.multiply( log_pi, self.a ), reduction_indices=1 ) * self.td + entropy * entropy_beta )
 
       # R (input for value)
       self.r = tf.placeholder("float", [None])
@@ -162,7 +162,7 @@ def __init__(self,
       self.W_fc1, self.b_fc1 = self._fc_variable([2592, 256])
 
       # lstm
-      self.lstm = tf.nn.rnn_cell.BasicLSTMCell(256, state_is_tuple=True)
+      self.lstm = tf.contrib.rnn.BasicLSTMCell(256, state_is_tuple=True)
 
       # weight for policy output layer
       self.W_fc2, self.b_fc2 = self._fc_variable([256, action_size])
@@ -188,7 +188,7 @@ def __init__(self,
 
       self.initial_lstm_state0 = tf.placeholder(tf.float32, [1, 256])
       self.initial_lstm_state1 = tf.placeholder(tf.float32, [1, 256])
-      self.initial_lstm_state = tf.nn.rnn_cell.LSTMStateTuple(self.initial_lstm_state0,
+      self.initial_lstm_state = tf.contrib.rnn.LSTMStateTuple(self.initial_lstm_state0,
                                                               self.initial_lstm_state1)
 
       # Unrolling LSTM up to LOCAL_T_MAX time steps. (= 5time steps.)
@@ -215,13 +215,13 @@ def __init__(self,
       self.v = tf.reshape( v_, [-1] )
 
       scope.reuse_variables()
-      self.W_lstm = tf.get_variable("BasicLSTMCell/Linear/Matrix")
-      self.b_lstm = tf.get_variable("BasicLSTMCell/Linear/Bias")
+      self.W_lstm = tf.get_variable("basic_lstm_cell/weights")
+      self.b_lstm = tf.get_variable("basic_lstm_cell/biases")
 
     self.reset_state()
 
   def reset_state(self):
-    self.lstm_state_out = tf.nn.rnn_cell.LSTMStateTuple(np.zeros([1, 256]),
+    self.lstm_state_out = tf.contrib.rnn.LSTMStateTuple(np.zeros([1, 256]),
                                                         np.zeros([1, 256]))
 
   def run_policy_and_value(self, sess, s_t):