Commit 85b5340: make format
Parent: 995a902

85 files changed (+816, -790 lines)

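Every hunk in this commit is mechanical re-formatting: long import and argument lists are re-flowed, stray blank lines and trailing whitespace are dropped, and multi-line print(...format(...)) calls get their closing parentheses moved onto their own lines. As a rough sketch of the kind of pass a `make format` target applies (this assumes a yapf-based setup; the project's Makefile and style configuration are not part of this diff):

# Hypothetical illustration only, not the project's actual tooling:
# re-format one source snippet with yapf's Python API, the way a
# repository-wide `make format` pass might.
from yapf.yapflib.yapf_api import FormatCode

SRC = (
    "from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Flatten, Input,\n"
    "                                LocalResponseNorm, MaxPool2d)\n"
)

# The 120-character column limit is an assumption; with it, the wrapped
# import above fits on a single line, matching the first hunk below.
formatted, changed = FormatCode(SRC, style_config='{based_on_style: pep8, column_limit: 120}')
print(changed)    # True when yapf rewrote the input
print(formatted)  # the import joined onto one line

Because a pass like this only touches layout, some hunks (for example in tutorial_tfrecord3.py and tutorial_Qlearning.py) differ only in whitespace.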

examples/basic_tutorials/tutorial_cifar10_cnn_static.py (+1, -2)

@@ -8,8 +8,7 @@
 import tensorflow as tf

 import tensorlayer as tl
-from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Flatten, Input,
-                                LocalResponseNorm, MaxPool2d)
+from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Flatten, Input, LocalResponseNorm, MaxPool2d)
 from tensorlayer.models import Model

 # enable debug logging

examples/data_process/tutorial_fast_affine_transform.py (+1, -1)

@@ -8,10 +8,10 @@
 import multiprocessing
 import time

+import cv2
 import numpy as np
 import tensorflow as tf

-import cv2
 import tensorlayer as tl

 # tl.logging.set_verbosity(tl.logging.DEBUG)

examples/data_process/tutorial_tfrecord3.py (+2, -2)

@@ -231,8 +231,8 @@ def distort_image(image, thread_id):


 def prefetch_input_data(
-    reader, file_pattern, is_training, batch_size, values_per_shard, input_queue_capacity_factor=16,
-    num_reader_threads=1, shard_queue_name="filename_queue", value_queue_name="input_queue"
+        reader, file_pattern, is_training, batch_size, values_per_shard, input_queue_capacity_factor=16,
+        num_reader_threads=1, shard_queue_name="filename_queue", value_queue_name="input_queue"
 ):
     """Prefetches string values from disk into an input queue.

examples/deprecated_tutorials/tutorial_imagenet_inceptionV3_distributed.py (+1, -2)

@@ -21,8 +21,7 @@
 import numpy as np
 import tensorflow as tf
 from tensorflow.contrib import slim
-from tensorflow.contrib.slim.python.slim.nets.inception_v3 import (inception_v3,
-                                                                    inception_v3_arg_scope)
+from tensorflow.contrib.slim.python.slim.nets.inception_v3 import (inception_v3, inception_v3_arg_scope)
 from tensorflow.python.framework.errors_impl import OutOfRangeError
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.training.basic_session_run_hooks import StopAtStepHook

examples/distributed_training/tutorial_cifar10_distributed_trainer.py (+1, -2)

@@ -18,8 +18,7 @@
 import tensorflow as tf

 import tensorlayer as tl
-from tensorlayer.layers import (BatchNormLayer, Conv2d, DenseLayer,
-                                FlattenLayer, InputLayer, MaxPool2d)
+from tensorlayer.layers import (BatchNormLayer, Conv2d, DenseLayer, FlattenLayer, InputLayer, MaxPool2d)

 tf.logging.set_verbosity(tf.logging.DEBUG)
 tl.logging.set_verbosity(tl.logging.DEBUG)

examples/reinforcement_learning/tutorial_A3C.py (+5, -4)

@@ -62,7 +62,6 @@

 tl.logging.set_verbosity(tl.logging.DEBUG)

-
 # add arguments in command --train/test
 parser = argparse.ArgumentParser(description='Train or test neural net motor controller.')
 parser.add_argument('--train', dest='train', action='store_true', default=False)

@@ -177,7 +176,7 @@ def save(self):  # save trained weights
             os.makedirs(path)
         tl.files.save_npz(self.actor.trainable_weights, name=os.path.join(path, 'model_actor.npz'))
         tl.files.save_npz(self.critic.trainable_weights, name=os.path.join(path, 'model_critic.npz'))
-
+
     def load(self):  # load trained weights
         path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
         tl.files.load_and_assign_npz(name=os.path.join(path, 'model_actor.npz'), network=self.actor)

@@ -296,7 +295,7 @@ def work(self, globalAC):
        COORD.join(worker_threads)

        GLOBAL_AC.save()
-
+
        plt.plot(GLOBAL_RUNNING_R)
        if not os.path.exists('image'):
            os.makedirs('image')

@@ -319,4 +318,6 @@ def work(self, globalAC):
        print(
            'Testing | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
                episode + 1, TEST_EPISODES, episode_reward,
-                time.time() - T0))
+                time.time() - T0
+            )
+        )

examples/reinforcement_learning/tutorial_AC.py (+2, -4)

@@ -78,8 +78,6 @@
 LR_A = 0.001  # learning rate for actor
 LR_C = 0.01  # learning rate for critic

-
-
 ############################### Actor-Critic ####################################


@@ -205,7 +203,7 @@ def load(self):  # load trained weights
        state_new, reward, done, info = env.step(action)
        state_new = state_new.astype(np.float32)

-        if done: reward = -20 # reward shaping trick
+        if done: reward = -20  # reward shaping trick
        # these may helpful in some tasks
        # if abs(s_new[0]) >= env.observation_space.high[0]:
        # # cart moves more than 2.4 units from the center

@@ -240,7 +238,7 @@ def load(self):  # load trained weights

        # Early Stopping for quick check
        if step >= MAX_STEPS:
-            print("Early Stopping") # Hao Dong: it is important for this task
+            print("Early Stopping")  # Hao Dong: it is important for this task
            break
    actor.save()
    critic.save()

examples/reinforcement_learning/tutorial_C51.py (+4, -2)

@@ -309,7 +309,8 @@ def _train_func(self, b_o, b_index, b_m):
            nepisode += 1
            print(
                'Training | Episode: {} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
-                    nepisode, episode_reward, time.time() - t0
+                    nepisode, episode_reward,
+                    time.time() - t0
                )
            )  # episode num starts from 1 in print


@@ -336,6 +337,7 @@ def _train_func(self, b_o, b_index, b_m):
            nepisode += 1
            print(
                'Testing | Episode: {} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
-                    nepisode, episode_reward, time.time() - t0
+                    nepisode, episode_reward,
+                    time.time() - t0
                )
            )

examples/reinforcement_learning/tutorial_DDPG.py (+7, -6)

@@ -29,7 +29,6 @@

 import argparse
 import os
-import threading
 import time

 import gym

@@ -159,9 +158,9 @@ def get_action(self, s, greedy=False):
        a = self.actor(np.array([s], dtype=np.float32))[0]
        if greedy:
            return a
-        return np.clip(np.random.normal(a, self.var),
-                       -self.action_range,
-                       self.action_range)  # add randomness to action selection for exploration
+        return np.clip(
+            np.random.normal(a, self.var), -self.action_range, self.action_range
+        )  # add randomness to action selection for exploration

    def learn(self):
        """

@@ -276,7 +275,7 @@ def load(self):
            all_episode_reward.append(all_episode_reward[-1] * 0.9 + episode_reward * 0.1)
        print(
            'Training | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
-                episode+1, TRAIN_EPISODES, episode_reward,
+                episode + 1, TRAIN_EPISODES, episode_reward,
                time.time() - t0
            )
        )

@@ -301,4 +300,6 @@ def load(self):
        print(
            'Testing | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
                episode + 1, TEST_EPISODES, episode_reward,
-                time.time() - t0))
+                time.time() - t0
+            )
+        )

examples/reinforcement_learning/tutorial_DPPO.py (+5, -3)

@@ -73,7 +73,6 @@
 # ppo-clip parameters
 EPSILON = 0.2

-
 ############################### DPPO ####################################


@@ -305,7 +304,8 @@ def work(self):

                print(
                    'Training | Episode: {}/{} | Worker: {} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
-                        GLOBAL_EP + 1, TRAIN_EPISODES, self.wid, ep_r, time.time() - T0
+                        GLOBAL_EP + 1, TRAIN_EPISODES, self.wid, ep_r,
+                        time.time() - T0
                    )
                )
                # record reward changes, plot later

@@ -372,4 +372,6 @@ def work(self):
        print(
            'Testing | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
                episode + 1, TEST_EPISODES, episode_reward,
-                time.time() - T0))
+                time.time() - T0
+            )
+        )

examples/reinforcement_learning/tutorial_DQN.py (+1, -2)

@@ -61,7 +61,6 @@
 num_episodes = 10000
 render = False  # display the game environment

-
 ##################### DQN ##########################


@@ -140,7 +139,7 @@ def load_ckpt(model):  # load trained weights
            rAll += r
            s = s1
            ## Reduce chance of random action if an episode is done.
-            if d == True:
+            if d ==True:
                e = 1. / ((i / 50) + 10)  # reduce e, GLIE: Greey in the limit with infinite Exploration
                break

examples/reinforcement_learning/tutorial_DQN_variants.py (+4, -2)

@@ -399,7 +399,8 @@ def load(self, path):
            nepisode += 1
            print(
                'Training | Episode: {} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
-                    nepisode, episode_reward, time.time() - t0
+                    nepisode, episode_reward,
+                    time.time() - t0
                )
            )  # episode num starts from 1 in print


@@ -426,6 +427,7 @@ def load(self, path):
            nepisode += 1
            print(
                'Testing | Episode: {} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
-                    nepisode, episode_reward, time.time() - t0
+                    nepisode, episode_reward,
+                    time.time() - t0
                )
            )

examples/reinforcement_learning/tutorial_PG.py (+10, -3)

@@ -60,6 +60,7 @@ class PolicyGradient:
     """
     PG class
     """
+
     def __init__(self, state_dim, action_num, learning_rate=0.02, gamma=0.99):
         self.gamma = gamma


@@ -113,7 +114,9 @@ def learn(self):

        with tf.GradientTape() as tape:
            _logits = self.model(np.vstack(self.state_buffer))
-            neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=_logits, labels=np.array(self.action_buffer))
+            neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
+                logits=_logits, labels=np.array(self.action_buffer)
+            )
            loss = tf.reduce_mean(neg_log_prob * discounted_reward_buffer_norm)

        grad = tape.gradient(loss, self.model.trainable_weights)

@@ -195,7 +198,9 @@ def load(self):
        print(
            'Training | Episode: {}/{} | Episode Reward: {:.0f} | Running Time: {:.4f}'.format(
                episode + 1, TRAIN_EPISODES, episode_reward,
-                time.time() - t0))
+                time.time() - t0
+            )
+        )

        if episode == 0:
            all_episode_reward.append(episode_reward)

@@ -223,4 +228,6 @@ def load(self):
        print(
            'Testing | Episode: {}/{} | Episode Reward: {:.0f} | Running Time: {:.4f}'.format(
                episode + 1, TEST_EPISODES, episode_reward,
-                time.time() - t0))
+                time.time() - t0
+            )
+        )

examples/reinforcement_learning/tutorial_PPO.py (+7, -3)

@@ -63,14 +63,14 @@
 # ppo-clip parameters
 EPSILON = 0.2

-
 ############################### PPO ####################################


 class PPO(object):
     """
     PPO class
     """
+
     def __init__(self, state_dim, action_dim, action_bound, method='clip'):
         # critic
         with tf.name_scope('critic'):

@@ -288,7 +288,9 @@ def finish_path(self, next_state):
        agent.finish_path(state_)
        print(
            'Training | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
-                episode + 1, TRAIN_EPISODES, episode_reward, time.time() - t0)
+                episode + 1, TRAIN_EPISODES, episode_reward,
+                time.time() - t0
+            )
        )
        if episode == 0:
            all_episode_reward.append(episode_reward)

@@ -316,4 +318,6 @@ def finish_path(self, next_state):
        print(
            'Testing | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
                episode + 1, TEST_EPISODES, episode_reward,
-                time.time() - t0))
+                time.time() - t0
+            )
+        )

examples/reinforcement_learning/tutorial_Qlearning.py (+5, -3)

@@ -23,7 +23,7 @@

 parser.add_argument(
     '--save_path', default=None, help='folder to save if mode == train else model path,'
-                    'qnet will be saved once target net update'
+    'qnet will be saved once target net update'
 )
 parser.add_argument('--seed', help='random seed', type=int, default=0)
 parser.add_argument('--env_id', default='FrozenLake-v0')

@@ -65,7 +65,8 @@
            break
        print(
            'Training | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
-                i + 1, num_episodes, rAll, time.time() - t0
+                i + 1, num_episodes, rAll,
+                time.time() - t0
            )
        )
        if i == 0:

@@ -106,6 +107,7 @@
            break
        print(
            'Testing | Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format(
-                i + 1, num_episodes, rAll, time.time() - t0
+                i + 1, num_episodes, rAll,
+                time.time() - t0
            )
        )
