from sklearn.preprocessing import StandardScaler


+ import tensorflow as tf
+ # if tf.__version__.startswith('2'):
+ #   tf.compat.v1.disable_eager_execution()
+
+
# Let's use AAPL (Apple), MSI (Motorola), SBUX (Starbucks)
def get_data():
  # returns a T x 3 list of stock prices
@@ -270,10 +275,10 @@ def update_replay_memory(self, state, action, reward, next_state, done):
  def act(self, state):
    if np.random.rand() <= self.epsilon:
      return np.random.choice(self.action_size)
-     act_values = self.model.predict(state)
+     act_values = self.model.predict(state, verbose=0)
    return np.argmax(act_values[0])  # returns action

-
+   @tf.function
  def replay(self, batch_size=32):
    # first check if replay buffer contains enough data
    if self.memory.size < batch_size:
@@ -288,7 +293,7 @@ def replay(self, batch_size=32):
    done = minibatch['d']

    # Calculate the tentative target: Q(s',a)
-     target = rewards + (1 - done) * self.gamma * np.amax(self.model.predict(next_states), axis=1)
+     target = rewards + (1 - done) * self.gamma * np.amax(self.model.predict(next_states, verbose=0), axis=1)

    # With the Keras API, the target (usually) must have the same
    # shape as the predictions.
@@ -298,7 +303,7 @@ def replay(self, batch_size=32):
    # the prediction for all values.
    # Then, only change the targets for the actions taken.
    # Q(s,a)
-     target_full = self.model.predict(states)
+     target_full = self.model.predict(states, verbose=0)
    target_full[np.arange(batch_size), actions] = target

    # Run one training step
@@ -316,6 +321,7 @@ def save(self, name):
    self.model.save_weights(name)


+
def play_one_episode(agent, env, is_train):
  # note: after transforming states are already 1xD
  state = env.reset()
@@ -340,6 +346,7 @@ def play_one_episode(agent, env, is_train):
  # config
  models_folder = 'rl_trader_models'
  rewards_folder = 'rl_trader_rewards'
+   model_file = 'dqn.weights.h5'
  num_episodes = 2000
  batch_size = 32
  initial_investment = 20000
@@ -383,7 +390,7 @@ def play_one_episode(agent, env, is_train):
    agent.epsilon = 0.01

    # load trained weights
-     agent.load(f'{models_folder}/dqn.h5')
+     agent.load(f'{models_folder}/{model_file}')

  # play the game num_episodes times
  for e in range(num_episodes):
@@ -396,7 +403,7 @@ def play_one_episode(agent, env, is_train):
  # save the weights when we are done
  if args.mode == 'train':
    # save the DQN
-     agent.save(f'{models_folder}/dqn.h5')
+     agent.save(f'{models_folder}/{model_file}')

    # save the scaler
    with open(f'{models_folder}/scaler.pkl', 'wb') as f: