from sklearn.preprocessing import StandardScaler


+ import tensorflow as tf
+ # if tf.__version__.startswith('2'):
+ #   tf.compat.v1.disable_eager_execution()
+
+
# Let's use AAPL (Apple), MSI (Motorola), SBUX (Starbucks)
def get_data():
  # returns a T x 3 list of stock prices
@@ -270,10 +275,10 @@ def update_replay_memory(self, state, action, reward, next_state, done):
  def act(self, state):
    if np.random.rand() <= self.epsilon:
      return np.random.choice(self.action_size)
-     act_values = self.model.predict(state)
+     act_values = self.model.predict(state, verbose=0)
    return np.argmax(act_values[0])  # returns action

-
+   @tf.function
  def replay(self, batch_size=32):
    # first check if replay buffer contains enough data
    if self.memory.size < batch_size:
@@ -288,7 +293,7 @@ def replay(self, batch_size=32):
    done = minibatch['d']

    # Calculate the tentative target: Q(s',a)
-     target = rewards + (1 - done) * self.gamma * np.amax(self.model.predict(next_states), axis=1)
+     target = rewards + (1 - done) * self.gamma * np.amax(self.model.predict(next_states, verbose=0), axis=1)

    # With the Keras API, the target (usually) must have the same
    # shape as the predictions.
@@ -298,7 +303,7 @@ def replay(self, batch_size=32):
    # the prediction for all values.
    # Then, only change the targets for the actions taken.
    # Q(s,a)
-     target_full = self.model.predict(states)
+     target_full = self.model.predict(states, verbose=0)
    target_full[np.arange(batch_size), actions] = target

    # Run one training step
@@ -316,6 +321,7 @@ def save(self, name):
    self.model.save_weights(name)


+
def play_one_episode(agent, env, is_train):
  # note: after transforming states are already 1xD
  state = env.reset()
@@ -340,6 +346,7 @@ def play_one_episode(agent, env, is_train):
  # config
  models_folder = 'rl_trader_models'
  rewards_folder = 'rl_trader_rewards'
+   model_file = 'dqn.weights.h5'
  num_episodes = 2000
  batch_size = 32
  initial_investment = 20000
@@ -383,7 +390,7 @@ def play_one_episode(agent, env, is_train):
    agent.epsilon = 0.01

    # load trained weights
-     agent.load(f'{models_folder}/dqn.h5')
+     agent.load(f'{models_folder}/{model_file}')

  # play the game num_episodes times
  for e in range(num_episodes):
@@ -396,7 +403,7 @@ def play_one_episode(agent, env, is_train):
  # save the weights when we are done
  if args.mode == 'train':
    # save the DQN
-     agent.save(f'{models_folder}/dqn.h5')
+     agent.save(f'{models_folder}/{model_file}')

    # save the scaler
    with open(f'{models_folder}/scaler.pkl', 'wb') as f: