diff --git a/scripts/config/agent/open_manipulator_reacher_v0/sac.py b/scripts/config/agent/open_manipulator_reacher_v0/sac.py
new file mode 100644
index 0000000..8f3dca4
--- /dev/null
+++ b/scripts/config/agent/open_manipulator_reacher_v0/sac.py
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+"""Run module for SAC on OpenManipulator.
+
+- Author: Curt Park
+- Contact: curt.park@medipixel.io
+"""
+
+import numpy as np
+import torch
+import torch.optim as optim
+
+from algorithms.common.networks.mlp import MLP, FlattenMLP, TanhGaussianDistParams
+from algorithms.sac.agent import Agent
+
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+# hyper parameters
+hyper_params = {
+    "N_STEP": 3,
+    "GAMMA": 0.99,
+    "TAU": 1e-3,
+    "BUFFER_SIZE": int(1e6),
+    "BATCH_SIZE": 128,
+    "AUTO_ENTROPY_TUNING": True,
+    "LR_ACTOR": 3e-4,
+    "LR_VF": 3e-4,
+    "LR_QF1": 3e-4,
+    "LR_QF2": 3e-4,
+    "W_ENTROPY": 1e-3,
+    "W_MEAN_REG": 1e-3,
+    "W_STD_REG": 1e-3,
+    "W_PRE_ACTIVATION_REG": 0.0,
+    "LR_ENTROPY": 3e-4,
+    "DELAYED_UPDATE": 2,
+    "WEIGHT_DECAY": 0.0,
+    "INITIAL_RANDOM_ACTION": int(1e4),
+    "NETWORK": {
+        "ACTOR_HIDDEN_SIZES": [256, 256],
+        "VF_HIDDEN_SIZES": [256, 256],
+        "QF_HIDDEN_SIZES": [256, 256],
+    },
+}
+
+
+def get(env, args):
+    """Run training or test.
+
+    Args:
+        env (gym.Env): openAI Gym environment with continuous action space
+        args (argparse.Namespace): arguments including training settings
+
+    """
+    state_dim = env.observation_space.shape[0]
+    action_dim = env.action_space.shape[0]
+
+    hidden_sizes_actor = hyper_params["NETWORK"]["ACTOR_HIDDEN_SIZES"]
+    hidden_sizes_vf = hyper_params["NETWORK"]["VF_HIDDEN_SIZES"]
+    hidden_sizes_qf = hyper_params["NETWORK"]["QF_HIDDEN_SIZES"]
+
+    # target entropy
+    target_entropy = -np.prod((action_dim,)).item()  # heuristic
+
+    # create actor
+    actor = TanhGaussianDistParams(
+        input_size=state_dim, output_size=action_dim, hidden_sizes=hidden_sizes_actor
+    ).to(device)
+
+    # create v_critic
+    vf = MLP(input_size=state_dim, output_size=1, hidden_sizes=hidden_sizes_vf).to(
+        device
+    )
+    vf_target = MLP(
+        input_size=state_dim, output_size=1, hidden_sizes=hidden_sizes_vf
+    ).to(device)
+    vf_target.load_state_dict(vf.state_dict())
+
+    # create q_critic
+    qf_1 = FlattenMLP(
+        input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_qf
+    ).to(device)
+    qf_2 = FlattenMLP(
+        input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_qf
+    ).to(device)
+
+    # create optimizers
+    actor_optim = optim.Adam(
+        actor.parameters(),
+        lr=hyper_params["LR_ACTOR"],
+        weight_decay=hyper_params["WEIGHT_DECAY"],
+    )
+    vf_optim = optim.Adam(
+        vf.parameters(),
+        lr=hyper_params["LR_VF"],
+        weight_decay=hyper_params["WEIGHT_DECAY"],
+    )
+    qf_1_optim = optim.Adam(
+        qf_1.parameters(),
+        lr=hyper_params["LR_QF1"],
+        weight_decay=hyper_params["WEIGHT_DECAY"],
+    )
+    qf_2_optim = optim.Adam(
+        qf_2.parameters(),
+        lr=hyper_params["LR_QF2"],
+        weight_decay=hyper_params["WEIGHT_DECAY"],
+    )
+
+    # make tuples to create an agent
+    models = (actor, vf, vf_target, qf_1, qf_2)
+    optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)
+
+    # create an agent
+    return Agent(env, args, hyper_params, models, optims, target_entropy)
diff --git a/scripts/config/agent/open_manipulator_reacher_v0/td3.py b/scripts/config/agent/open_manipulator_reacher_v0/td3.py
index 665b1ed..24f6986 100755
--- a/scripts/config/agent/open_manipulator_reacher_v0/td3.py
+++ b/scripts/config/agent/open_manipulator_reacher_v0/td3.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-"""Run module for TD3 on LunarLanderContinuous-v2.
+"""Run module for TD3 on OpenManipulator.
 
 - Author: whikwon
 - Contact: whikwon@gmail.com