Commit c8574f2
committed: added one more example
1 parent 9a52283 commit c8574f2

3 files changed: +178 -4 lines changed
Lines changed: 97 additions & 0 deletions (new file)

```yaml
params:
  seed: 7

  #devices: [0, 0]

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
          scale: 0.02
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True

    cnn:
      permute_input: True
      type: conv2d
      activation: relu
      initializer:
        name: orthogonal_initializer
        gain: 1.41421356237
      convs:
        - filters: 32
          kernel_size: 8
          strides: 4
          padding: 0
        - filters: 64
          kernel_size: 4
          strides: 2
          padding: 0
        - filters: 64
          kernel_size: 3
          strides: 1
          padding: 0

    mlp:
      units: [256]
      activation: relu
      initializer:
        name: orthogonal_initializer
        gain: 1.41421356237
    rnn:
      name: 'lstm'
      units: 512
      layers: 1
      before_mlp: True
      layer_norm: True

  config:
    name: PickCube_RGB_Maniskill
    full_experiment_name: PickCube_RGB_Maniskill
    env_name: maniskill
    multi_gpu: False
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    normalize_advantage: True
    use_smooth_clamp: False
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 1e-4
    lr_schedule: None #adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 5000
    save_best_after: 100
    save_frequency: 50
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 8
    num_actors: 512
    minibatch_size: 2048
    mini_epochs: 2
    critic_coef: 2
    clip_value: True
    bounds_loss_coef: 0.0001

    env_config:
      # see https://maniskill.readthedocs.io/en/latest/user_guide/concepts/observation.html
      env_name: PickCube-v1
      obs_mode: "rgbd"
      control_mode: "pd_ee_delta_pose" # there is also "pd_joint_delta_pos", ..
```
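Configs like this one are consumed by the rl_games runner. A minimal launch sketch, assuming the file above is saved under a hypothetical path (the page does not show the new file's name) and that the `maniskill` env factory from rl_games/envs/maniskill.py is registered under `env_name: maniskill`:

```python
# Minimal sketch: train with the config above via the rl_games Runner.
# The YAML path below is hypothetical -- the diff does not show the real filename.
import yaml

from rl_games.torch_runner import Runner

with open('rl_games/configs/maniskill/ppo_maniskill_pickcube_rgb.yaml') as f:  # hypothetical path
    config = yaml.safe_load(f)

runner = Runner()
runner.load(config)
runner.run({'train': True, 'play': False})
```

Equivalently, the repository's runner.py can be invoked from the command line with `--train --file <config>`.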
Lines changed: 72 additions & 0 deletions (new file)

```yaml
params:
  seed: 7

  #devices: [0, 0]

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [256, 128, 64]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None

  config:
    name: PickCube_State_Maniskill
    full_experiment_name: PickCube_State_Maniskill
    env_name: maniskill
    multi_gpu: False
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    normalize_advantage: True
    use_smooth_clamp: False
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 3e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 1000
    save_best_after: 100
    save_frequency: 50
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 8
    num_actors: 1024
    minibatch_size: 4096
    mini_epochs: 5
    critic_coef: 2
    clip_value: True
    bounds_loss_coef: 0.0001

    env_config:
      env_name: PickCube-v1
      obs_mode: "state" # there is also "state_dict", "rgbd", ...
      control_mode: "pd_ee_delta_pose" # there is also "pd_joint_delta_pos", ..
```

rl_games/envs/maniskill.py

Lines changed: 9 additions & 4 deletions

```diff
@@ -34,20 +34,25 @@ def __init__(self, config_name, num_actors, **kwargs):
         import gymnasium
         import mani_skill.envs
         from mani_skill.vector.wrappers.gymnasium import ManiSkillVectorEnv
+        from mani_skill.utils.wrappers.flatten import FlattenRGBDObservationWrapper
         self.batch_size = num_actors
         env_name=kwargs.pop('env_name')
         self.seed = kwargs.pop('seed', 0) # not sure how to set this in mani_skill
-        env = gymnasium.make(
+        self.env = gymnasium.make(
             env_name,
             num_envs=num_actors,
             **kwargs
         )
+        #self.env = FlattenRGBDObservationWrapper(self.env, rgb=True, depth=False, state=False, sep_depth=False)
         # need to use this wrapper to have automatic reset for done envs
-        self.env = ManiSkillVectorEnv(env)
+        self.env = ManiSkillVectorEnv(self.env)
+
+        print(f"ManiSkill env: {env_name} with {num_actors} actors")
+        print(f"Original observation space: {self.env.observation_space}")
         self.action_space = wrappers.OldGymWrapper.convert_space(remove_batch_dim(self.env.action_space))
         self.observation_space = wrappers.OldGymWrapper.convert_space(remove_batch_dim(self.env.observation_space))
-
-        # remove first batch dim from obs spacce and action space
+        print(f"Converted action space: {self.action_space}")
+        print(f"Converted observation space: {self.observation_space}")
```
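The wrapper relies on a `remove_batch_dim` helper and on `wrappers.OldGymWrapper.convert_space`, both defined elsewhere in rl_games and not shown in this hunk. Purely to illustrate what stripping the batch dimension involves, here is a hypothetical sketch of such a helper:

```python
# Hypothetical sketch only -- the real remove_batch_dim lives elsewhere in
# rl_games and may differ. Shown to illustrate the space conversion above.
from gymnasium import spaces

def remove_batch_dim(space):
    """Strip the leading num_envs dimension from a batched gymnasium space."""
    if isinstance(space, spaces.Box):
        # low/high carry the batched shape, so indexing drops the first dim
        return spaces.Box(low=space.low[0], high=space.high[0], dtype=space.dtype)
    if isinstance(space, spaces.Dict):
        # recurse into dict observations (e.g. rgbd camera dicts)
        return spaces.Dict({k: remove_batch_dim(v) for k, v in space.spaces.items()})
    raise NotImplementedError(f"Unsupported space type: {type(space)}")
```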