
Commit 51ac9aa

Denys88 and DenSumy authored
[ISSUE-322] added maniskill Ant demo (#323)

* added maniskill ant demo
* added one more example

Co-authored-by: Denys Makoviichuk <[email protected]>

1 parent b483bd6 · commit 51ac9aa

File tree

7 files changed: +334 -24 lines changed

rl_games/common/env_configurations.py

Lines changed: 4 additions & 0 deletions

@@ -425,6 +425,10 @@ def create_env(name, **kwargs):
         'env_creator': lambda **kwargs: create_brax_env(**kwargs),
         'vecenv_type': 'BRAX'
     },
+    'maniskill' : {
+        'env_creator': lambda **kwargs: create_brax_env(**kwargs),
+        'vecenv_type': 'MANISKILL'
+    },
     'envpool': {
         'env_creator': lambda **kwargs: create_envpool(**kwargs),
         'vecenv_type': 'ENVPOOL'
rl_games/common/vecenv.py

Lines changed: 4 additions & 1 deletion

@@ -280,4 +280,7 @@ def create_vec_env(config_name, num_actors, **kwargs):
 register('ENVPOOL', lambda config_name, num_actors, **kwargs: Envpool(config_name, num_actors, **kwargs))
 
 from rl_games.envs.cule import CuleEnv
-register('CULE', lambda config_name, num_actors, **kwargs: CuleEnv(config_name, num_actors, **kwargs))
+register('CULE', lambda config_name, num_actors, **kwargs: CuleEnv(config_name, num_actors, **kwargs))
+
+from rl_games.envs.maniskill import ManiskillEnv
+register('MANISKILL', lambda config_name, num_actors, **kwargs: ManiskillEnv(config_name, num_actors, **kwargs))
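For reference, a minimal sketch of how the two registrations fit together at run time: the new 'maniskill' entry in env_configurations supplies the vecenv_type 'MANISKILL', and create_vec_env dispatches to the ManiskillEnv factory registered here. The snippet is illustrative only; it assumes mani_skill is installed, and the kwargs mirror the env_config block of the new state-based PickCube config further below.

# Illustrative only: assumes mani_skill is installed; kwargs mirror the
# env_config section of the state-based PickCube config in this commit.
from rl_games.common import vecenv

# 'maniskill' resolves to vecenv_type 'MANISKILL' via env_configurations,
# which dispatches to the ManiskillEnv factory registered above.
env = vecenv.create_vec_env('maniskill', 16,
                            env_name='PickCube-v1',
                            obs_mode='state',
                            control_mode='pd_ee_delta_pose')
obs = env.reset()
print(env.get_env_info()['observation_space'])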

rl_games/common/wrappers.py

Lines changed: 6 additions & 23 deletions

@@ -635,7 +635,10 @@ def __init__(self, env):
         self.observation_space = self.convert_space(env.observation_space)
         self.action_space = self.convert_space(env.action_space)
 
-    def convert_space(self, space):
+
+    # static function to convert Gymnasium spaces to Gym spaces
+    @staticmethod
+    def convert_space(space):
         """Recursively convert Gymnasium spaces to Gym spaces."""
         if isinstance(space, gymnasium.spaces.Box):
             return gym.spaces.Box(
@@ -651,9 +654,9 @@ def convert_space(self, space):
         elif isinstance(space, gymnasium.spaces.MultiBinary):
             return gym.spaces.MultiBinary(n=space.n)
         elif isinstance(space, gymnasium.spaces.Tuple):
-            return gym.spaces.Tuple([self.convert_space(s) for s in space.spaces])
+            return gym.spaces.Tuple([OldGymWrapper.convert_space(s) for s in space.spaces])
         elif isinstance(space, gymnasium.spaces.Dict):
-            return gym.spaces.Dict({k: self.convert_space(s) for k, s in space.spaces.items()})
+            return gym.spaces.Dict({k: OldGymWrapper.convert_space(s) for k, s in space.spaces.items()})
         else:
             raise NotImplementedError(f"Space type {type(space)} is not supported.")
 
@@ -691,26 +694,6 @@ def render(self, mode='human'):
     def close(self):
         return self.env.close()
 
-# Example usage:
-if __name__ == "__main__":
-    # Create a MyoSuite environment
-    env = myosuite.make('myoChallengeDieReorientP2-v0')
-
-    # Wrap it with the old Gym-style wrapper
-    env = OldGymWrapper(env)
-
-    # Use the environment as usual
-    observation = env.reset()
-    done = False
-    while not done:
-        # Sample a random action
-        action = env.action_space.sample()
-        # Step the environment
-        observation, reward, done, info = env.step(action)
-        # Optionally render the environment
-        env.render()
-    env.close()
-
 
 def make_atari(env_id, timelimit=True, noop_max=0, skip=4, sticky=False, directory=None, **kwargs):
     env = gym.make(env_id, **kwargs)
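Since convert_space is now a @staticmethod, it can be called without constructing an OldGymWrapper instance, which is what the new ManiSkill adapter relies on. A minimal sketch, assuming both the old gym package and gymnasium are importable:

import numpy as np
import gymnasium
from rl_games.common.wrappers import OldGymWrapper

# Convert a Gymnasium Box into the old gym.spaces.Box without a wrapper instance.
gymnasium_box = gymnasium.spaces.Box(low=-1.0, high=1.0, shape=(8,), dtype=np.float32)
gym_box = OldGymWrapper.convert_space(gymnasium_box)
print(type(gym_box), gym_box.shape)  # gym.spaces.Box, (8,)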
Lines changed: 70 additions & 0 deletions

@@ -0,0 +1,70 @@
params:
  seed: 7

  #devices: [0, 0]

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [256, 128, 64]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None

  config:
    name: Ant_Maniskill
    full_experiment_name: Ant_Maniskill
    env_name: maniskill
    multi_gpu: False
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    normalize_advantage: True
    use_smooth_clamp: False
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 3e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 1000
    save_best_after: 100
    save_frequency: 50
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 8
    num_actors: 1024
    minibatch_size: 4096
    mini_epochs: 5
    critic_coef: 2
    clip_value: True
    bounds_loss_coef: 0.0001

    env_config:
      env_name: MS-AntRun-v1
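A hypothetical way to launch training with this config through rl_games' Python runner; the on-disk path of the new YAML file is not visible in this rendering, so the path below is a placeholder.

import yaml
from rl_games.torch_runner import Runner

with open('path/to/ant_maniskill.yaml') as f:  # placeholder path, not the actual file name
    cfg = yaml.safe_load(f)

runner = Runner()
runner.load(cfg)
runner.run({'train': True, 'play': False, 'checkpoint': None, 'sigma': None})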
Lines changed: 97 additions & 0 deletions

@@ -0,0 +1,97 @@
params:
  seed: 7

  #devices: [0, 0]

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
          scale: 0.02
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True

    cnn:
      permute_input: True
      type: conv2d
      activation: relu
      initializer:
        name: orthogonal_initializer
        gain: 1.41421356237
      convs:
        - filters: 32
          kernel_size: 8
          strides: 4
          padding: 0
        - filters: 64
          kernel_size: 4
          strides: 2
          padding: 0
        - filters: 64
          kernel_size: 3
          strides: 1
          padding: 0

    mlp:
      units: [256]
      activation: relu
      initializer:
        name: orthogonal_initializer
        gain: 1.41421356237
    rnn:
      name: 'lstm'
      units: 512
      layers: 1
      before_mlp: True
      layer_norm: True
  config:
    name: PickCube_RGB_Maniskill
    full_experiment_name: PickCube_RGB_Maniskill
    env_name: maniskill
    multi_gpu: False
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    normalize_advantage: True
    use_smooth_clamp: False
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 1e-4
    lr_schedule: None #adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 5000
    save_best_after: 100
    save_frequency: 50
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 8
    num_actors: 512
    minibatch_size: 2048
    mini_epochs: 2
    critic_coef: 2
    clip_value: True
    bounds_loss_coef: 0.0001

    env_config:
      # look at the https://maniskill.readthedocs.io/en/latest/user_guide/concepts/observation.html
      env_name: PickCube-v1
      obs_mode: "rgbd"
      control_mode: "pd_ee_delta_pose" # there is also "pd_joint_delta_pos", ..
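To see what obs_mode "rgbd" actually produces before training, the environment can be constructed directly with the same kwargs as the env_config above. This mirrors the gymnasium.make call in the new rl_games/envs/maniskill.py and assumes mani_skill is installed; the num_envs value is arbitrary.

import gymnasium
import mani_skill.envs  # registers the ManiSkill environments with gymnasium

env = gymnasium.make('PickCube-v1', num_envs=4,
                     obs_mode='rgbd', control_mode='pd_ee_delta_pose')
print(env.observation_space)  # typically a Dict space with image and state entries
env.close()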
Lines changed: 72 additions & 0 deletions

@@ -0,0 +1,72 @@
params:
  seed: 7

  #devices: [0, 0]

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [256, 128, 64]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None

  config:
    name: PickCube_State_Maniskill
    full_experiment_name: PickCube_State_Maniskill
    env_name: maniskill
    multi_gpu: False
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    normalize_advantage: True
    use_smooth_clamp: False
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 3e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 1000
    save_best_after: 100
    save_frequency: 50
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 8
    num_actors: 1024
    minibatch_size: 4096
    mini_epochs: 5
    critic_coef: 2
    clip_value: True
    bounds_loss_coef: 0.0001

    env_config:
      env_name: PickCube-v1
      obs_mode: "state" # there is also "state_dict", "rgbd", ...
      control_mode: "pd_ee_delta_pose" # there is also "pd_joint_delta_pos", ..

rl_games/envs/maniskill.py

Lines changed: 81 additions & 0 deletions

@@ -0,0 +1,81 @@
import rl_games.common.wrappers as wrappers
from rl_games.common.ivecenv import IVecEnv

# wrap your vector env so it resets for you under the hood
from gymnasium import spaces


def remove_batch_dim(space: spaces.Space) -> spaces.Space:
    """Recursively remove the first (batch) dimension from a Gym space."""
    if isinstance(space, spaces.Box):
        # assume shape = (B, *shape); drop the 0th index
        low = space.low[0]
        high = space.high[0]
        return spaces.Box(low=low, high=high, dtype=space.dtype)
    elif isinstance(space, spaces.MultiDiscrete):
        # assume nvec = (B, n); take first row
        nvec = space.nvec[0]
        return spaces.MultiDiscrete(nvec)
    elif isinstance(space, spaces.MultiBinary):
        # n can be int or array-like
        n = space.n[0] if hasattr(space.n, "__len__") else space.n
        return spaces.MultiBinary(n)
    elif isinstance(space, spaces.Discrete):
        # Discrete spaces have no extra dims
        return space
    elif isinstance(space, spaces.Tuple):
        return spaces.Tuple(tuple(remove_batch_dim(s) for s in space.spaces))
    elif isinstance(space, spaces.Dict):
        return spaces.Dict({k: remove_batch_dim(s) for k, s in space.spaces.items()})
    else:
        raise ValueError(f"Unsupported space type: {type(space)}")


class ManiskillEnv(IVecEnv):
    def __init__(self, config_name, num_actors, **kwargs):
        import gymnasium
        import mani_skill.envs
        from mani_skill.vector.wrappers.gymnasium import ManiSkillVectorEnv
        from mani_skill.utils.wrappers.flatten import FlattenRGBDObservationWrapper

        self.batch_size = num_actors
        env_name = kwargs.pop('env_name')
        self.seed = kwargs.pop('seed', 0)  # not sure how to set this in mani_skill
        self.env = gymnasium.make(
            env_name,
            num_envs=num_actors,
            **kwargs
        )
        #self.env = FlattenRGBDObservationWrapper(self.env, rgb=True, depth=False, state=False, sep_depth=False)
        # need to use this wrapper to have automatic reset for done envs
        self.env = ManiSkillVectorEnv(self.env)

        print(f"ManiSkill env: {env_name} with {num_actors} actors")
        print(f"Original observation space: {self.env.observation_space}")
        self.action_space = wrappers.OldGymWrapper.convert_space(remove_batch_dim(self.env.action_space))
        self.observation_space = wrappers.OldGymWrapper.convert_space(remove_batch_dim(self.env.observation_space))
        print(f"Converted action space: {self.action_space}")
        print(f"Converted observation space: {self.observation_space}")

    def step(self, action):
        next_obs, reward, done, truncated, info = self.env.step(action)
        is_done = done | truncated
        info['time_outs'] = truncated
        return next_obs, reward, is_done, info

    def reset(self):
        obs, _ = self.env.reset()
        return obs

    def get_number_of_agents(self):
        return 1

    def get_env_info(self):
        info = {}
        info['action_space'] = self.action_space
        info['observation_space'] = self.observation_space
        return info


def create_maniskill_env(config_name, num_actors, **kwargs):
    return ManiskillEnv(config_name, num_actors, **kwargs)