
Commit 51ac9aa

Denys88 and DenSumy authored
[ISSUE-322] added maniskill Ant demo (#323)

* added maniskill ant demo
* added one more example

Co-authored-by: Denys Makoviichuk <[email protected]>

1 parent b483bd6 · commit 51ac9aa

File tree

7 files changed: +334 -24 lines changed

rl_games/common/env_configurations.py

Lines changed: 4 additions & 0 deletions

@@ -425,6 +425,10 @@ def create_env(name, **kwargs):
         'env_creator': lambda **kwargs: create_brax_env(**kwargs),
         'vecenv_type': 'BRAX'
     },
+    'maniskill' : {
+        'env_creator': lambda **kwargs: create_brax_env(**kwargs),
+        'vecenv_type': 'MANISKILL'
+    },
     'envpool': {
         'env_creator': lambda **kwargs: create_envpool(**kwargs),
         'vecenv_type': 'ENVPOOL'
rl_games/common/vecenv.py

Lines changed: 4 additions & 1 deletion

@@ -280,4 +280,7 @@ def create_vec_env(config_name, num_actors, **kwargs):
 register('ENVPOOL', lambda config_name, num_actors, **kwargs: Envpool(config_name, num_actors, **kwargs))
 
 from rl_games.envs.cule import CuleEnv
-register('CULE', lambda config_name, num_actors, **kwargs: CuleEnv(config_name, num_actors, **kwargs))
+register('CULE', lambda config_name, num_actors, **kwargs: CuleEnv(config_name, num_actors, **kwargs))
+
+from rl_games.envs.maniskill import ManiskillEnv
+register('MANISKILL', lambda config_name, num_actors, **kwargs: ManiskillEnv(config_name, num_actors, **kwargs))
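For reference, a minimal sketch of how the two registrations fit together at run time: the new 'maniskill' entry in env_configurations supplies the vecenv_type 'MANISKILL', and create_vec_env dispatches to the ManiskillEnv factory registered here. The snippet is illustrative only; it assumes mani_skill is installed, and the kwargs mirror the env_config block of the new state-based PickCube config further below.

# Illustrative only: assumes mani_skill is installed; kwargs mirror the
# env_config section of the state-based PickCube config in this commit.
from rl_games.common import vecenv

# 'maniskill' resolves to vecenv_type 'MANISKILL' via env_configurations,
# which dispatches to the ManiskillEnv factory registered above.
env = vecenv.create_vec_env('maniskill', 16,
                            env_name='PickCube-v1',
                            obs_mode='state',
                            control_mode='pd_ee_delta_pose')
obs = env.reset()
print(env.get_env_info()['observation_space'])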

rl_games/common/wrappers.py

Lines changed: 6 additions & 23 deletions

@@ -635,7 +635,10 @@ def __init__(self, env):
         self.observation_space = self.convert_space(env.observation_space)
         self.action_space = self.convert_space(env.action_space)
 
-    def convert_space(self, space):
+
+    # static function to convert Gymnasium spaces to Gym spaces
+    @staticmethod
+    def convert_space(space):
         """Recursively convert Gymnasium spaces to Gym spaces."""
         if isinstance(space, gymnasium.spaces.Box):
             return gym.spaces.Box(
@@ -651,9 +654,9 @@ def convert_space(self, space):
         elif isinstance(space, gymnasium.spaces.MultiBinary):
             return gym.spaces.MultiBinary(n=space.n)
         elif isinstance(space, gymnasium.spaces.Tuple):
-            return gym.spaces.Tuple([self.convert_space(s) for s in space.spaces])
+            return gym.spaces.Tuple([OldGymWrapper.convert_space(s) for s in space.spaces])
         elif isinstance(space, gymnasium.spaces.Dict):
-            return gym.spaces.Dict({k: self.convert_space(s) for k, s in space.spaces.items()})
+            return gym.spaces.Dict({k: OldGymWrapper.convert_space(s) for k, s in space.spaces.items()})
         else:
             raise NotImplementedError(f"Space type {type(space)} is not supported.")
 
@@ -691,26 +694,6 @@ def render(self, mode='human'):
     def close(self):
         return self.env.close()
 
-# Example usage:
-if __name__ == "__main__":
-    # Create a MyoSuite environment
-    env = myosuite.make('myoChallengeDieReorientP2-v0')
-
-    # Wrap it with the old Gym-style wrapper
-    env = OldGymWrapper(env)
-
-    # Use the environment as usual
-    observation = env.reset()
-    done = False
-    while not done:
-        # Sample a random action
-        action = env.action_space.sample()
-        # Step the environment
-        observation, reward, done, info = env.step(action)
-        # Optionally render the environment
-        env.render()
-    env.close()
-
 
 def make_atari(env_id, timelimit=True, noop_max=0, skip=4, sticky=False, directory=None, **kwargs):
     env = gym.make(env_id, **kwargs)
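Since convert_space is now a @staticmethod, it can be called without constructing an OldGymWrapper instance, which is what the new ManiSkill adapter relies on. A minimal sketch, assuming both the old gym package and gymnasium are importable:

import numpy as np
import gymnasium
from rl_games.common.wrappers import OldGymWrapper

# Convert a Gymnasium Box into the old gym.spaces.Box without a wrapper instance.
gymnasium_box = gymnasium.spaces.Box(low=-1.0, high=1.0, shape=(8,), dtype=np.float32)
gym_box = OldGymWrapper.convert_space(gymnasium_box)
print(type(gym_box), gym_box.shape)  # gym.spaces.Box, (8,)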
Lines changed: 70 additions & 0 deletions

@@ -0,0 +1,70 @@
params:
  seed: 7

  #devices: [0, 0]

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [256, 128, 64]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None

  config:
    name: Ant_Maniskill
    full_experiment_name: Ant_Maniskill
    env_name: maniskill
    multi_gpu: False
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    normalize_advantage: True
    use_smooth_clamp: False
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 3e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 1000
    save_best_after: 100
    save_frequency: 50
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 8
    num_actors: 1024
    minibatch_size: 4096
    mini_epochs: 5
    critic_coef: 2
    clip_value: True
    bounds_loss_coef: 0.0001

    env_config:
      env_name: MS-AntRun-v1
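A hypothetical way to launch training with this config through rl_games' Python runner; the on-disk path of the new YAML file is not visible in this rendering, so the path below is a placeholder.

import yaml
from rl_games.torch_runner import Runner

with open('path/to/ant_maniskill.yaml') as f:  # placeholder path, not the actual file name
    cfg = yaml.safe_load(f)

runner = Runner()
runner.load(cfg)
runner.run({'train': True, 'play': False, 'checkpoint': None, 'sigma': None})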
Lines changed: 97 additions & 0 deletions

@@ -0,0 +1,97 @@
params:
  seed: 7

  #devices: [0, 0]

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
          scale: 0.02
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True

    cnn:
      permute_input: True
      type: conv2d
      activation: relu
      initializer:
        name: orthogonal_initializer
        gain: 1.41421356237
      convs:
        - filters: 32
          kernel_size: 8
          strides: 4
          padding: 0
        - filters: 64
          kernel_size: 4
          strides: 2
          padding: 0
        - filters: 64
          kernel_size: 3
          strides: 1
          padding: 0

    mlp:
      units: [256]
      activation: relu
      initializer:
        name: orthogonal_initializer
        gain: 1.41421356237
    rnn:
      name: 'lstm'
      units: 512
      layers: 1
      before_mlp: True
      layer_norm: True
  config:
    name: PickCube_RGB_Maniskill
    full_experiment_name: PickCube_RGB_Maniskill
    env_name: maniskill
    multi_gpu: False
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    normalize_advantage: True
    use_smooth_clamp: False
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 1e-4
    lr_schedule: None #adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 5000
    save_best_after: 100
    save_frequency: 50
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 8
    num_actors: 512
    minibatch_size: 2048
    mini_epochs: 2
    critic_coef: 2
    clip_value: True
    bounds_loss_coef: 0.0001

    env_config:
      # look at the https://maniskill.readthedocs.io/en/latest/user_guide/concepts/observation.html
      env_name: PickCube-v1
      obs_mode: "rgbd"
      control_mode: "pd_ee_delta_pose" # there is also "pd_joint_delta_pos", ..
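To see what obs_mode "rgbd" actually produces before training, the environment can be constructed directly with the same kwargs as the env_config above. This mirrors the gymnasium.make call in the new rl_games/envs/maniskill.py and assumes mani_skill is installed; the num_envs value is arbitrary.

import gymnasium
import mani_skill.envs  # registers the ManiSkill environments with gymnasium

env = gymnasium.make('PickCube-v1', num_envs=4,
                     obs_mode='rgbd', control_mode='pd_ee_delta_pose')
print(env.observation_space)  # typically a Dict space with image and state entries
env.close()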
Lines changed: 72 additions & 0 deletions

@@ -0,0 +1,72 @@
params:
  seed: 7

  #devices: [0, 0]

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [256, 128, 64]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None

  config:
    name: PickCube_State_Maniskill
    full_experiment_name: PickCube_State_Maniskill
    env_name: maniskill
    multi_gpu: False
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    normalize_advantage: True
    use_smooth_clamp: False
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 3e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 1000
    save_best_after: 100
    save_frequency: 50
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 8
    num_actors: 1024
    minibatch_size: 4096
    mini_epochs: 5
    critic_coef: 2
    clip_value: True
    bounds_loss_coef: 0.0001

    env_config:
      env_name: PickCube-v1
      obs_mode: "state" # there is also "state_dict", "rgbd", ...
      control_mode: "pd_ee_delta_pose" # there is also "pd_joint_delta_pos", ..

rl_games/envs/maniskill.py

Lines changed: 81 additions & 0 deletions

@@ -0,0 +1,81 @@
import rl_games.common.wrappers as wrappers
from rl_games.common.ivecenv import IVecEnv

# wrap your vector env so it resets for you under the hood
from gymnasium import spaces


def remove_batch_dim(space: spaces.Space) -> spaces.Space:
    """Recursively remove the first (batch) dimension from a Gym space."""
    if isinstance(space, spaces.Box):
        # assume shape = (B, *shape); drop the 0th index
        low = space.low[0]
        high = space.high[0]
        return spaces.Box(low=low, high=high, dtype=space.dtype)
    elif isinstance(space, spaces.MultiDiscrete):
        # assume nvec = (B, n); take first row
        nvec = space.nvec[0]
        return spaces.MultiDiscrete(nvec)
    elif isinstance(space, spaces.MultiBinary):
        # n can be int or array-like
        n = space.n[0] if hasattr(space.n, "__len__") else space.n
        return spaces.MultiBinary(n)
    elif isinstance(space, spaces.Discrete):
        # Discrete spaces have no extra dims
        return space
    elif isinstance(space, spaces.Tuple):
        return spaces.Tuple(tuple(remove_batch_dim(s) for s in space.spaces))
    elif isinstance(space, spaces.Dict):
        return spaces.Dict({k: remove_batch_dim(s) for k, s in space.spaces.items()})
    else:
        raise ValueError(f"Unsupported space type: {type(space)}")


class ManiskillEnv(IVecEnv):
    def __init__(self, config_name, num_actors, **kwargs):
        import gymnasium
        import mani_skill.envs
        from mani_skill.vector.wrappers.gymnasium import ManiSkillVectorEnv
        from mani_skill.utils.wrappers.flatten import FlattenRGBDObservationWrapper

        self.batch_size = num_actors
        env_name = kwargs.pop('env_name')
        self.seed = kwargs.pop('seed', 0)  # not sure how to set this in mani_skill
        self.env = gymnasium.make(
            env_name,
            num_envs=num_actors,
            **kwargs
        )
        #self.env = FlattenRGBDObservationWrapper(self.env, rgb=True, depth=False, state=False, sep_depth=False)
        # need to use this wrapper to have automatic reset for done envs
        self.env = ManiSkillVectorEnv(self.env)

        print(f"ManiSkill env: {env_name} with {num_actors} actors")
        print(f"Original observation space: {self.env.observation_space}")
        self.action_space = wrappers.OldGymWrapper.convert_space(remove_batch_dim(self.env.action_space))
        self.observation_space = wrappers.OldGymWrapper.convert_space(remove_batch_dim(self.env.observation_space))
        print(f"Converted action space: {self.action_space}")
        print(f"Converted observation space: {self.observation_space}")

    def step(self, action):
        next_obs, reward, done, truncated, info = self.env.step(action)
        is_done = done | truncated
        info['time_outs'] = truncated
        return next_obs, reward, is_done, info

    def reset(self):
        obs, _ = self.env.reset()
        return obs

    def get_number_of_agents(self):
        return 1

    def get_env_info(self):
        info = {}
        info['action_space'] = self.action_space
        info['observation_space'] = self.observation_space
        return info


def create_maniskill_env(config_name, num_actors, **kwargs):
    return ManiskillEnv(config_name, num_actors, **kwargs)