Commit c8574f2
committed: added one more example
1 parent 9a52283 commit c8574f2

3 files changed: +178 -4 lines changed
Lines changed: 97 additions & 0 deletions (new file)

```yaml
params:
  seed: 7

  #devices: [0, 0]

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
          scale: 0.02
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True

    cnn:
      permute_input: True
      type: conv2d
      activation: relu
      initializer:
        name: orthogonal_initializer
        gain: 1.41421356237
      convs:
        - filters: 32
          kernel_size: 8
          strides: 4
          padding: 0
        - filters: 64
          kernel_size: 4
          strides: 2
          padding: 0
        - filters: 64
          kernel_size: 3
          strides: 1
          padding: 0

    mlp:
      units: [256]
      activation: relu
      initializer:
        name: orthogonal_initializer
        gain: 1.41421356237
    rnn:
      name: 'lstm'
      units: 512
      layers: 1
      before_mlp: True
      layer_norm: True

  config:
    name: PickCube_RGB_Maniskill
    full_experiment_name: PickCube_RGB_Maniskill
    env_name: maniskill
    multi_gpu: False
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    normalize_advantage: True
    use_smooth_clamp: False
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 1e-4
    lr_schedule: None #adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 5000
    save_best_after: 100
    save_frequency: 50
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 8
    num_actors: 512
    minibatch_size: 2048
    mini_epochs: 2
    critic_coef: 2
    clip_value: True
    bounds_loss_coef: 0.0001

    env_config:
      # see https://maniskill.readthedocs.io/en/latest/user_guide/concepts/observation.html
      env_name: PickCube-v1
      obs_mode: "rgbd"
      control_mode: "pd_ee_delta_pose" # there is also "pd_joint_delta_pos", ..
```
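Configs like this one are consumed by the rl_games runner. A minimal launch sketch, assuming the file above is saved under a hypothetical path (the page does not show the new file's name) and that the `maniskill` env factory from rl_games/envs/maniskill.py is registered under `env_name: maniskill`:

```python
# Minimal sketch: train with the config above via the rl_games Runner.
# The YAML path below is hypothetical -- the diff does not show the real filename.
import yaml

from rl_games.torch_runner import Runner

with open('rl_games/configs/maniskill/ppo_maniskill_pickcube_rgb.yaml') as f:  # hypothetical path
    config = yaml.safe_load(f)

runner = Runner()
runner.load(config)
runner.run({'train': True, 'play': False})
```

Equivalently, the repository's runner.py can be invoked from the command line with `--train --file <config>`.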
Lines changed: 72 additions & 0 deletions (new file)

```yaml
params:
  seed: 7

  #devices: [0, 0]

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: False
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    mlp:
      units: [256, 128, 64]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None

  config:
    name: PickCube_State_Maniskill
    full_experiment_name: PickCube_State_Maniskill
    env_name: maniskill
    multi_gpu: False
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    normalize_advantage: True
    use_smooth_clamp: False
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 3e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
    max_epochs: 1000
    save_best_after: 100
    save_frequency: 50
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 8
    num_actors: 1024
    minibatch_size: 4096
    mini_epochs: 5
    critic_coef: 2
    clip_value: True
    bounds_loss_coef: 0.0001

    env_config:
      env_name: PickCube-v1
      obs_mode: "state" # there is also "state_dict", "rgbd", ...
      control_mode: "pd_ee_delta_pose" # there is also "pd_joint_delta_pos", ..
```

rl_games/envs/maniskill.py

Lines changed: 9 additions & 4 deletions

```diff
@@ -34,20 +34,25 @@ def __init__(self, config_name, num_actors, **kwargs):
         import gymnasium
         import mani_skill.envs
         from mani_skill.vector.wrappers.gymnasium import ManiSkillVectorEnv
+        from mani_skill.utils.wrappers.flatten import FlattenRGBDObservationWrapper
         self.batch_size = num_actors
         env_name=kwargs.pop('env_name')
         self.seed = kwargs.pop('seed', 0) # not sure how to set this in mani_skill
-        env = gymnasium.make(
+        self.env = gymnasium.make(
             env_name,
             num_envs=num_actors,
             **kwargs
         )
+        #self.env = FlattenRGBDObservationWrapper(self.env, rgb=True, depth=False, state=False, sep_depth=False)
         # need to use this wrapper to have automatic reset for done envs
-        self.env = ManiSkillVectorEnv(env)
+        self.env = ManiSkillVectorEnv(self.env)
+
+        print(f"ManiSkill env: {env_name} with {num_actors} actors")
+        print(f"Original observation space: {self.env.observation_space}")
         self.action_space = wrappers.OldGymWrapper.convert_space(remove_batch_dim(self.env.action_space))
         self.observation_space = wrappers.OldGymWrapper.convert_space(remove_batch_dim(self.env.observation_space))
-
-        # remove first batch dim from obs spacce and action space
+        print(f"Converted action space: {self.action_space}")
+        print(f"Converted observation space: {self.observation_space}")
```
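The wrapper relies on a `remove_batch_dim` helper and on `wrappers.OldGymWrapper.convert_space`, both defined elsewhere in rl_games and not shown in this hunk. Purely to illustrate what stripping the batch dimension involves, here is a hypothetical sketch of such a helper:

```python
# Hypothetical sketch only -- the real remove_batch_dim lives elsewhere in
# rl_games and may differ. Shown to illustrate the space conversion above.
from gymnasium import spaces

def remove_batch_dim(space):
    """Strip the leading num_envs dimension from a batched gymnasium space."""
    if isinstance(space, spaces.Box):
        # low/high carry the batched shape, so indexing drops the first dim
        return spaces.Box(low=space.low[0], high=space.high[0], dtype=space.dtype)
    if isinstance(space, spaces.Dict):
        # recurse into dict observations (e.g. rgbd camera dicts)
        return spaces.Dict({k: remove_batch_dim(v) for k, v in space.spaces.items()})
    raise NotImplementedError(f"Unsupported space type: {type(space)}")
```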