Update

Vincent Moens · Vincent Moens · commit 44ec09b2b64c · 2025-02-11T12:43:14.000Z
[ghstack-poisoned]
diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -13456,11 +13456,14 @@ class TestConditionalSkip(TransformBase):
     @pytest.mark.parametrize("bwad", [False, True])
     def test_single_trans_env_check(self, bwad):
         env = CountingEnv()
-        base_env = TransformedEnv(env, StepCounter(step_count_key="other_count"))
-        env = TransformedEnv(base_env, StepCounter(), auto_unwrap=False)
-        env = env.append_transform(
-            ConditionalSkip(cond=lambda td: td["step_count"] % 2 == 1)
+        base_env = TransformedEnv(
+            env,
+            Compose(
+                StepCounter(step_count_key="other_count"),
+                ConditionalSkip(cond=lambda td: td["step_count"] % 2 == 1),
+            ),
         )
+        env = TransformedEnv(base_env, StepCounter(), auto_unwrap=False)
         env.set_seed(0)
         env.check_env_specs()
         policy = lambda td: td.set("action", torch.ones((1,)))
@@ -13472,13 +13475,16 @@ def test_single_trans_env_check(self, bwad):
     def test_serial_trans_env_check(self, bwad):
         def make_env(i):
             env = CountingEnv()
-            base_env = TransformedEnv(env, StepCounter(step_count_key="other_count"))
-            return TransformedEnv(
-                base_env,
+            base_env = TransformedEnv(
+                env,
                 Compose(
-                    StepCounter(),
+                    StepCounter(step_count_key="other_count"),
                     ConditionalSkip(cond=lambda td, i=i: (td["step_count"] % 2 == i)),
                 ),
+            )
+            return TransformedEnv(
+                base_env,
+                StepCounter(),
                 auto_unwrap=False,
             )
 
@@ -13494,13 +13500,16 @@ def make_env(i):
     def test_parallel_trans_env_check(self, bwad):
         def make_env(i):
             env = CountingEnv()
-            base_env = TransformedEnv(env, StepCounter(step_count_key="other_count"))
-            return TransformedEnv(
-                base_env,
+            base_env = TransformedEnv(
+                env,
                 Compose(
-                    StepCounter(),
+                    StepCounter(step_count_key="other_count"),
                     ConditionalSkip(cond=lambda td, i=i: (td["step_count"] % 2 == i)),
                 ),
+            )
+            return TransformedEnv(
+                base_env,
+                StepCounter(),
                 auto_unwrap=False,
             )
 
@@ -13533,7 +13542,8 @@ def cond(td):
             sc = td["step_count"] + torch.tensor([[0], [1]])
             return sc.squeeze() % 2 == 0
 
-        env = TransformedEnv(base_env, Compose(StepCounter(), ConditionalSkip(cond)))
+        env = TransformedEnv(base_env, ConditionalSkip(cond))
+        env = TransformedEnv(env, StepCounter(), auto_unwrap=False)
         env.check_env_specs()
         policy = lambda td: td.set("action", torch.ones((2, 1)))
         r = env.rollout(10, policy, break_when_any_done=bwad)
@@ -13558,9 +13568,8 @@ def cond(td):
                 sc = td["step_count"] + torch.tensor([[0], [1]])
                 return sc.squeeze() % 2 == 0
 
-            env = TransformedEnv(
-                base_env, Compose(StepCounter(), ConditionalSkip(cond))
-            )
+            env = TransformedEnv(base_env, ConditionalSkip(cond))
+            env = TransformedEnv(env, StepCounter(), auto_unwrap=False)
             env.check_env_specs()
             policy = lambda td: td.set("action", torch.ones((2, 1)))
             r = env.rollout(10, policy, break_when_any_done=bwad)
diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py
@@ -635,7 +635,7 @@ def parent(self) -> Optional[EnvBase]:
                     )
                 parent, _ = container._rebuild_up_to(self)
             elif isinstance(container, TransformedEnv):
-                parent = TransformedEnv(container.base_env)
+                parent = TransformedEnv(container.base_env, auto_unwrap=False)
             else:
                 raise ValueError(f"container is of type {type(container)}")
             self.__dict__["_parent"] = parent
@@ -958,22 +958,22 @@ def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
 
         if next_tensordict is None:
             next_tensordict = self.base_env._step(tensordict_in)
+            if next_preset is not None:
+                # tensordict could already have a "next" key
+                # this could be done more efficiently by not excluding but just passing
+                # the necessary keys
+                next_tensordict.update(
+                    next_preset.exclude(*next_tensordict.keys(True, True))
+                )
+            self.base_env._complete_done(self.base_env.full_done_spec, next_tensordict)
+            # we want the input entries to remain unchanged
+            next_tensordict = self.transform._step(tensordict, next_tensordict)
 
         if partial_steps is not None and tensordict_batch_size != self.batch_size:
             result = next_tensordict.new_zeros(tensordict_batch_size)
             result[partial_steps] = next_tensordict
             next_tensordict = result
 
-        if next_preset is not None:
-            # tensordict could already have a "next" key
-            # this could be done more efficiently by not excluding but just passing
-            # the necessary keys
-            next_tensordict.update(
-                next_preset.exclude(*next_tensordict.keys(True, True))
-            )
-        self.base_env._complete_done(self.base_env.full_done_spec, next_tensordict)
-        # we want the input entries to remain unchanged
-        next_tensordict = self.transform._step(tensordict, next_tensordict)
         return next_tensordict
 
     def set_seed(
@@ -9079,6 +9079,7 @@ class _CallableTransform(Transform):
     # A wrapper around a custom callable to make it possible to transform any data type
     def __init__(self, func):
         super().__init__()
+        raise RuntimeError(isinstance(func, Transform), func)
         self.func = func
 
     def forward(self, *args, **kwargs):
@@ -10266,21 +10267,40 @@ class ConditionalSkip(Transform):
     value in `"_step"` is ``True``. Otherwise, it is trusted that the environment will account for the
     `"_step"` signal accordingly.
 
+    .. note:: The skip will affect transforms that modify the environment output too, i.e., any transform
+        that is to be exectued on the tensordict returned by :meth:`~torchrl.envs.EnvBase.step` will be
+        skipped if the condition is met. To palliate this effect if it is not desirable, one can wrap
+        the transformed env in another transformed env, since the skip only affects the first-degree parent
+        of the ``ConditionalSkip`` transform. See example below.
+
     Args:
         cond (Callable[[TensorDictBase], bool | torch.Tensor]): a callable for the tensordict input
             that checks whether the next env step must be skipped (`True` = skipped, `False` = execute
             env.step).
 
     Examples:
-        >>> from torchrl.envs.transforms.transforms import ConditionalSkip, StepCounter, TransformedEnv
-        >>> from torchrl.envs import GymEnv
         >>> import torch
         >>>
+        >>> from torchrl.envs import GymEnv
+        >>> from torchrl.envs.transforms.transforms import ConditionalSkip, StepCounter, TransformedEnv, Compose
+        >>>
         >>> torch.manual_seed(0)
         >>>
-        >>> base_env = TransformedEnv(GymEnv("Pendulum-v1"), StepCounter(step_count_key="other_count"))
-        >>> env = TransformedEnv(base_env, StepCounter(), auto_unwrap=False)
-        >>> env = env.append_transform(ConditionalSkip(cond=lambda td: td["step_count"] % 2 == 1))
+        >>> base_env = TransformedEnv(
+        ...     GymEnv("Pendulum-v1"),
+        ...     StepCounter(step_count_key="inner_count"),
+        ... )
+        >>> middle_env = TransformedEnv(
+        ...     base_env,
+        ...     Compose(
+        ...         StepCounter(step_count_key="middle_count"),
+        ...         ConditionalSkip(cond=lambda td: td["step_count"] % 2 == 1),
+        ...     ),
+        ...     auto_unwrap=False)  # makes sure that transformed envs are properly wrapped
+        >>> env = TransformedEnv(
+        ...     middle_env,
+        ...     StepCounter(step_count_key="step_count"),
+        ...     auto_unwrap=False)
         >>> env.set_seed(0)
         >>>
         >>> r = env.rollout(10)
@@ -10295,18 +10315,18 @@ class ConditionalSkip(Transform):
                 [-0.9984,  0.0561, -1.7933],
                 [-0.9984,  0.0561, -1.7933],
                 [-0.9895,  0.1445, -1.7779]])
-        >>> print(r["step_count"])
+        >>> print(r["inner_count"])
         tensor([[0],
                 [1],
+                [1],
+                [2],
                 [2],
                 [3],
+                [3],
                 [4],
-                [5],
-                [6],
-                [7],
-                [8],
-                [9]])
-        >>> print(r["other_count"])
+                [4],
+                [5]])
+        >>> print(r["middle_count"])
         tensor([[0],
                 [1],
                 [1],
@@ -10317,6 +10337,18 @@ class ConditionalSkip(Transform):
                 [4],
                 [4],
                 [5]])
+        >>> print(r["step_count"])
+        tensor([[0],
+                [1],
+                [2],
+                [3],
+                [4],
+                [5],
+                [6],
+                [7],
+                [8],
+                [9]])
+
 
     """