
Commit eec400d

[Feature] Transform for partial steps
ghstack-source-id: cd6c967ac6d793e078cac90c340942f23ffb16f4
Pull Request resolved: #2777
1 parent b27ee6d commit eec400d
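
The commit adds a ConditionalSkip transform that lets a user-supplied condition on the root tensordict bypass the base environment's step for some or all batch entries. A minimal usage sketch, pieced together from the tests added below; the GymEnv("CartPole-v1") backend and the random-action policy are illustrative assumptions, not part of the commit:

from torchrl.envs import GymEnv, StepCounter, TransformedEnv
from torchrl.envs.transforms import ConditionalSkip

# Any TorchRL env works here; CartPole via GymEnv assumes gymnasium is installed.
base_env = GymEnv("CartPole-v1")
env = TransformedEnv(base_env, StepCounter())
# cond returns True for the entries whose base-env step should be skipped;
# here every odd step is skipped, mirroring test_single_trans_env_check below.
env = env.append_transform(ConditionalSkip(cond=lambda td: td["step_count"] % 2 == 1))
rollout = env.rollout(10, policy=lambda td: td.set("action", env.action_spec.rand()))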

6 files changed (+336 -16 lines)

test/test_transforms.py (+154)
@@ -58,6 +58,7 @@
     CenterCrop,
     ClipTransform,
     Compose,
+    ConditionalSkip,
     Crop,
     DeviceCastTransform,
     DiscreteActionProjection,
@@ -13451,6 +13452,159 @@ def test_composite_reward_spec(self) -> None:
         assert transform.transform_reward_spec(reward_spec) == expected_reward_spec
 
 
+class TestConditionalSkip(TransformBase):
+    @pytest.mark.parametrize("bwad", [False, True])
+    def test_single_trans_env_check(self, bwad):
+        env = CountingEnv()
+        base_env = TransformedEnv(env, StepCounter(step_count_key="other_count"))
+        env = TransformedEnv(base_env, StepCounter(), auto_unwrap=False)
+        env = env.append_transform(
+            ConditionalSkip(cond=lambda td: td["step_count"] % 2 == 1)
+        )
+        env.set_seed(0)
+        env.check_env_specs()
+        policy = lambda td: td.set("action", torch.ones((1,)))
+        r = env.rollout(10, policy, break_when_any_done=bwad)
+        assert (r["step_count"] == torch.arange(10).view(10, 1)).all()
+        assert (r["other_count"] == torch.arange(1, 11).view(10, 1) // 2).all()
+
+    @pytest.mark.parametrize("bwad", [False, True])
+    def test_serial_trans_env_check(self, bwad):
+        def make_env(i):
+            env = CountingEnv()
+            base_env = TransformedEnv(env, StepCounter(step_count_key="other_count"))
+            return TransformedEnv(
+                base_env,
+                Compose(
+                    StepCounter(),
+                    ConditionalSkip(cond=lambda td, i=i: (td["step_count"] % 2 == i)),
+                ),
+                auto_unwrap=False,
+            )
+
+        env = SerialEnv(2, [partial(make_env, i=0), partial(make_env, i=1)])
+        env.check_env_specs()
+        policy = lambda td: td.set("action", torch.ones((2, 1)))
+        r = env.rollout(10, policy, break_when_any_done=bwad)
+        assert (r["step_count"] == torch.arange(10).view(10, 1).expand(2, 10, 1)).all()
+        assert (r["other_count"][0] == torch.arange(0, 10).view(10, 1) // 2).all()
+        assert (r["other_count"][1] == torch.arange(1, 11).view(10, 1) // 2).all()
+
+    @pytest.mark.parametrize("bwad", [False, True])
+    def test_parallel_trans_env_check(self, bwad):
+        def make_env(i):
+            env = CountingEnv()
+            base_env = TransformedEnv(env, StepCounter(step_count_key="other_count"))
+            return TransformedEnv(
+                base_env,
+                Compose(
+                    StepCounter(),
+                    ConditionalSkip(cond=lambda td, i=i: (td["step_count"] % 2 == i)),
+                ),
+                auto_unwrap=False,
+            )
+
+        env = ParallelEnv(
+            2, [partial(make_env, i=0), partial(make_env, i=1)], mp_start_method=mp_ctx
+        )
+        try:
+            env.check_env_specs()
+            policy = lambda td: td.set("action", torch.ones((2, 1)))
+            r = env.rollout(10, policy, break_when_any_done=bwad)
+            assert (
+                r["step_count"] == torch.arange(10).view(10, 1).expand(2, 10, 1)
+            ).all()
+            assert (r["other_count"][0] == torch.arange(0, 10).view(10, 1) // 2).all()
+            assert (r["other_count"][1] == torch.arange(1, 11).view(10, 1) // 2).all()
+        finally:
+            env.close()
+            del env
+
+    @pytest.mark.parametrize("bwad", [False, True])
+    def test_trans_serial_env_check(self, bwad):
+        def make_env():
+            env = CountingEnv(max_steps=100)
+            base_env = TransformedEnv(env, StepCounter(step_count_key="other_count"))
+            return base_env
+
+        base_env = SerialEnv(2, [make_env, make_env])
+
+        def cond(td):
+            sc = td["step_count"] + torch.tensor([[0], [1]])
+            return sc.squeeze() % 2 == 0
+
+        env = TransformedEnv(base_env, Compose(StepCounter(), ConditionalSkip(cond)))
+        env.check_env_specs()
+        policy = lambda td: td.set("action", torch.ones((2, 1)))
+        r = env.rollout(10, policy, break_when_any_done=bwad)
+        assert (r["step_count"] == torch.arange(10).view(10, 1).expand(2, 10, 1)).all()
+        assert (r["other_count"][0] == torch.arange(0, 10).view(10, 1) // 2).all()
+        assert (r["other_count"][1] == torch.arange(1, 11).view(10, 1) // 2).all()
+
+    @pytest.mark.parametrize("bwad", [True, False])
+    @pytest.mark.parametrize("buffers", [True, False])
+    def test_trans_parallel_env_check(self, bwad, buffers):
+        def make_env():
+            env = CountingEnv(max_steps=100)
+            base_env = TransformedEnv(env, StepCounter(step_count_key="other_count"))
+            return base_env
+
+        base_env = ParallelEnv(
+            2, [make_env, make_env], mp_start_method=mp_ctx, use_buffers=buffers
+        )
+        try:
+
+            def cond(td):
+                sc = td["step_count"] + torch.tensor([[0], [1]])
+                return sc.squeeze() % 2 == 0
+
+            env = TransformedEnv(
+                base_env, Compose(StepCounter(), ConditionalSkip(cond))
+            )
+            env.check_env_specs()
+            policy = lambda td: td.set("action", torch.ones((2, 1)))
+            r = env.rollout(10, policy, break_when_any_done=bwad)
+            assert (
+                r["step_count"] == torch.arange(10).view(10, 1).expand(2, 10, 1)
+            ).all()
+            assert (r["other_count"][0] == torch.arange(0, 10).view(10, 1) // 2).all()
+            assert (r["other_count"][1] == torch.arange(1, 11).view(10, 1) // 2).all()
+        finally:
+            base_env.close()
+            del base_env
+
+    def test_transform_no_env(self):
+        t = ConditionalSkip(lambda td: torch.arange(td.numel()).view(td.shape) % 2 == 0)
+        assert not t._inv_call(TensorDict())["_step"]
+        assert t._inv_call(TensorDict())["_step"].shape == ()
+        assert t._inv_call(TensorDict(batch_size=(2, 3)))["_step"].shape == (2, 3)
+
+    def test_transform_compose(self):
+        t = Compose(
+            ConditionalSkip(lambda td: torch.arange(td.numel()).view(td.shape) % 2 == 0)
+        )
+        assert not t._inv_call(TensorDict())["_step"]
+        assert t._inv_call(TensorDict())["_step"].shape == ()
+        assert t._inv_call(TensorDict(batch_size=(2, 3)))["_step"].shape == (2, 3)
+
+    def test_transform_env(self):
+        # tested above
+        return
+
+    def test_transform_model(self):
+        t = Compose(
+            ConditionalSkip(lambda td: torch.arange(td.numel()).view(td.shape) % 2 == 0)
+        )
+        with pytest.raises(NotImplementedError):
+            t(TensorDict())["_step"]
+
+    def test_transform_rb(self):
+        return
+
+    def test_transform_inverse(self):
+        return
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
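
The _inv_call assertions above pin down the convention for the "_step" entry: a boolean mask with the tensordict's batch shape. A small sketch of that convention (the expected values assume "_step" is simply the negation of cond, which is what the empty-tensordict assertions imply):

import torch
from tensordict import TensorDict
from torchrl.envs.transforms import ConditionalSkip

t = ConditionalSkip(lambda td: torch.arange(td.numel()).view(td.shape) % 2 == 0)
masked = t._inv_call(TensorDict(batch_size=(2, 3)))
# "_step" keeps the batch shape; False where cond was True (skip),
# True where the base env should actually be stepped.
assert masked["_step"].shape == (2, 3)
assert (masked["_step"].flatten() == torch.tensor([False, True, False, True, False, True])).all()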

torchrl/envs/__init__.py (+1)
@@ -56,6 +56,7 @@
     CenterCrop,
     ClipTransform,
     Compose,
+    ConditionalSkip,
     Crop,
     DeviceCastTransform,
     DiscreteActionProjection,

torchrl/envs/batched_envs.py (+25 -4)
@@ -1089,7 +1089,7 @@ def _step(
         self,
         tensordict: TensorDict,
     ) -> TensorDict:
-        partial_steps = tensordict.get("_step", None)
+        partial_steps = tensordict.get("_step")
         tensordict_save = tensordict
         if partial_steps is not None and partial_steps.all():
             partial_steps = None
@@ -1164,6 +1164,10 @@ def select_and_clone(name, tensor):
 
         if partial_steps is not None:
             result = out.new_zeros(tensordict_save.shape)
+            # Copy the observation data from the previous step as placeholder
+            result.update(
+                tensordict_save.select(*result.keys(True, True), strict=False).clone()
+            )
             result[partial_steps] = out
             return result
 
@@ -1529,6 +1533,9 @@ def _step_and_maybe_reset_no_buffers(
         )
         if partial_steps is not None:
             result = out.new_zeros(tensordict_save.shape)
+            result.update(
+                tensordict_save.select(*result.keys(True, True), strict=False).clone()
+            )
             result[partial_steps] = out
             return result
         return out
@@ -1543,7 +1550,7 @@ def step_and_maybe_reset(
             # return self._step_and_maybe_reset_no_buffers(tensordict)
             return super().step_and_maybe_reset(tensordict)
 
-        partial_steps = tensordict.get("_step", None)
+        partial_steps = tensordict.get("_step")
         tensordict_save = tensordict
         if partial_steps is not None and partial_steps.all():
             partial_steps = None
@@ -1661,6 +1668,14 @@
         if partial_steps is not None:
             result = tensordict.new_zeros(tensordict_save.shape)
             result_ = tensordict_.new_zeros(tensordict_save.shape)
+
+            result.update(
+                tensordict_save.select(*result.keys(True, True), strict=False).clone()
+            )
+            result_.update(
+                tensordict_save.select(*result_.keys(True, True), strict=False).clone()
+            )
+
             result[partial_steps] = tensordict
             result_[partial_steps] = tensordict_
             return result, result_
@@ -1700,7 +1715,7 @@ def _wait_for_workers(self, workers_range):
     def _step_no_buffers(
         self, tensordict: TensorDictBase
     ) -> Tuple[TensorDictBase, TensorDictBase]:
-        partial_steps = tensordict.get("_step", None)
+        partial_steps = tensordict.get("_step")
         tensordict_save = tensordict
         if partial_steps is not None and partial_steps.all():
             partial_steps = None
@@ -1727,6 +1742,9 @@ def _step_no_buffers(
         out = out.to(self.device, non_blocking=self.non_blocking)
         if partial_steps is not None:
            result = out.new_zeros(tensordict_save.shape)
+            result.update(
+                tensordict_save.select(*result.keys(True, True), strict=False).clone()
+            )
             result[partial_steps] = out
             return result
         return out
@@ -1744,7 +1762,7 @@ def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
         # and this transform overrides an observation key (eg, CatFrames)
         # the shape, dtype or device may not necessarily match and writing
         # the value in-place will fail.
-        partial_steps = tensordict.get("_step", None)
+        partial_steps = tensordict.get("_step")
         tensordict_save = tensordict
         if partial_steps is not None and partial_steps.all():
             partial_steps = None
@@ -1875,6 +1893,9 @@ def select_and_clone(name, tensor):
         self._sync_w2m()
         if partial_steps is not None:
             result = out.new_zeros(tensordict_save.shape)
+            result.update(
+                tensordict_save.select(*result.keys(True, True), strict=False).clone()
+            )
             result[partial_steps] = out
             return result
         return out
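
The hunks above repeat one pattern: allocate a placeholder of the full batch shape, pre-fill it with the values saved before the step, then scatter the stepped sub-batch back at the partial_steps positions. A standalone sketch of that write-back on plain TensorDicts (toy keys and values, not the library code):

import torch
from tensordict import TensorDict

# Saved (pre-step) data for a batch of 4 workers; only workers 0 and 2 step.
tensordict_save = TensorDict({"obs": torch.arange(4.0)}, batch_size=[4])
partial_steps = torch.tensor([True, False, True, False])
# `out` stands for the result of stepping only the selected sub-batch.
out = TensorDict({"obs": torch.full((2,), 100.0)}, batch_size=[2])

result = out.new_zeros(tensordict_save.shape)
# Placeholder entries come from the saved tensordict...
result.update(tensordict_save.select(*result.keys(True, True), strict=False).clone())
# ...and the stepped entries overwrite them at the selected positions.
result[partial_steps] = out
assert (result["obs"] == torch.tensor([100.0, 1.0, 100.0, 3.0])).all()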

torchrl/envs/common.py (+28 -9)
@@ -1933,6 +1933,17 @@ def state_spec_unbatched(self, spec: Composite):
         spec = spec.expand(self.batch_size + spec.shape)
         self.state_spec = spec
 
+    def _skip_tensordict(self, tensordict):
+        # Creates a "skip" tensordict, ie a placeholder for when a step is skipped
+        next_tensordict = self.full_done_spec.zero()
+        next_tensordict.update(self.full_observation_spec.zero())
+        next_tensordict.update(self.full_reward_spec.zero())
+        # Copy the data from tensordict in `next`
+        next_tensordict.update(
+            tensordict.select(*next_tensordict.keys(True, True), strict=False)
+        )
+        return next_tensordict
+
     def step(self, tensordict: TensorDictBase) -> TensorDictBase:
         """Makes a step in the environment.
 
@@ -1953,25 +1964,33 @@ def step(self, tensordict: TensorDictBase) -> TensorDictBase:
         """
         # sanity check
         self._assert_tensordict_shape(tensordict)
-        partial_steps = None
+        partial_steps = tensordict.pop("_step", None)
 
-        if not self.batch_locked:
-            # Batched envs have their own way of dealing with this - batched envs that are not batched-locked may fail here
-            partial_steps = tensordict.get("_step", None)
-            if partial_steps is not None:
+        next_tensordict = None
+
+        if partial_steps is not None:
+            if not self.batch_locked:
+                # Batched envs have their own way of dealing with this - batched envs that are not batched-locked may fail here
                 if partial_steps.all():
                     partial_steps = None
                 else:
                     tensordict_batch_size = tensordict.batch_size
                     partial_steps = partial_steps.view(tensordict_batch_size)
                     tensordict = tensordict[partial_steps]
-        else:
+            else:
+                if not partial_steps.any():
+                    next_tensordict = self._skip_tensordict(tensordict)
+                else:
+                    # trust that the _step can handle this!
+                    tensordict.set("_step", partial_steps)
+
             tensordict_batch_size = self.batch_size
 
         next_preset = tensordict.get("next", None)
 
-        next_tensordict = self._step(tensordict)
-        next_tensordict = self._step_proc_data(next_tensordict)
+        if next_tensordict is None:
+            next_tensordict = self._step(tensordict)
+            next_tensordict = self._step_proc_data(next_tensordict)
         if next_preset is not None:
             # tensordict could already have a "next" key
             # this could be done more efficiently by not excluding but just passing
@@ -1980,7 +1999,7 @@ def step(self, tensordict: TensorDictBase) -> TensorDictBase:
                 next_preset.exclude(*next_tensordict.keys(True, True))
             )
         tensordict.set("next", next_tensordict)
-        if partial_steps is not None:
+        if partial_steps is not None and tensordict_batch_size != self.batch_size:
            result = tensordict.new_zeros(tensordict_batch_size)
            result[partial_steps] = tensordict
            return result
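
_skip_tensordict builds the placeholder "next" tensordict returned when every entry of a batch-locked env is skipped: zeros for the done and reward specs, with any matching entries of the current tensordict (typically the observations) copied over. A toy reconstruction of that behaviour on hand-made specs (the spec names and shapes are illustrative; the real method uses the env's own full_*_spec attributes):

import torch
from tensordict import TensorDict
from torchrl.data import Categorical, Composite, Unbounded

full_observation_spec = Composite(observation=Unbounded(shape=(3,)))
full_reward_spec = Composite(reward=Unbounded(shape=(1,)))
full_done_spec = Composite(done=Categorical(2, shape=(1,), dtype=torch.bool))

tensordict = TensorDict({"observation": torch.ones(3), "action": torch.zeros(1)}, [])

next_tensordict = full_done_spec.zero()
next_tensordict.update(full_observation_spec.zero())
next_tensordict.update(full_reward_spec.zero())
# The current observation is carried over; reward and done stay at their zero defaults.
next_tensordict.update(
    tensordict.select(*next_tensordict.keys(True, True), strict=False)
)
assert (next_tensordict["observation"] == 1).all()
assert next_tensordict["reward"] == 0 and not next_tensordict["done"]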

torchrl/envs/transforms/__init__.py (+1)
@@ -20,6 +20,7 @@
     CenterCrop,
     ClipTransform,
     Compose,
+    ConditionalSkip,
     Crop,
     DeviceCastTransform,
     DiscreteActionProjection,
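
Both __init__ hunks only re-export the new class, so the two public import paths resolve to the same object; a quick sanity check (not part of the commit):

from torchrl.envs import ConditionalSkip
from torchrl.envs.transforms import ConditionalSkip as _ConditionalSkip

assert ConditionalSkip is _ConditionalSkip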
