pytorch
diff --git a/‎.github/workflows/test-linux.yml
Lines changed: 0 additions & 32 deletions b/‎.github/workflows/test-linux.yml
Lines changed: 0 additions & 32 deletions
diff --git a/‎docs/source/reference/envs.rst
Lines changed: 78 additions & 0 deletions b/‎docs/source/reference/envs.rst
Lines changed: 78 additions & 0 deletions
diff --git a/‎test/mocking_classes.py
Lines changed: 94 additions & 15 deletions b/‎test/mocking_classes.py
Lines changed: 94 additions & 15 deletions
diff --git a/‎test/test_env.py
Lines changed: 47 additions & 35 deletions b/‎test/test_env.py
Lines changed: 47 additions & 35 deletions
@@ -53,38 +53,6 @@ jobs:
         ## setup_env.sh
         bash .github/unittest/linux/scripts/run_all.sh
 
-  tests-cpu-oldget:
-    # Tests that TD_GET_DEFAULTS_TO_NONE=0 works fine as this will be the default for TD up to 0.7
-    strategy:
-      matrix:
-        python_version: ["3.12"]
-      fail-fast: false
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    with:
-      runner: linux.12xlarge
-      repository: pytorch/rl
-      docker-image: "nvidia/cuda:12.2.0-devel-ubuntu22.04"
-      timeout: 90
-      script: |
-        if [[ "${{ github.ref }}" =~ release/* ]]; then
-          export RELEASE=1
-          export TORCH_VERSION=stable
-        else
-          export RELEASE=0
-          export TORCH_VERSION=nightly
-        fi
-        export TD_GET_DEFAULTS_TO_NONE=0
-
-        # Set env vars from matrix
-        export PYTHON_VERSION=${{ matrix.python_version }}
-        export CU_VERSION="cpu"
-
-        echo "PYTHON_VERSION: $PYTHON_VERSION"
-        echo "CU_VERSION: $CU_VERSION"
-
-        ## setup_env.sh
-        bash .github/unittest/linux/scripts/run_all.sh
-
   tests-gpu:
     strategy:
       matrix:
 
@@ -163,6 +163,81 @@ provides more information on how to design a custom environment from scratch.
     GymLikeEnv
     EnvMetaData
 
+Partial steps and partial resets
+--------------------------------
+
+TorchRL allows environments to reset some but not all of their sub-environments, or to run a step in some but not all
+of them. If there is only one environment in the batch, then a partial reset / step is also allowed with the behavior
+detailed below.
+
+Batching environments and locking the batch
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. _ref_batch_locked:
+
+Before detailing what partial resets and partial steps do, we must distinguish cases where an environment has
+a batch size of its own (mostly stateful environments) or when the environment is just a mere module that, given an
+input of arbitrary size, batches the operations over all elements (mostly stateless environments).
+
+This is controlled via the :attr:`~torchrl.envs.EnvBase.batch_locked` attribute: a batch-locked environment requires all input
+tensordicts to have the same batch-size as the env's. Typical examples of these environments are
+:class:`~torchrl.envs.GymEnv` and related. Batch-unlocked envs are by contrast allowed to work with any input size.
+Notable examples are :class:`~torchrl.envs.BraxEnv` or :class:`~torchrl.envs.JumanjiEnv`.
+
+Executing partial steps in a batch-unlocked environment is straightforward: one just needs to mask the part of the
+tensordict that does not need to be executed, pass the other part to `step` and merge the results with the previous
+input.
+
+Batched environments (:class:`~torchrl.envs.ParallelEnv` and :class:`~torchrl.envs.SerialEnv`) can also deal with
+partial steps easily: they just pass the actions to the sub-environments that are required to be executed.
+
+In all other cases, TorchRL assumes that the environment handles the partial steps correctly.
+
+.. warning:: This means that custom environments may silently run the non-required steps as there is no way for torchrl
+    to control what happens within the `_step` method!
+
+Partial Steps
+~~~~~~~~~~~~~
+
+.. _ref_partial_steps:
+
+Partial steps are controlled via the temporary key `"_step"`, which points to a boolean mask of the
+size of the tensordict that holds it. The classes equipped to deal with this are:
+
+- Batched environments: :class:`~torchrl.envs.ParallelEnv` and :class:`~torchrl.envs.SerialEnv` will dispatch the
+  action to and only to the environments where `"_step"` is `True`;
+- Batch-unlocked environments;
+- Unbatched environments (i.e., environments without batch size). In these environments, the :meth:`~torchrl.envs.EnvBase.step`
+  method will first look for a `"_step"` entry and, if present, act accordingly.
+  If a :class:`~torchrl.envs.Transform` instance passes a `"_step"` entry to the tensordict, it is also captured by
+  :class:`~torchrl.envs.TransformedEnv`'s own `_step` method which will skip the `base_env.step` as well as any further
+  transformation.
+
+When dealing with partial steps, the strategy is always to use the step output and mask missing values with the previous
+content of the input tensordict, if present, or a `0`-valued tensor if the tensor cannot be found. This means that
+if the input tensordict does not contain all the previous observations, then the output tensordict will be 0-valued for
+all the non-stepped elements. Within batched environments, data collectors and rollout utilities, this issue is
+not observed because these classes handle the passing of data properly.
+
+Partial steps are an essential feature of :meth:`~torchrl.envs.EnvBase.rollout` when `break_when_all_done` is `True`,
+as the environments with a `True` done state will need to be skipped during calls to `_step`.
+
+The :class:`~torchrl.envs.ConditionalSkip` transform allows you to programmatically ask for (partial) step skips.
+
+Partial Resets
+~~~~~~~~~~~~~~
+
+.. _ref_partial_resets:
+
+Partial resets work pretty much like partial steps, but with the `"_reset"` entry.
+
+The same restrictions of partial steps apply to partial resets.
+
+Likewise, partial resets are an essential feature of :meth:`~torchrl.envs.EnvBase.rollout` when `break_when_any_done` is `True`,
+as the environments with a `True` done state will need to be reset, but not others.
+
+See the following paragraph for a deep dive into partial resets within batched and vectorized environments.
+
 Vectorized envs
 ---------------
 
@@ -212,6 +287,7 @@ component (sub-environments or agents) should be reset.
 This allows to reset some but not all of the components.
 
 The ``"_reset"`` key has two distinct functionalities:
+
 1. During a call to :meth:`~.EnvBase._reset`, the ``"_reset"`` key may or may
    not be present in the input tensordict. TorchRL's convention is that the
    absence of the ``"_reset"`` key at a given ``"done"`` level indicates
@@ -885,6 +961,7 @@ to be able to create this other composition:
     CenterCrop
     ClipTransform
     Compose
+    ConditionalSkip
     Crop
     DTypeCastTransform
     DeviceCastTransform
@@ -900,6 +977,7 @@ to be able to create this other composition:
     InitTracker
     KLRewardTransform
     LineariseReward
+    MultiAction
     NoopResetEnv
     ObservationNorm
     ObservationTransform
 
@@ -358,13 +358,11 @@ def _step(self, tensordict):
             leading_batch_size = tensordict.shape if tensordict is not None else []
         self.counter += 1
         # We use tensordict.batch_size instead of self.batch_size since this method will also be used by MockBatchedUnLockedEnv
-        n = (
-            torch.full(
-                [*leading_batch_size, *self.observation_spec["observation"].shape],
-                self.counter,
-            )
-            .to(self.device)
-            .to(torch.get_default_dtype())
+        n = torch.full(
+            [*leading_batch_size, *self.observation_spec["observation"].shape],
+            self.counter,
+            device=self.device,
+            dtype=torch.get_default_dtype(),
         )
         done = self.counter >= self.max_val
         done = torch.full(
@@ -391,13 +389,11 @@ def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
         else:
             leading_batch_size = tensordict.shape if tensordict is not None else []
 
-        n = (
-            torch.full(
-                [*leading_batch_size, *self.observation_spec["observation"].shape],
-                self.counter,
-            )
-            .to(self.device)
-            .to(torch.get_default_dtype())
+        n = torch.full(
+            [*leading_batch_size, *self.observation_spec["observation"].shape],
+            self.counter,
+            device=self.device,
+            dtype=torch.get_default_dtype(),
         )
         done = self.counter >= self.max_val
         done = torch.full(
@@ -417,7 +413,7 @@ def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
 
 
 class MockBatchedUnLockedEnv(MockBatchedLockedEnv):
-    """Mocks an env whose batch_size does not define the size of the output tensordict.
+    """Mocks an env which batch_size does not define the size of the output tensordict.
 
     The size of the output tensordict is defined by the input tensordict itself.
 
@@ -433,6 +429,89 @@ def __new__(cls, *args, **kwargs):
         return super().__new__(cls, *args, _batch_locked=False, **kwargs)
 
 
+class StateLessCountingEnv(EnvBase):
+    """Stateless, batch-unlocked counting environment used for testing.
+
+    The environment keeps no state of its own: ``"count"`` and ``"max_count"``
+    travel inside the tensordict. Each step adds ``"action"`` to ``"count"`` and
+    the episode is done (terminated) once ``count >= max_count``. The reward is
+    always zero and ``"truncated"`` is always ``False``.
+    """
+
+    def __init__(self):
+        self.observation_spec = Composite(
+            count=Unbounded((1,), dtype=torch.int32),
+            max_count=Unbounded((1,), dtype=torch.int32),
+        )
+        self.full_action_spec = Composite(
+            action=Unbounded((1,), dtype=torch.int32),
+        )
+        # The keys must match what ``_step`` emits: done / terminated / truncated.
+        self.full_done_spec = Composite(
+            done=Unbounded((1,), dtype=torch.bool),
+            terminated=Unbounded((1,), dtype=torch.bool),
+            truncated=Unbounded((1,), dtype=torch.bool),
+        )
+        self.reward_spec = Composite(reward=Unbounded((1,), dtype=torch.float))
+        super().__init__()
+        # Input tensordicts of any batch size are accepted.
+        self._batch_locked = False
+
+    def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
+        """Build the initial tensordict.
+
+        If ``tensordict`` is given, its batch size and device are reused, and any
+        ``"count"`` / ``"max_count"`` entries it carries are kept as they are.
+        Missing entries default to a zero count and a random ``max_count`` drawn
+        from ``[10, 20)``. Done flags and reward are zero-initialized from the specs.
+        """
+        max_count = None
+        count = None
+        if tensordict is not None:
+            # Explicit ``None`` default so a missing key never raises, whatever
+            # the default-get behavior of the installed tensordict version is.
+            max_count = tensordict.get("max_count", None)
+            count = tensordict.get("count", None)
+            tensordict = TensorDict(
+                batch_size=tensordict.batch_size, device=tensordict.device
+            )
+            shape = tensordict.batch_size
+        else:
+            shape = ()
+            tensordict = TensorDict(device=self.device)
+        tensordict.update(
+            TensorDict(
+                count=torch.zeros(
+                    (
+                        *shape,
+                        1,
+                    ),
+                    dtype=torch.int32,
+                )
+                if count is None
+                else count,
+                max_count=torch.randint(
+                    10,
+                    20,
+                    (
+                        *shape,
+                        1,
+                    ),
+                    dtype=torch.int32,
+                )
+                if max_count is None
+                else max_count,
+                **self.done_spec.zero(shape),
+                **self.full_reward_spec.zero(shape),
+            )
+        )
+        return tensordict
+
+    def _step(
+        self,
+        tensordict: TensorDictBase,
+    ) -> TensorDictBase:
+        """Add ``"action"`` to ``"count"`` and flag entries that reached ``"max_count"``.
+
+        Returns a new tensordict with the same batch size and device as the input.
+        """
+        action = tensordict["action"]
+        count = tensordict["count"] + action
+        terminated = done = count >= tensordict["max_count"]
+        truncated = torch.zeros_like(done)
+        return TensorDict(
+            count=count,
+            max_count=tensordict["max_count"],
+            done=done,
+            terminated=terminated,
+            truncated=truncated,
+            reward=self.reward_spec.zero(tensordict.shape),
+            batch_size=tensordict.batch_size,
+            device=tensordict.device,
+        )
+
+    def _set_seed(self, seed: Optional[int]):
+        # No seeding is performed here.
+        ...
+
+
 class DiscreteActionVecMockEnv(_MockEnv):
     @classmethod
     def __new__(
 
@@ -4112,17 +4112,21 @@ def test_parallel_partial_steps(
                 use_buffers=use_buffers,
                 device=device,
             )
-            td = penv.reset()
-            psteps = torch.zeros(4, dtype=torch.bool)
-            psteps[[1, 3]] = True
-            td.set("_step", psteps)
-
-            td.set("action", penv.full_action_spec[penv.action_key].one())
-            td = penv.step(td)
-            assert (td[0].get("next") == 0).all()
-            assert (td[1].get("next") != 0).any()
-            assert (td[2].get("next") == 0).all()
-            assert (td[3].get("next") != 0).any()
+            try:
+                td = penv.reset()
+                psteps = torch.zeros(4, dtype=torch.bool)
+                psteps[[1, 3]] = True
+                td.set("_step", psteps)
+
+                td.set("action", penv.full_action_spec[penv.action_key].one())
+                td = penv.step(td)
+                assert_allclose_td(td[0].get("next"), td[0], intersection=True)
+                assert (td[1].get("next") != 0).any()
+                assert_allclose_td(td[2].get("next"), td[2], intersection=True)
+                assert (td[3].get("next") != 0).any()
+            finally:
+                penv.close()
+                del penv
 
     @pytest.mark.parametrize("use_buffers", [False, True])
     def test_parallel_partial_step_and_maybe_reset(
@@ -4135,17 +4139,21 @@ def test_parallel_partial_step_and_maybe_reset(
                 use_buffers=use_buffers,
                 device=device,
             )
-            td = penv.reset()
-            psteps = torch.zeros(4, dtype=torch.bool)
-            psteps[[1, 3]] = True
-            td.set("_step", psteps)
-
-            td.set("action", penv.full_action_spec[penv.action_key].one())
-            td, tdreset = penv.step_and_maybe_reset(td)
-            assert (td[0].get("next") == 0).all()
-            assert (td[1].get("next") != 0).any()
-            assert (td[2].get("next") == 0).all()
-            assert (td[3].get("next") != 0).any()
+            try:
+                td = penv.reset()
+                psteps = torch.zeros(4, dtype=torch.bool)
+                psteps[[1, 3]] = True
+                td.set("_step", psteps)
+
+                td.set("action", penv.full_action_spec[penv.action_key].one())
+                td, tdreset = penv.step_and_maybe_reset(td)
+                assert_allclose_td(td[0].get("next"), td[0], intersection=True)
+                assert (td[1].get("next") != 0).any()
+                assert_allclose_td(td[2].get("next"), td[2], intersection=True)
+                assert (td[3].get("next") != 0).any()
+            finally:
+                penv.close()
+                del penv
 
     @pytest.mark.parametrize("use_buffers", [False, True])
     def test_serial_partial_steps(self, use_buffers, device, env_device):
@@ -4156,17 +4164,21 @@ def test_serial_partial_steps(self, use_buffers, device, env_device):
                 use_buffers=use_buffers,
                 device=device,
             )
-            td = penv.reset()
-            psteps = torch.zeros(4, dtype=torch.bool)
-            psteps[[1, 3]] = True
-            td.set("_step", psteps)
-
-            td.set("action", penv.full_action_spec[penv.action_key].one())
-            td = penv.step(td)
-            assert (td[0].get("next") == 0).all()
-            assert (td[1].get("next") != 0).any()
-            assert (td[2].get("next") == 0).all()
-            assert (td[3].get("next") != 0).any()
+            try:
+                td = penv.reset()
+                psteps = torch.zeros(4, dtype=torch.bool)
+                psteps[[1, 3]] = True
+                td.set("_step", psteps)
+
+                td.set("action", penv.full_action_spec[penv.action_key].one())
+                td = penv.step(td)
+                assert_allclose_td(td[0].get("next"), td[0], intersection=True)
+                assert (td[1].get("next") != 0).any()
+                assert_allclose_td(td[2].get("next"), td[2], intersection=True)
+                assert (td[3].get("next") != 0).any()
+            finally:
+                penv.close()
+                del penv
 
     @pytest.mark.parametrize("use_buffers", [False, True])
     def test_serial_partial_step_and_maybe_reset(self, use_buffers, device, env_device):
@@ -4184,9 +4196,9 @@ def test_serial_partial_step_and_maybe_reset(self, use_buffers, device, env_devi
 
             td.set("action", penv.full_action_spec[penv.action_key].one())
             td = penv.step(td)
-            assert (td[0].get("next") == 0).all()
+            assert_allclose_td(td[0].get("next"), td[0], intersection=True)
             assert (td[1].get("next") != 0).any()
-            assert (td[2].get("next") == 0).all()
+            assert_allclose_td(td[2].get("next"), td[2], intersection=True)
             assert (td[3].get("next") != 0).any()