[Feature] Async environments

vmoens · vmoens · commit 4f00025af430 · 2025-03-26T09:30:55.000Z
ghstack-source-id: 0a70ce0 Pull Request resolved: #2864
diff --git a/docs/source/reference/envs.rst b/docs/source/reference/envs.rst
@@ -427,6 +427,87 @@ etc.), but one can not use an arbitrary TorchRL environment, as it is possible w
     ParallelEnv
     EnvCreator
 
+Async environments
+------------------
+
+Asynchronous environments allow for parallel execution of multiple environments, which can significantly speed up the
+data collection process in reinforcement learning.
+
+The :class:`~torchrl.envs.AsyncEnvPool` class and its subclasses provide a flexible interface for managing these
+environments using different backends, such as threading and multiprocessing.
+
+The :class:`~torchrl.envs.AsyncEnvPool` class serves as a base class for asynchronous environment pools, providing a
+common interface for managing multiple environments concurrently. It supports different backends for parallel
+execution, such as threading and multiprocessing, and provides methods for asynchronous stepping and resetting of environments.
+
+Contrary to :class:`~torchrl.envs.ParallelEnv`, :class:`~torchrl.envs.AsyncEnvPool` and its subclasses permit the
+execution of a given set of sub-environments while another task is performed, allowing for complex asynchronous jobs to be
+run at the same time. For instance, it is possible to execute some environments while the policy is running based on
+the output of others.
+
+This family of classes is particularly interesting when dealing with environments that have a high (and/or variable)
+latency.
+
+.. note:: This class and its subclasses should work when nested in :class:`~torchrl.envs.TransformedEnv` and
+    batched environments, but users won't currently be able to use the async features of the base environment when
+    it's nested in these classes. One should prefer nesting transformed envs within an ``AsyncEnvPool`` instead.
+    If this is not possible, please raise an issue.
+
+Classes
+~~~~~~~
+
+- :class:`~torchrl.envs.AsyncEnvPool`: A base class for asynchronous environment pools. It determines the backend
+  implementation to use based on the provided arguments and manages the lifecycle of the environments.
+- :class:`~torchrl.envs.ProcessorAsyncEnvPool`: An implementation of :class:`~torchrl.envs.AsyncEnvPool` using
+  multiprocessing for parallel execution of environments. This class manages a pool of environments, each running in
+  its own process, and provides methods for asynchronous stepping and resetting of environments using inter-process
+  communication. It is automatically instantiated when ``"multiprocessing"`` is passed as a backend during the
+  :class:`~torchrl.envs.AsyncEnvPool` instantiation.
+- :class:`~torchrl.envs.ThreadingAsyncEnvPool`: An implementation of :class:`~torchrl.envs.AsyncEnvPool` using
+  threading for parallel execution of environments. This class manages a pool of environments, each running in its own
+  thread, and provides methods for asynchronous stepping and resetting of environments using a thread pool executor.
+  It is automatically instantiated when ``"threading"`` is passed as a backend during the
+  :class:`~torchrl.envs.AsyncEnvPool` instantiation.
+
+Example
+~~~~~~~
+
+     >>> from functools import partial
+     >>> from torchrl.envs import AsyncEnvPool, GymEnv
+     >>> import torch
+     >>> # Choose backend
+     >>> backend = "threading"
+     >>> env = AsyncEnvPool(
+     ...     [partial(GymEnv, "Pendulum-v1"), partial(GymEnv, "CartPole-v1")],
+     ...     stack="lazy",
+     ...     backend=backend
+     ... )
+     >>> # Execute a synchronous reset
+     >>> reset = env.reset()
+     >>> print(reset)
+     >>> # Execute a synchronous step
+     >>> s = env.rand_step(reset)
+     >>> print(s)
+     >>> # Execute an asynchronous step in env 0
+     >>> s0 = s[0]
+     >>> s0["action"] = torch.randn(1).clamp(-1, 1)
+     >>> s0["env_index"] = 0
+     >>> env.async_step_send(s0)
+     >>> # Receive data
+     >>> s0_result = env.async_step_recv()
+     >>> print('result', s0_result)
+     >>> # Close env
+     >>> env.close()
+
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    AsyncEnvPool
+    ProcessorAsyncEnvPool
+    ThreadingAsyncEnvPool
+
 
 Custom native TorchRL environments
 ----------------------------------
diff --git a/test/test_env.py b/test/test_env.py
@@ -42,6 +42,7 @@
 from torchrl.collectors import MultiSyncDataCollector, SyncDataCollector
 from torchrl.data.tensor_specs import Categorical, Composite, NonTensor, Unbounded
 from torchrl.envs import (
+    AsyncEnvPool,
     CatFrames,
     CatTensors,
     ChessEnv,
@@ -4996,6 +4997,68 @@ def policy(td):
                 assert "done" in r
 
 
+class TestAsyncEnvPool:
+    def make_env(self, *, makers, backend):
+        return AsyncEnvPool(makers, backend=backend)
+
+    @pytest.fixture(scope="module")
+    def make_envs(self):
+        yield [
+            partial(CountingEnv),
+            partial(CountingEnv),
+            partial(CountingEnv),
+            partial(CountingEnv),
+        ]
+
+    @pytest.mark.parametrize("backend", ["multiprocessing", "threading"])
+    def test_specs(self, backend, make_envs):
+        env = self.make_env(makers=make_envs, backend=backend)
+        assert env.batch_size == (4,)
+        try:
+            r = env.reset()
+            assert r.shape == env.shape
+            s = env.rand_step(r)
+            assert s.shape == env.shape
+            env.check_env_specs(break_when_any_done="both")
+        finally:
+            env._maybe_shutdown()
+
+    @pytest.mark.parametrize("backend", ["multiprocessing", "threading"])
+    @pytest.mark.parametrize("min_get", [None, 1, 2])
+    @set_capture_non_tensor_stack(False)
+    def test_async_reset_and_step(self, backend, make_envs, min_get):
+        env = self.make_env(makers=make_envs, backend=backend)
+        try:
+            env.async_reset_send(
+                TensorDict(
+                    {env._env_idx_key: NonTensorStack(*range(env.batch_size.numel()))},
+                    batch_size=env.batch_size,
+                )
+            )
+            r = env.async_reset_recv(min_get=min_get)
+            if min_get is not None:
+                assert r.numel() >= min_get
+            assert env._env_idx_key in r
+            # take an action
+            r.set("action", torch.ones(r.shape + (1,)))
+            env.async_step_send(r)
+            s = env.async_step_recv(min_get=min_get)
+            if min_get is not None:
+                assert s.numel() >= min_get
+            assert env._env_idx_key in s
+        finally:
+            env._maybe_shutdown()
+
+    @pytest.mark.parametrize("backend", ["multiprocessing", "threading"])
+    def test_async_transformed(self, backend, make_envs):
+        base_env = self.make_env(makers=make_envs, backend=backend)
+        try:
+            env = TransformedEnv(base_env, StepCounter())
+            env.check_env_specs(break_when_any_done="both")
+        finally:
+            base_env._maybe_shutdown()
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
diff --git a/test/test_tensordictmodules.py b/test/test_tensordictmodules.py
@@ -187,7 +187,7 @@ def test_stateful(self, safe, spec_type, lazy):
 
         # test bounds
         if not safe and spec_type == "bounded":
-            assert ((td.get("out") > 0.1) | (td.get("out") < -0.1)).any()
+            assert ((td.get("out") > 0.1) | (td.get("out") < -0.1)).any(), td.get("out")
         elif safe and spec_type == "bounded":
             assert ((td.get("out") < 0.1) | (td.get("out") > -0.1)).all()
 
diff --git a/torchrl/envs/__init__.py b/torchrl/envs/__init__.py
@@ -3,6 +3,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+from .async_envs import AsyncEnvPool, ProcessorAsyncEnvPool, ThreadingAsyncEnvPool
 from .batched_envs import ParallelEnv, SerialEnv
 from .common import EnvBase, EnvMetaData, make_tensordict
 from .custom import ChessEnv, LLMEnv, LLMHashingEnv, PendulumEnv, TicTacToeEnv
@@ -135,6 +136,9 @@
     "VecNormV2",
     "AutoResetEnv",
     "AutoResetTransform",
+    "AsyncEnvPool",
+    "ProcessorAsyncEnvPool",
+    "ThreadingAsyncEnvPool",
     "BatchSizeTransform",
     "BinarizeReward",
     "BraxEnv",
diff --git a/torchrl/envs/async_envs.py b/torchrl/envs/async_envs.py
diff --git a/torchrl/envs/common.py b/torchrl/envs/common.py