[BugFix] Fix env.full_done_spec~s~

vmoens · vmoens · commit 90f37eddfd31 · 2025-03-07T16:56:41.000-08:00
ghstack-source-id: ba0d371d10b3f46ec1172fbec639ccc4d5559659 Pull Request resolved: #2815 (cherry picked from commit f5c0666)
diff --git a/torchrl/envs/batched_envs.py b/torchrl/envs/batched_envs.py
@@ -1521,15 +1521,10 @@ def _step_and_maybe_reset_no_buffers(
 
         results = [None] * len(workers_range)
 
-        consumed_indices = []
-        events = set(workers_range)
-        while len(consumed_indices) < len(workers_range):
-            for i in list(events):
-                if self._events[i].is_set():
-                    results[i] = self.parent_channels[i].recv()
-                    self._events[i].clear()
-                    consumed_indices.append(i)
-                    events.discard(i)
+        self._wait_for_workers(workers_range)
+
+        for i, w in enumerate(workers_range):
+            results[i] = self.parent_channels[w].recv()
 
         out_next, out_root = zip(*(future for future in results))
         out = TensorDict.maybe_dense_stack(out_next), TensorDict.maybe_dense_stack(
diff --git a/torchrl/envs/common.py b/torchrl/envs/common.py
@@ -9,7 +9,7 @@
 import warnings
 from copy import deepcopy
 from functools import partial, wraps
-from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
+from typing import Any, Callable, Iterator
 
 import numpy as np
 import torch
@@ -476,7 +476,7 @@ def __init__(
         self,
         *,
         device: DEVICE_TYPING = None,
-        batch_size: Optional[torch.Size] = None,
+        batch_size: torch.Size | None = None,
         run_type_checks: bool = False,
         allow_done_after_reset: bool = False,
         spec_locked: bool = True,
@@ -587,10 +587,10 @@ def auto_specs_(
         policy: Callable[[TensorDictBase], TensorDictBase],
         *,
         tensordict: TensorDictBase | None = None,
-        action_key: NestedKey | List[NestedKey] = "action",
-        done_key: NestedKey | List[NestedKey] | None = None,
-        observation_key: NestedKey | List[NestedKey] = "observation",
-        reward_key: NestedKey | List[NestedKey] = "reward",
+        action_key: NestedKey | list[NestedKey] = "action",
+        done_key: NestedKey | list[NestedKey] | None = None,
+        observation_key: NestedKey | list[NestedKey] = "observation",
+        reward_key: NestedKey | list[NestedKey] = "reward",
     ):
         """Automatically sets the specifications (specs) of the environment based on a random rollout using a given policy.
 
@@ -692,7 +692,7 @@ def auto_specs_(
         if full_action_spec is not None:
             self.full_action_spec = full_action_spec
         if full_done_spec is not None:
-            self.full_done_specs = full_done_spec
+            self.full_done_spec = full_done_spec
         if full_observation_spec is not None:
             self.full_observation_spec = full_observation_spec
         if full_reward_spec is not None:
@@ -704,8 +704,7 @@ def auto_specs_(
 
     @wraps(check_env_specs_func)
     def check_env_specs(self, *args, **kwargs):
-        return_contiguous = kwargs.pop("return_contiguous", not self._has_dynamic_specs)
-        kwargs["return_contiguous"] = return_contiguous
+        kwargs.setdefault("return_contiguous", not self._has_dynamic_specs)
         return check_env_specs_func(self, *args, **kwargs)
 
     check_env_specs.__doc__ = check_env_specs_func.__doc__
@@ -850,8 +849,7 @@ def ndim(self):
 
     def append_transform(
         self,
-        transform: "Transform"  # noqa: F821
-        | Callable[[TensorDictBase], TensorDictBase],
+        transform: Transform | Callable[[TensorDictBase], TensorDictBase],  # noqa: F821
     ) -> EnvBase:
         """Returns a transformed environment where the callable/transform passed is applied.
 
@@ -995,7 +993,7 @@ def output_spec(self, value: TensorSpec) -> None:
 
     @property
     @_cache_value
-    def action_keys(self) -> List[NestedKey]:
+    def action_keys(self) -> list[NestedKey]:
         """The action keys of an environment.
 
         By default, there will only be one key named "action".
@@ -1008,7 +1006,7 @@ def action_keys(self) -> List[NestedKey]:
 
     @property
     @_cache_value
-    def state_keys(self) -> List[NestedKey]:
+    def state_keys(self) -> list[NestedKey]:
         """The state keys of an environment.
 
         By default, there will only be one key named "state".
@@ -1205,7 +1203,7 @@ def full_action_spec(self, spec: Composite) -> None:
     # Reward spec
     @property
     @_cache_value
-    def reward_keys(self) -> List[NestedKey]:
+    def reward_keys(self) -> list[NestedKey]:
         """The reward keys of an environment.
 
         By default, there will only be one key named "reward".
@@ -1215,6 +1213,20 @@ def reward_keys(self) -> List[NestedKey]:
         reward_keys = sorted(self.full_reward_spec.keys(True, True), key=_repr_by_depth)
         return reward_keys
 
+    @property
+    @_cache_value
+    def observation_keys(self) -> list[NestedKey]:
+        """The observation keys of an environment.
+
+        By default, there will only be one key named "observation".
+
+        Keys are sorted by depth in the data tree.
+        """
+        observation_keys = sorted(
+            self.full_observation_spec.keys(True, True), key=_repr_by_depth
+        )
+        return observation_keys
+
     @property
     def reward_key(self):
         """The reward key of an environment.
@@ -1402,7 +1414,7 @@ def full_reward_spec(self, spec: Composite) -> None:
     # done spec
     @property
     @_cache_value
-    def done_keys(self) -> List[NestedKey]:
+    def done_keys(self) -> list[NestedKey]:
         """The done keys of an environment.
 
         By default, there will only be one key named "done".
@@ -2190,8 +2202,8 @@ def register_gym(
         id: str,
         *,
         entry_point: Callable | None = None,
-        transform: "Transform" | None = None,  # noqa: F821
-        info_keys: List[NestedKey] | None = None,
+        transform: Transform | None = None,  # noqa: F821
+        info_keys: list[NestedKey] | None = None,
         backend: str = None,
         to_numpy: bool = False,
         reward_threshold: float | None = None,
@@ -2380,8 +2392,8 @@ def _register_gym(
         cls,
         id,
         entry_point: Callable | None = None,
-        transform: "Transform" | None = None,  # noqa: F821
-        info_keys: List[NestedKey] | None = None,
+        transform: Transform | None = None,  # noqa: F821
+        info_keys: list[NestedKey] | None = None,
         to_numpy: bool = False,
         reward_threshold: float | None = None,
         nondeterministic: bool = False,
@@ -2422,8 +2434,8 @@ def _register_gym(  # noqa: F811
         cls,
         id,
         entry_point: Callable | None = None,
-        transform: "Transform" | None = None,  # noqa: F821
-        info_keys: List[NestedKey] | None = None,
+        transform: Transform | None = None,  # noqa: F821
+        info_keys: list[NestedKey] | None = None,
         to_numpy: bool = False,
         reward_threshold: float | None = None,
         nondeterministic: bool = False,
@@ -2470,8 +2482,8 @@ def _register_gym(  # noqa: F811
         cls,
         id,
         entry_point: Callable | None = None,
-        transform: "Transform" | None = None,  # noqa: F821
-        info_keys: List[NestedKey] | None = None,
+        transform: Transform | None = None,  # noqa: F821
+        info_keys: list[NestedKey] | None = None,
         to_numpy: bool = False,
         reward_threshold: float | None = None,
         nondeterministic: bool = False,
@@ -2523,8 +2535,8 @@ def _register_gym(  # noqa: F811
         cls,
         id,
         entry_point: Callable | None = None,
-        transform: "Transform" | None = None,  # noqa: F821
-        info_keys: List[NestedKey] | None = None,
+        transform: Transform | None = None,  # noqa: F821
+        info_keys: list[NestedKey] | None = None,
         to_numpy: bool = False,
         reward_threshold: float | None = None,
         nondeterministic: bool = False,
@@ -2579,8 +2591,8 @@ def _register_gym(  # noqa: F811
         cls,
         id,
         entry_point: Callable | None = None,
-        transform: "Transform" | None = None,  # noqa: F821
-        info_keys: List[NestedKey] | None = None,
+        transform: Transform | None = None,  # noqa: F821
+        info_keys: list[NestedKey] | None = None,
         to_numpy: bool = False,
         reward_threshold: float | None = None,
         nondeterministic: bool = False,
@@ -2637,8 +2649,8 @@ def _register_gym(  # noqa: F811
         cls,
         id,
         entry_point: Callable | None = None,
-        transform: "Transform" | None = None,  # noqa: F821
-        info_keys: List[NestedKey] | None = None,
+        transform: Transform | None = None,  # noqa: F821
+        info_keys: list[NestedKey] | None = None,
         to_numpy: bool = False,
         reward_threshold: float | None = None,
         nondeterministic: bool = False,
@@ -2695,7 +2707,7 @@ def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
 
     def reset(
         self,
-        tensordict: Optional[TensorDictBase] = None,
+        tensordict: TensorDictBase | None = None,
         **kwargs,
     ) -> TensorDictBase:
         """Resets the environment.
@@ -2804,8 +2816,8 @@ def numel(self) -> int:
         return prod(self.batch_size)
 
     def set_seed(
-        self, seed: Optional[int] = None, static_seed: bool = False
-    ) -> Optional[int]:
+        self, seed: int | None = None, static_seed: bool = False
+    ) -> int | None:
         """Sets the seed of the environment and returns the next seed to be used (which is the input seed if a single environment is present).
 
         Args:
@@ -2826,7 +2838,7 @@ def set_seed(
         return seed
 
     @abc.abstractmethod
-    def _set_seed(self, seed: Optional[int]):
+    def _set_seed(self, seed: int | None):
         raise NotImplementedError
 
     def set_state(self):
@@ -2841,7 +2853,26 @@ def _assert_tensordict_shape(self, tensordict: TensorDictBase) -> None:
                 f"got {tensordict.batch_size} and {self.batch_size}"
             )
 
-    def rand_action(self, tensordict: Optional[TensorDictBase] = None):
+    def all_actions(self, tensordict: TensorDictBase | None = None) -> TensorDictBase:
+        """Generates all possible actions from the action spec.
+
+        This only works in environments with fully discrete actions.
+
+        Args:
+            tensordict (TensorDictBase, optional): If given, :meth:`~.reset`
+                is called with this tensordict.
+
+        Returns:
+            a tensordict object with the "action" entry updated with a batch of
+            all possible actions. The actions are stacked together in the
+            leading dimension.
+        """
+        if tensordict is not None:
+            self.reset(tensordict)
+
+        return self.full_action_spec.enumerate(use_mask=True)
+
+    def rand_action(self, tensordict: TensorDictBase | None = None):
         """Performs a random action given the action_spec attribute.
 
         Args:
@@ -2875,7 +2906,7 @@ def rand_action(self, tensordict: Optional[TensorDictBase] = None):
         tensordict.update(r)
         return tensordict
 
-    def rand_step(self, tensordict: Optional[TensorDictBase] = None) -> TensorDictBase:
+    def rand_step(self, tensordict: TensorDictBase | None = None) -> TensorDictBase:
         """Performs a random step in the environment given the action_spec attribute.
 
         Args:
@@ -2911,15 +2942,15 @@ def _has_dynamic_specs(self) -> bool:
     def rollout(
         self,
         max_steps: int,
-        policy: Optional[Callable[[TensorDictBase], TensorDictBase]] = None,
-        callback: Optional[Callable[[TensorDictBase, ...], Any]] = None,
+        policy: Callable[[TensorDictBase], TensorDictBase] | None = None,
+        callback: Callable[[TensorDictBase, ...], Any] | None = None,
         *,
         auto_reset: bool = True,
         auto_cast_to_device: bool = False,
         break_when_any_done: bool | None = None,
         break_when_all_done: bool | None = None,
         return_contiguous: bool | None = False,
-        tensordict: Optional[TensorDictBase] = None,
+        tensordict: TensorDictBase | None = None,
         set_truncated: bool = False,
         out=None,
         trust_policy: bool = False,
@@ -3441,7 +3472,7 @@ def _rollout_nonstop(
 
     def step_and_maybe_reset(
         self, tensordict: TensorDictBase
-    ) -> Tuple[TensorDictBase, TensorDictBase]:
+    ) -> tuple[TensorDictBase, TensorDictBase]:
         """Runs a step in the environment and (partially) resets it if needed.
 
         Args:
@@ -3549,7 +3580,7 @@ def empty_cache(self):
 
     @property
     @_cache_value
-    def reset_keys(self) -> List[NestedKey]:
+    def reset_keys(self) -> list[NestedKey]:
         """Returns a list of reset keys.
 
         Reset keys are keys that indicate partial reset, in batched, multitask or multiagent
@@ -3706,14 +3737,14 @@ class _EnvWrapper(EnvBase):
     """
 
     git_url: str = ""
-    available_envs: Dict[str, Any] = {}
+    available_envs: dict[str, Any] = {}
     libname: str = ""
 
     def __init__(
         self,
         *args,
         device: DEVICE_TYPING = None,
-        batch_size: Optional[torch.Size] = None,
+        batch_size: torch.Size | None = None,
         allow_done_after_reset: bool = False,
         spec_locked: bool = True,
         **kwargs,
@@ -3762,7 +3793,7 @@ def _sync_device(self):
         return sync_func
 
     @abc.abstractmethod
-    def _check_kwargs(self, kwargs: Dict):
+    def _check_kwargs(self, kwargs: dict):
         raise NotImplementedError
 
     def __getattr__(self, attr: str) -> Any:
@@ -3788,7 +3819,7 @@ def __getattr__(self, attr: str) -> Any:
         )
 
     @abc.abstractmethod
-    def _init_env(self) -> Optional[int]:
+    def _init_env(self) -> int | None:
         """Runs all the necessary steps such that the environment is ready to use.
 
         This step is intended to ensure that a seed is provided to the environment (if needed) and that the environment
@@ -3802,7 +3833,7 @@ def _init_env(self) -> Optional[int]:
         raise NotImplementedError
 
     @abc.abstractmethod
-    def _build_env(self, **kwargs) -> "gym.Env":  # noqa: F821
+    def _build_env(self, **kwargs) -> gym.Env:  # noqa: F821
         """Creates an environment from the target library and stores it with the `_env` attribute.
 
         When overwritten, this function should pass all the required kwargs to the env instantiation method.
@@ -3811,7 +3842,7 @@ def _build_env(self, **kwargs) -> "gym.Env":  # noqa: F821
         raise NotImplementedError
 
     @abc.abstractmethod
-    def _make_specs(self, env: "gym.Env") -> None:  # noqa: F821
+    def _make_specs(self, env: gym.Env) -> None:  # noqa: F821
         raise NotImplementedError
 
     def close(self) -> None:
@@ -3825,7 +3856,7 @@ def close(self) -> None:
 
 def make_tensordict(
     env: _EnvWrapper,
-    policy: Optional[Callable[[TensorDictBase, ...], TensorDictBase]] = None,
+    policy: Callable[[TensorDictBase, ...], TensorDictBase] | None = None,
 ) -> TensorDictBase:
     """Returns a zeroed-tensordict with fields matching those required for a full step (action selection and environment step) in the environment.