Skip to content

Commit eeeeb2b

Browse files
committed
Update (base update)
[ghstack-poisoned]
2 parents c076c0c + f1c42e0 commit eeeeb2b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+590
-294
lines changed

.github/workflows/docs.yml

+6-6
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,11 @@ jobs:
2626
build-docs:
2727
strategy:
2828
matrix:
29-
python_version: ["3.9"]
30-
cuda_arch_version: ["12.4"]
29+
python_version: [ "3.9" ]
30+
cuda_arch_version: [ "12.4" ]
3131
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
3232
with:
33+
runner: linux.g5.4xlarge.nvidia.gpu
3334
repository: pytorch/rl
3435
upload-artifact: docs
3536
timeout: 120
@@ -38,7 +39,6 @@ jobs:
3839
set -v
3940
# apt-get update && apt-get install -y -f git wget gcc g++ dialog apt-utils
4041
yum makecache
41-
# yum install -y glfw glew mesa-libGL mesa-libGL-devel mesa-libOSMesa-devel egl-utils freeglut
4242
# Install Mesa and OpenGL Libraries:
4343
yum install -y glfw mesa-libGL mesa-libGL-devel egl-utils freeglut mesa-libGLU mesa-libEGL
4444
# Install DRI Drivers:
@@ -112,7 +112,7 @@ jobs:
112112
cd ./docs
113113
# timeout 7m bash -ic "MUJOCO_GL=egl sphinx-build ./source _local_build" || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi
114114
# bash -ic "PYOPENGL_PLATFORM=egl MUJOCO_GL=egl sphinx-build ./source _local_build" || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi
115-
PYOPENGL_PLATFORM=egl MUJOCO_GL=egl TORCHRL_CONSOLE_STREAM=stdout sphinx-build ./source _local_build
115+
PYOPENGL_PLATFORM=egl MUJOCO_GL=egl TORCHRL_CONSOLE_STREAM=stdout sphinx-build ./source _local_build -v -j 4
116116
cd ..
117117
118118
cp -r docs/_local_build/* "${RUNNER_ARTIFACT_DIR}"
@@ -123,8 +123,8 @@ jobs:
123123
124124
upload:
125125
needs: build-docs
126-
if: github.repository == 'pytorch/rl' && github.event_name == 'push' &&
127-
((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag')
126+
if: github.repository == 'pytorch/rl' && github.event_name == 'push' &&
127+
((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag')
128128
permissions:
129129
contents: write
130130
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main

docs/requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,4 @@ vmas
2828
onnxscript
2929
onnxruntime
3030
onnx
31+
psutil

docs/source/conf.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@
2828
import pytorch_sphinx_theme
2929
import torchrl
3030

31-
# Suppress warnings - TODO
32-
# suppress_warnings = [ 'misc.highlighting_failure' ]
31+
# Suppress warnings
3332
warnings.filterwarnings("ignore", category=UserWarning)
3433

3534
project = "torchrl"
@@ -86,6 +85,21 @@
8685
"torchvision": ("https://pytorch.org/vision/stable/", None),
8786
}
8887

88+
89+
def kill_procs(gallery_conf, fname):
90+
import os
91+
92+
import psutil
93+
94+
# Get the current process
95+
current_proc = psutil.Process(os.getpid())
96+
# Iterate over all child processes
97+
for child in current_proc.children(recursive=True):
98+
# Kill the child process
99+
child.terminate()
100+
print(f"Killed child process with PID {child.pid}") # noqa: T201
101+
102+
89103
sphinx_gallery_conf = {
90104
"examples_dirs": "reference/generated/tutorials/", # path to your example scripts
91105
"gallery_dirs": "tutorials", # path to where to save gallery generated output
@@ -95,9 +109,12 @@
95109
"notebook_images": "reference/generated/tutorials/media/", # images to parse
96110
"download_all_examples": True,
97111
"abort_on_example_error": True,
98-
"show_memory": True,
112+
# "show_memory": True,
113+
"plot_gallery": "False",
99114
"capture_repr": ("_repr_html_", "__repr__"), # capture representations
100115
"write_computation_times": True,
116+
# "compress_images": ("images", "thumbnails"),
117+
"reset_modules": (kill_procs, "matplotlib", "seaborn"),
101118
}
102119

103120
napoleon_use_ivar = True

docs/source/reference/envs.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -976,7 +976,7 @@ to be able to create this other composition:
976976
Hash
977977
InitTracker
978978
KLRewardTransform
979-
LineariseReward
979+
LineariseRewards
980980
MultiAction
981981
NoopResetEnv
982982
ObservationNorm

test/mocking_classes.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -714,7 +714,9 @@ def _step(
714714
while done.shape != tensordict.shape:
715715
done = done.any(-1)
716716
done = reward = done.unsqueeze(-1)
717-
tensordict.set("reward", reward.to(torch.get_default_dtype()))
717+
tensordict.set(
718+
"reward", reward.to(self.reward_spec.dtype).expand(self.reward_spec.shape)
719+
)
718720
tensordict.set("done", done)
719721
tensordict.set("terminated", done)
720722
return tensordict

torchrl/_utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,7 @@ def reset(cls, setters_dict: Dict[str, implement_for] = None):
513513
"""Resets the setters in setter_dict.
514514
515515
``setter_dict`` is a copy of implementations. We just need to iterate through its
516-
values and call :meth:`~.module_set` for each.
516+
values and call :meth:`module_set` for each.
517517
518518
"""
519519
if VERBOSE:
@@ -888,7 +888,7 @@ def _standardize(
888888
exclude_dims (Tuple[int]): dimensions to exclude from the statistics, can be negative. Default: ().
889889
mean (Tensor): a mean to be used for standardization. Must be of shape broadcastable to input. Default: None.
890890
std (Tensor): a standard deviation to be used for standardization. Must be of shape broadcastable to input. Default: None.
891-
eps (float): epsilon to be used for numerical stability. Default: float32 resolution.
891+
eps (:obj:`float`): epsilon to be used for numerical stability. Default: float32 resolution.
892892
893893
"""
894894
if eps is None:

torchrl/collectors/collectors.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -339,10 +339,12 @@ class SyncDataCollector(DataCollectorBase):
339339
instances) it will be wrapped in a `nn.Module` first.
340340
Then, the collector will try to assess if these
341341
modules require wrapping in a :class:`~tensordict.nn.TensorDictModule` or not.
342+
342343
- If the policy forward signature matches any of ``forward(self, tensordict)``,
343344
``forward(self, td)`` or ``forward(self, <anything>: TensorDictBase)`` (or
344345
any typing with a single argument typed as a subclass of ``TensorDictBase``)
345346
then the policy won't be wrapped in a :class:`~tensordict.nn.TensorDictModule`.
347+
346348
- In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
347349
348350
Keyword Args:
@@ -1462,6 +1464,7 @@ class _MultiDataCollector(DataCollectorBase):
14621464
``forward(self, td)`` or ``forward(self, <anything>: TensorDictBase)`` (or
14631465
any typing with a single argument typed as a subclass of ``TensorDictBase``)
14641466
then the policy won't be wrapped in a :class:`~tensordict.nn.TensorDictModule`.
1467+
14651468
- In all other cases an attempt to wrap it will be undergone as such:
14661469
``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
14671470
@@ -1548,7 +1551,7 @@ class _MultiDataCollector(DataCollectorBase):
15481551
reset_when_done (bool, optional): if ``True`` (default), an environment
15491552
that returns a ``True`` value in its ``"done"`` or ``"truncated"``
15501553
entry will be reset at the corresponding indices.
1551-
update_at_each_batch (bool, optional): if ``True``, :meth:`~.update_policy_weight_()`
1554+
update_at_each_batch (bool, optional): if ``True``, :meth:`update_policy_weight_()`
15521555
will be called before (sync) or after (async) each data collection.
15531556
Defaults to ``False``.
15541557
preemptive_threshold (:obj:`float`, optional): a value between 0.0 and 1.0 that specifies the ratio of workers
@@ -2774,10 +2777,12 @@ class aSyncDataCollector(MultiaSyncDataCollector):
27742777
instances) it will be wrapped in a `nn.Module` first.
27752778
Then, the collector will try to assess if these
27762779
modules require wrapping in a :class:`~tensordict.nn.TensorDictModule` or not.
2780+
27772781
- If the policy forward signature matches any of ``forward(self, tensordict)``,
27782782
``forward(self, td)`` or ``forward(self, <anything>: TensorDictBase)`` (or
27792783
any typing with a single argument typed as a subclass of ``TensorDictBase``)
27802784
then the policy won't be wrapped in a :class:`~tensordict.nn.TensorDictModule`.
2785+
27812786
- In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
27822787
27832788
Keyword Args:
@@ -2863,7 +2868,7 @@ class aSyncDataCollector(MultiaSyncDataCollector):
28632868
reset_when_done (bool, optional): if ``True`` (default), an environment
28642869
that returns a ``True`` value in its ``"done"`` or ``"truncated"``
28652870
entry will be reset at the corresponding indices.
2866-
update_at_each_batch (bool, optional): if ``True``, :meth:`~.update_policy_weight_()`
2871+
update_at_each_batch (bool, optional): if ``True``, :meth:`update_policy_weight_()`
28672872
will be called before (sync) or after (async) each data collection.
28682873
Defaults to ``False``.
28692874
preemptive_threshold (:obj:`float`, optional): a value between 0.0 and 1.0 that specifies the ratio of workers

torchrl/collectors/distributed/generic.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -262,10 +262,12 @@ class DistributedDataCollector(DataCollectorBase):
262262
instances) it will be wrapped in a `nn.Module` first.
263263
Then, the collector will try to assess if these
264264
modules require wrapping in a :class:`~tensordict.nn.TensorDictModule` or not.
265+
265266
- If the policy forward signature matches any of ``forward(self, tensordict)``,
266267
``forward(self, td)`` or ``forward(self, <anything>: TensorDictBase)`` (or
267268
any typing with a single argument typed as a subclass of ``TensorDictBase``)
268269
then the policy won't be wrapped in a :class:`~tensordict.nn.TensorDictModule`.
270+
269271
- In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
270272
271273
Keyword Args:
@@ -341,7 +343,7 @@ class DistributedDataCollector(DataCollectorBase):
341343
collecting data. Must be one of ``torchrl.envs.utils.ExplorationType.DETERMINISTIC``,
342344
``torchrl.envs.utils.ExplorationType.RANDOM``, ``torchrl.envs.utils.ExplorationType.MODE``
343345
or ``torchrl.envs.utils.ExplorationType.MEAN``.
344-
collector_class (type or str, optional): a collector class for the remote node. Can be
346+
collector_class (Type or str, optional): a collector class for the remote node. Can be
345347
:class:`~torchrl.collectors.SyncDataCollector`,
346348
:class:`~torchrl.collectors.MultiSyncDataCollector`,
347349
:class:`~torchrl.collectors.MultiaSyncDataCollector`

torchrl/collectors/distributed/ray.py

+2
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,12 @@ class RayCollector(DataCollectorBase):
135135
instances) it will be wrapped in a `nn.Module` first.
136136
Then, the collector will try to assess if these
137137
modules require wrapping in a :class:`~tensordict.nn.TensorDictModule` or not.
138+
138139
- If the policy forward signature matches any of ``forward(self, tensordict)``,
139140
``forward(self, td)`` or ``forward(self, <anything>: TensorDictBase)`` (or
140141
any typing with a single argument typed as a subclass of ``TensorDictBase``)
141142
then the policy won't be wrapped in a :class:`~tensordict.nn.TensorDictModule`.
143+
142144
- In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
143145
144146
Keyword Args:

torchrl/collectors/distributed/rpc.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,12 @@ class RPCDataCollector(DataCollectorBase):
110110
instances) it will be wrapped in a `nn.Module` first.
111111
Then, the collector will try to assess if these
112112
modules require wrapping in a :class:`~tensordict.nn.TensorDictModule` or not.
113+
113114
- If the policy forward signature matches any of ``forward(self, tensordict)``,
114115
``forward(self, td)`` or ``forward(self, <anything>: TensorDictBase)`` (or
115116
any typing with a single argument typed as a subclass of ``TensorDictBase``)
116117
then the policy won't be wrapped in a :class:`~tensordict.nn.TensorDictModule`.
118+
117119
- In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
118120
119121
Keyword Args:
@@ -190,7 +192,7 @@ class RPCDataCollector(DataCollectorBase):
190192
``torchrl.envs.utils.ExplorationType.RANDOM``, ``torchrl.envs.utils.ExplorationType.MODE``
191193
or ``torchrl.envs.utils.ExplorationType.MEAN``.
192194
Defaults to ``torchrl.envs.utils.ExplorationType.RANDOM``.
193-
collector_class (type or str, optional): a collector class for the remote node. Can be
195+
collector_class (Type or str, optional): a collector class for the remote node. Can be
194196
:class:`~torchrl.collectors.SyncDataCollector`,
195197
:class:`~torchrl.collectors.MultiSyncDataCollector`,
196198
:class:`~torchrl.collectors.MultiaSyncDataCollector`

torchrl/collectors/distributed/sync.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,12 @@ class DistributedSyncDataCollector(DataCollectorBase):
143143
instances) it will be wrapped in a `nn.Module` first.
144144
Then, the collector will try to assess if these
145145
modules require wrapping in a :class:`~tensordict.nn.TensorDictModule` or not.
146+
146147
- If the policy forward signature matches any of ``forward(self, tensordict)``,
147148
``forward(self, td)`` or ``forward(self, <anything>: TensorDictBase)`` (or
148149
any typing with a single argument typed as a subclass of ``TensorDictBase``)
149150
then the policy won't be wrapped in a :class:`~tensordict.nn.TensorDictModule`.
151+
150152
- In all other cases an attempt to wrap it will be undergone as such: ``TensorDictModule(policy, in_keys=env_obs_key, out_keys=env.action_keys)``.
151153
152154
Keyword Args:
@@ -222,7 +224,7 @@ class DistributedSyncDataCollector(DataCollectorBase):
222224
collecting data. Must be one of ``torchrl.envs.utils.ExplorationType.DETERMINISTIC``,
223225
``torchrl.envs.utils.ExplorationType.RANDOM``, ``torchrl.envs.utils.ExplorationType.MODE``
224226
or ``torchrl.envs.utils.ExplorationType.MEAN``.
225-
collector_class (type or str, optional): a collector class for the remote node. Can be
227+
collector_class (Type or str, optional): a collector class for the remote node. Can be
226228
:class:`~torchrl.collectors.SyncDataCollector`,
227229
:class:`~torchrl.collectors.MultiSyncDataCollector`,
228230
:class:`~torchrl.collectors.MultiaSyncDataCollector`

torchrl/data/datasets/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def preprocess(
7272
7373
Args and Keyword Args are forwarded to :meth:`~tensordict.TensorDictBase.map`.
7474
75-
The dataset can subsequently be deleted using :meth:`~.delete`.
75+
The dataset can subsequently be deleted using :meth:`delete`.
7676
7777
Keyword Args:
7878
dest (path or equivalent): a path to the location of the new dataset.

torchrl/data/datasets/openx.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class for more information on how to interact with non-tensor data
6666
sampling strategy.
6767
If the ``batch_size`` is ``None`` (default), iterating over the
6868
dataset will deliver trajectories one at a time *whereas* calling
69-
:meth:`~.sample` will *still* require a batch-size to be provided.
69+
:meth:`sample` will *still* require a batch-size to be provided.
7070
7171
Keyword Args:
7272
shuffle (bool, optional): if ``True``, trajectories are delivered in a
@@ -115,7 +115,7 @@ class for more information on how to interact with non-tensor data
115115
replacement (bool, optional): if ``False``, sampling will be done
116116
without replacement. Defaults to ``True`` for downloaded datasets,
117117
``False`` for streamed datasets.
118-
pad (bool, float or None): if ``True``, trajectories of insufficient length
118+
pad (bool, :obj:`float` or None): if ``True``, trajectories of insufficient length
119119
given the `slice_len` or `num_slices` arguments will be padded with
120120
0s. If another value is provided, it will be used for padding. If
121121
``False`` or ``None`` (default) any encounter with a trajectory of

torchrl/data/map/tdstorage.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def from_tensordict_pair(
193193
in the storage. Defaults to ``None`` (all keys are registered).
194194
max_size (int, optional): the maximum number of elements in the storage. Ignored if the
195195
``storage_constructor`` is passed. Defaults to ``1000``.
196-
storage_constructor (type, optional): a type of tensor storage.
196+
storage_constructor (Type, optional): a type of tensor storage.
197197
Defaults to :class:`~tensordict.nn.storage.LazyDynamicStorage`.
198198
Other options include :class:`~tensordict.nn.storage.FixedStorage`.
199199
hash_module (Callable, optional): a hash function to use in the :class:`~torchrl.data.map.QueryModule`.

0 commit comments

Comments
 (0)