Update

Vincent Moens · Vincent Moens · commit c0627411bdf3 · 2025-03-03T09:16:27.000Z
[ghstack-poisoned]
diff --git a/test/test_env.py b/test/test_env.py
@@ -60,14 +60,14 @@
 from torchrl.envs.libs.dm_control import _has_dmc, DMControlEnv
 from torchrl.envs.libs.gym import _has_gym, gym_backend, GymEnv, GymWrapper
 from torchrl.envs.transforms import Compose, StepCounter, TransformedEnv
+from torchrl.envs.transforms.rlhf import as_padded_tensor
 from torchrl.envs.transforms.transforms import (
     AutoResetEnv,
     AutoResetTransform,
     Tokenizer,
     Transform,
     UnsqueezeTransform,
 )
-from torchrl.envs.transforms.rlhf import as_padded_tensor
 from torchrl.envs.utils import (
     _StepMDP,
     _terminated_or_truncated,
diff --git a/torchrl/envs/custom/llm.py b/torchrl/envs/custom/llm.py
@@ -31,7 +31,14 @@ class LLMEnv(EnvBase):
     integers representing a sequence of tokens.
     The action is also a string or a tensor of integers, which is concatenated to the previous observation to form the
     new observation.
-     Prompts to the language model can be loaded when the environment is ``reset`` if the environment is created via :meth:`~from_dataloader`
+
+    By default, this environment is meant to track history for a prompt. Users can append transforms to tailor
+    this to their use case, such as Chain of Thought (CoT) reasoning or other custom processing.
+
+    Users must append a transform to set the "done" condition, which triggers loading the next prompt.
+
+    Prompts to the language model can be loaded when the environment is ``reset`` if the environment is created via :meth:`~from_dataloader`.
+
     Args:
         observation_key (NestedKey, optional): The key in the tensordict where the observation is stored. Defaults to
             ``"observation"``.