File tree Expand file tree Collapse file tree 2 files changed +9
-2
lines changed
Expand file tree Collapse file tree 2 files changed +9
-2
lines changed Original file line number Diff line number Diff line change 6060from torchrl .envs .libs .dm_control import _has_dmc , DMControlEnv
6161from torchrl .envs .libs .gym import _has_gym , gym_backend , GymEnv , GymWrapper
6262from torchrl .envs .transforms import Compose , StepCounter , TransformedEnv
63+ from torchrl .envs .transforms .rlhf import as_padded_tensor
6364from torchrl .envs .transforms .transforms import (
6465 AutoResetEnv ,
6566 AutoResetTransform ,
6667 Tokenizer ,
6768 Transform ,
6869 UnsqueezeTransform ,
6970)
70- from torchrl .envs .transforms .rlhf import as_padded_tensor
7171from torchrl .envs .utils import (
7272 _StepMDP ,
7373 _terminated_or_truncated ,
Original file line number Diff line number Diff line change @@ -31,7 +31,14 @@ class LLMEnv(EnvBase):
3131 integers representing a sequence of tokens.
3232 The action is also a string or a tensor of integers, which is concatenated to the previous observation to form the
3333 new observation.
34- Prompts to the language model can be loaded when the environment is ``reset`` if the environment is created via :meth:`~from_dataloader`
34+
35+ By default, this environment is meant to track history for a prompt. Users can append transforms to tailor
36+ this to their use case, such as Chain of Thought (CoT) reasoning or other custom processing.
37+
38+ Users must append a transform to set the "done" condition, which triggers loading of the next prompt.
39+
40+ Prompts to the language model can be loaded when the environment is ``reset`` if the environment is created via :meth:`~from_dataloader`.
41+
3542 Args:
3643 observation_key (NestedKey, optional): The key in the tensordict where the observation is stored. Defaults to
3744 ``"observation"``.
You can’t perform that action at this time.
0 commit comments