File tree 2 files changed +9
-2
lines changed
2 files changed +9
-2
lines changed Original file line number Diff line number Diff line change 60
60
from torchrl .envs .libs .dm_control import _has_dmc , DMControlEnv
61
61
from torchrl .envs .libs .gym import _has_gym , gym_backend , GymEnv , GymWrapper
62
62
from torchrl .envs .transforms import Compose , StepCounter , TransformedEnv
63
+ from torchrl .envs .transforms .rlhf import as_padded_tensor
63
64
from torchrl .envs .transforms .transforms import (
64
65
AutoResetEnv ,
65
66
AutoResetTransform ,
66
67
Tokenizer ,
67
68
Transform ,
68
69
UnsqueezeTransform ,
69
70
)
70
- from torchrl .envs .transforms .rlhf import as_padded_tensor
71
71
from torchrl .envs .utils import (
72
72
_StepMDP ,
73
73
_terminated_or_truncated ,
Original file line number Diff line number Diff line change @@ -31,7 +31,14 @@ class LLMEnv(EnvBase):
31
31
integers representing a sequence of tokens.
32
32
The action is also a string or a tensor of integers, which is concatenated to the previous observation to form the
33
33
new observation.
34
- Prompts to the language model can be loaded when the environment is ``reset`` if the environment is created via :meth:`~from_dataloader`
34
+
35
+ By default, this environment is meant to track history for a prompt. Users can append transforms to tailor
36
+ this to their use case, such as Chain of Thought (CoT) reasoning or other custom processing.
37
+
38
+ Users must append a transform to set the "done" condition, which triggers the loading of the next prompt.
39
+
40
+ Prompts to the language model can be loaded when the environment is ``reset`` if the environment is created via :meth:`~from_dataloader`.
41
+
35
42
Args:
36
43
observation_key (NestedKey, optional): The key in the tensordict where the observation is stored. Defaults to
37
44
``"observation"``.
You can’t perform that action at this time.
0 commit comments