@@ -31,7 +31,7 @@ class TokenizedDatasetLoader:
31
31
max_length (int): the maximum sequence length.
32
32
dataset_name (str): the name of the dataset.
33
33
tokenizer_fn (callable): the tokenizing method constructor, such as
34
- :class:`torchrl.data.rlhf.TensorDictTokenizer`. When called,
34
+ :class:`torchrl.data.llm.TensorDictTokenizer`. When called,
35
35
it should return a :class:`tensordict.TensorDict` instance
36
36
or a dictionary-like structure with the tokenized data.
37
37
pre_tokenization_hook (callable, optional): called on
@@ -62,8 +62,8 @@ class TokenizedDatasetLoader:
62
62
The dataset will be stored in ``<root_dir>/<split>/<max_length>/``.
63
63
64
64
Examples:
65
- >>> from torchrl.data.rlhf import TensorDictTokenizer
66
- >>> from torchrl.data.rlhf.reward import pre_tokenization_hook
65
+ >>> from torchrl.data.llm import TensorDictTokenizer
66
+ >>> from torchrl.data.llm.reward import pre_tokenization_hook
67
67
>>> split = "train"
68
68
>>> max_length = 550
69
69
>>> dataset_name = "CarperAI/openai_summarize_comparisons"
@@ -359,7 +359,7 @@ def get_dataloader(
359
359
Defaults to ``max(os.cpu_count() // 2, 1)``.
360
360
361
361
Examples:
362
- >>> from torchrl.data.rlhf.reward import PairwiseDataset
362
+ >>> from torchrl.data.llm.reward import PairwiseDataset
363
363
>>> dataloader = get_dataloader(
364
364
... batch_size=256, block_size=550, tensorclass_type=PairwiseDataset, device="cpu")
365
365
>>> for d in dataloader:
0 commit comments