#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
-
-# TODO: lazy imports
+from __future__ import annotations

import torch

-import transformers
from tensordict import NestedKey, TensorDictBase
from tensordict.nn import (
    TensorDictModule as Mod,
)
from tensordict.tensorclass import NonTensorData, NonTensorStack
from torchrl.data.llm import LLMData
-from transformers import AutoTokenizer, GPT2Config, GPT2LMHeadModel


def _maybe_clear_device(td):

@@ -107,11 +104,12 @@ def log_probs_from_logits(td: TensorDictBase) -> TensorDictBase:


def from_hf_transformers(
-    model: transformers.modeling_utils.PreTrainedModel,
+    model: transformers.modeling_utils.PreTrainedModel,  # noqa
    *,
    generate: bool = True,
    return_log_probs: bool = True,
-    tokenizer: transformers.tokenization_utils.PreTrainedTokenizer | None = None,
+    tokenizer: transformers.tokenization_utils.PreTrainedTokenizer
+    | None = None,  # noqa
    from_text: bool = False,
    device: torch.device | None = None,
    kwargs: dict | None = None,

@@ -404,6 +402,9 @@ def remove_input_seq(tokens_in, tokens_out):


if __name__ == "__main__":
+    import transformers
+    from transformers import AutoTokenizer, GPT2Config, GPT2LMHeadModel
+
    max_seq_length = 50000

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
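
Note on the pattern this diff applies: `from __future__ import annotations` makes the type hints on `from_hf_transformers` behave as strings that are never evaluated at runtime, so the module no longer needs a top-level `import transformers`; the `# noqa` markers silence the linter's undefined-name warnings, and the concrete imports move into the `__main__` block where they are actually used. A minimal, self-contained sketch of the same lazy-import idiom (the `describe` helper is illustrative only, not part of the torchrl file):

# Lazy-import sketch, assuming `transformers` is installed when the script
# is actually run. Postponed annotation evaluation (PEP 563) keeps the hint
# below as a string, so importing this module never pulls in transformers.
from __future__ import annotations


def describe(model: transformers.modeling_utils.PreTrainedModel) -> str:  # noqa
    # Hypothetical helper: the annotation is never evaluated, so no
    # module-level `import transformers` is required.
    return type(model).__name__


if __name__ == "__main__":
    # The heavyweight import only happens when the file is run as a script,
    # mirroring the diff above.
    from transformers import AutoTokenizer, GPT2Config, GPT2LMHeadModel

    model = GPT2LMHeadModel(GPT2Config())
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    print(describe(model), len(tokenizer))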