Commit ed7512f

[Feature] Allow using lists of tensors in vllm instead of padded tensors
ghstack-source-id: c037e99
Pull Request resolved: #2861

1 parent 7b6e9a8 · commit ed7512f
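
What this enables, in a minimal sketch (the wrapper and the pad_output flag are taken from the tests below; the import path is an assumption, not verified against this revision):

import vllm
from torchrl.modules.llm import from_vllm  # assumed import path

model = vllm.LLM("gpt2")

# Before: ragged outputs (tokens, log-probs) were right-padded into one
# dense tensor per key.
padded_policy = from_vllm(model, from_text=True, generate=True, pad_output=True)

# After this commit: pad_output=False keeps each sample at its own length,
# returning lists of tensors instead of a padded tensor.
list_policy = from_vllm(model, from_text=True, generate=True, pad_output=False)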

File tree

3 files changed (+333 −121 lines)

test/test_actors.py

+97 −7
@@ -10,7 +10,7 @@

 import pytest
 import torch
-from tensordict import NonTensorStack, TensorDict
+from tensordict import LazyStackedTensorDict, NonTensorStack, TensorDict
 from tensordict.nn import CompositeDistribution, TensorDictModule
 from tensordict.nn.distributions import NormalParamExtractor

@@ -1122,6 +1122,8 @@ def _run_check(

         # If from text and not generating, the tokens are not returned for now
         if not (from_text and not generate):
+            assert td.tokens_response is not None
+            assert td.tokens is not None
             assert td.tokens_response.shape[:-1] == td.tokens.shape[:-1]
             # The convention is that the response only has new tokens
             assert (
@@ -1166,26 +1168,34 @@ def test_from_hf_logprobs(self, from_text, tokens, attention_mask):
         )

     @pytest.mark.parametrize(
-        "from_text, tokens, attention_mask",
+        "pad_output, from_text, tokens, attention_mask",
         [
-            (True, None, None),
+            (True, True, None, None),
+            (False, True, None, None),
             (
+                True,
                 False,
                 torch.randint(1024, (1, 10)),
                 torch.ones(1, 10, dtype=torch.int64),
             ),
-            (False, torch.randint(1024, (1, 10)), None),
+            (True, False, torch.randint(1024, (1, 10)), None),
         ],
     )
-    def test_from_vllm_logprobs(self, from_text, tokens, attention_mask):
+    def test_from_vllm_logprobs(self, from_text, tokens, attention_mask, pad_output):
         torch.manual_seed(0)
         from vllm import LLM

         model = LLM(model="facebook/opt-125m")
         m_generate = from_vllm(
-            model, from_text=from_text, generate=True, return_log_probs=True
+            model,
+            from_text=from_text,
+            generate=True,
+            return_log_probs=True,
+            pad_output=pad_output,
+        )
+        m_logprobs = from_vllm(
+            model, from_text=from_text, generate=False, pad_output=pad_output
         )
-        m_logprobs = from_vllm(model, from_text=from_text, generate=False)
         self._check_lps(
             m_generate, m_logprobs, tokens, attention_mask, from_text, has_logits=False
         )
@@ -1221,6 +1231,86 @@ def _check_lps(
             td_generate.log_probs, td_logprobs.log_probs, rtol=1e-2, atol=1e-2
         )

+    @pytest.fixture(scope="module")
+    def llm_model(self):
+        import vllm
+
+        llm_model = vllm.LLM("gpt2")
+        tokenizer = llm_model.get_tokenizer()
+        tokenizer.pad_token = tokenizer.eos_token
+        return llm_model
+
+    @pytest.mark.parametrize("pad", [True, False])
+    @pytest.mark.parametrize("generate", [True, False])
+    @pytest.mark.parametrize("use_tensorclass", [True, False])
+    def test_vllm_batch_run(self, pad, generate, use_tensorclass, llm_model):
+        # Test generate - padding combinations
+        policy = from_vllm(
+            llm_model,
+            from_text=True,
+            generate=generate,
+            return_log_probs=True,
+            pad_output=pad,
+            generate_kwargs={"max_tokens": 10000},
+        )
+        if generate:
+            data = LazyStackedTensorDict(
+                *TensorDict(
+                    text=NonTensorStack("a string", "another very long string"),
+                    batch_size=[2],
+                ).unbind(0)
+            )
+        else:
+            data = LazyStackedTensorDict(
+                *TensorDict(
+                    text=NonTensorStack("a string", "another very long string"),
+                    text_response=NonTensorStack(
+                        " is a string", " is still a very long string"
+                    ),
+                    batch_size=[2],
+                ).unbind(0)
+            )
+        if use_tensorclass:
+            data = LLMData.from_tensordict(data)
+        output = policy(data)
+        try:
+            log_probs = output.get("log_probs")
+        except Exception:
+            log_probs = output.get("log_probs", as_list=True)
+        if pad:
+            assert isinstance(log_probs, torch.Tensor)
+        else:
+            assert isinstance(log_probs, list)
+        text = output.get("text", as_list=True)
+        # TODO: this is not ideal...
+        if use_tensorclass:
+            assert isinstance(text, list)
+        else:
+            assert isinstance(text, NonTensorStack)
+        text_response = output.get("text_response", as_list=True)
+        if use_tensorclass:
+            assert isinstance(text_response, list)
+        else:
+            assert isinstance(text_response, NonTensorStack)
+        try:
+            tokens_response = output.get("tokens_response")
+        except Exception:
+            tokens_response = output.get("tokens_response", as_list=True)
+        if pad:
+            assert isinstance(tokens_response, torch.Tensor)
+        else:
+            assert isinstance(tokens_response, list)
+        try:
+            tokens = output.get("tokens")
+        except Exception:
+            tokens = output.get("tokens", as_list=True)
+        if not generate:
+            assert tokens is None
+        elif pad:
+            assert isinstance(tokens, torch.Tensor), tokens
+        else:
+            assert isinstance(tokens, list)
+

 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
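
The try/except blocks around output.get(...) above exist because a plain get on a LazyStackedTensorDict raises when the stacked entries have heterogeneous shapes, while get(key, as_list=True) returns the per-sample tensors. A minimal sketch of that behavior (key name and shapes are illustrative):

import torch
from tensordict import LazyStackedTensorDict, TensorDict

# Two samples whose "tokens" entries have different lengths.
td = LazyStackedTensorDict(
    TensorDict(tokens=torch.zeros(3, dtype=torch.int64), batch_size=[]),
    TensorDict(tokens=torch.zeros(5, dtype=torch.int64), batch_size=[]),
)

try:
    td.get("tokens")  # eager stack of ragged shapes raises
except RuntimeError:
    pass

# as_list=True sidesteps the stack and returns one tensor per sample.
tokens = td.get("tokens", as_list=True)
assert [t.shape[-1] for t in tokens] == [3, 5]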

torchrl/envs/utils.py

+4 −2
@@ -508,6 +508,7 @@ def _set_single_key(
     if isinstance(key, str):
         key = (key,)
     for k in key:
+        # TODO: we can do better than try/except by leveraging the as_list / as_nested_tensor feature
         try:
             val = source._get_str(k, None)
             if is_tensor_collection(val):
@@ -528,7 +529,7 @@
         # This is a temporary solution to understand if a key is heterogeneous
         # while not having performance impact when the exception is not raised
         except RuntimeError as err:
-            if re.match(r"Found more than one unique shape in the tensors", str(err)):
+            if re.match(r"Failed to stack tensors within a tensordict", str(err)):
                 # this is a het key
                 for s_td, d_td in zip(source.tensordicts, dest.tensordicts):
                     _set_single_key(s_td, d_td, k, clone=clone, device=device)
@@ -541,6 +542,7 @@ def _set(source, dest, key, total_key, excluded):
     total_key = total_key + (key,)
     non_empty = False
     if unravel_key(total_key) not in excluded:
+        # TODO: we can do better than try/except by leveraging the as_list / as_nested_tensor feature
         try:
             val = source.get(key)
             if is_tensor_collection(val) and not isinstance(
@@ -571,7 +573,7 @@
         # This is a temporary solution to understand if a key is heterogeneous
         # while not having performance impact when the exception is not raised
         except RuntimeError as err:
-            if re.match(r"Found more than one unique shape in the tensors", str(err)):
+            if re.match(r"Failed to stack tensors within a tensordict", str(err)):
                 # this is a het key
                 non_empty_local = False
                 for s_td, d_td in zip(source.tensordicts, dest.tensordicts):
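
The regex update tracks a change in the error message tensordict raises when a heterogeneous ("het") key cannot be stacked eagerly. A sketch of the detection pattern used in _set / _set_single_key (shapes are illustrative; the message is the one matched as of this commit):

import re
import torch
from tensordict import LazyStackedTensorDict, TensorDict

source = LazyStackedTensorDict(
    TensorDict(obs=torch.zeros(3), batch_size=[]),
    TensorDict(obs=torch.zeros(5), batch_size=[]),
)

try:
    val = source.get("obs")  # ragged shapes cannot be stacked eagerly
except RuntimeError as err:
    # New message matched by this commit (previously "Found more than one
    # unique shape in the tensors").
    if re.match(r"Failed to stack tensors within a tensordict", str(err)):
        # het key: fall back to per-sample access; as the TODOs above note,
        # as_list / as_nested_tensor could replace this try/except.
        vals = [s_td.get("obs") for s_td in source.tensordicts]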
