Commit 3b7903a

committed
feature(nyz): add vllm collector interface definition
1 parent c45e429 commit 3b7903a

File tree

1 file changed (+162, -1 lines)

ding/worker/collector/vllm_collector.py

Lines changed: 162 additions & 1 deletion
@@ -1,8 +1,16 @@
-from typing import List, Tuple
+from typing import List, Tuple, Optional, Any
 import os
 import uuid
+import asyncio
+import numpy as np
 from loguru import logger
+from easydict import EasyDict
 from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams, RequestOutput
+from transformers import AutoTokenizer
+
+from ding.utils.data.rlhf_online_dataset import OnlineRLDataset
+from ding.utils import SERIAL_COLLECTOR_REGISTRY
+from .base_serial_collector import ISerialCollector


 class VllmActor:
@@ -145,3 +153,156 @@ async def generate(
         # Use raw logprobs as confidence scores
         confidence_scores = [x.cumulative_logprob for x in response.outputs]
         return [(x.text.strip(), conf) for x, conf in zip(response.outputs, confidence_scores)]
+
+
+@SERIAL_COLLECTOR_REGISTRY.register('vllm')
+class VllmCollector(ISerialCollector):
+    """
+    Overview:
+        Collector implementation for vLLM-based language models (LLM/VLM).
+        This collector manages the interaction with vLLM models for text generation tasks.
+    """
+    config = dict(
+        # (str) LLM/VLM model path
+        model_path='',
+        # (int) Maximum number of tokens to generate per request
+        max_tokens=1024,
+        # (float) Temperature for sampling, 0 means greedy decoding
+        temperature=0.0,
+        # (dict) Multimodal processor kwargs for vision-language models
+        mm_processor_kwargs={
+            "min_pixels": 28 * 28,
+            "max_pixels": 1280 * 28 * 28,
+        },
+        # Dataset-related configs
+        # (str) Key to access the input data in the dataset
+        input_key='input',
+        # (bool) Whether to apply a chat template to the input
+        apply_chat_template=False,
+        # (str) Template for the input
+        input_template=None,
+        # (bool) Whether to shuffle the dataset
+        shuffle=True,
+    )
+
+    def __init__(self, cfg: EasyDict) -> None:
+        """
+        Overview:
+            Initialize the VllmCollector with the given configuration.
+        Arguments:
+            - cfg (:obj:`EasyDict`): Configuration for the collector, including the model path,
+                generation parameters and dataset settings.
+        """
+        super().__init__()
+        self._cfg = cfg
+        self._envstep = 0
+
+        # Initialize the tokenizer and dataset
+        self._tokenizer = AutoTokenizer.from_pretrained(cfg.model_path)
+        self._dataset = OnlineRLDataset(
+            dataset=cfg.dataset,
+            tokenizer=self._tokenizer,
+            input_key=cfg.input_key,
+            apply_chat_template=cfg.apply_chat_template,
+            input_template=cfg.input_template,
+        )
+
+        self._model = VllmActor(model_path=cfg.model_path, mm_processor_kwargs=cfg.mm_processor_kwargs)
+        self.reset()
+
+    def reset(self) -> None:
+        """
+        Overview:
+            Reset the collector, including the dataset index.
+        """
+        self._index = np.arange(len(self._dataset))
+        if self._cfg.shuffle:
+            np.random.shuffle(self._index)
+
+    def reset_policy(self, _model: Optional[str] = None) -> None:
+        """
+        Overview:
+            Since LLM generation does not require an explicit policy or env, this method is a no-op.
+        """
+        pass
+
+    def reset_env(self, _env: Optional[Any] = None) -> None:
+        """
+        Overview:
+            Since LLM generation does not require an explicit policy or env, this method is a no-op.
+        """
+        pass
+
+    def collect(
+            self,
+            n_samples: int = 100,
+            num_samples_per_prompt: int = 1,
+            train_iter: int = 0,
+    ) -> List[Tuple[str, float]]:
+        """
+        Overview:
+            Collect generated responses from the vLLM model.
+        Arguments:
+            - n_samples (:obj:`int`): Number of prompts to generate responses for.
+            - num_samples_per_prompt (:obj:`int`): Number of samples to generate per prompt.
+            - train_iter (:obj:`int`): Current training iteration, used for logging.
+        Returns:
+            - responses (:obj:`List[Tuple[str, float]]`): List of (generated_text, confidence_score) pairs.
+        """
+        if self._model is None:
+            raise RuntimeError("Model not initialized. Call `reset` method first.")
+
+        prompt = self._dataset[self._index[:n_samples]]
+        # Cyclically rotate the index so that subsequent calls traverse the rest of the dataset
+        self._index = np.concatenate([self._index[n_samples:], self._index[:n_samples]])
+
+        self._envstep += n_samples
+
+        # Get the current event loop or create a new one
+        try:
+            loop = asyncio.get_event_loop()
+        except RuntimeError:
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+
+        # Run the async generate method in the event loop
+        return loop.run_until_complete(
+            self._model.generate(
+                prompt=prompt,
+                num_samples=num_samples_per_prompt,
+                max_tokens=self._cfg.max_tokens,
+                temperature=self._cfg.temperature
+            )
+        )
+
+    @property
+    def envstep(self) -> int:
+        """
+        Overview:
+            Get the current environment step count.
+        Returns:
+            - count (:obj:`int`): Current environment step count.
+        """
+        return self._envstep
+
+    @envstep.setter
+    def envstep(self, value: int) -> None:
+        """
+        Overview:
+            Set the current environment step count.
+        """
+        self._envstep = value
+
+    def close(self) -> None:
+        """
+        Overview:
+            Close the collector.
+        """
+        pass
+
+    def __del__(self) -> None:
+        """
+        Overview:
+            Destructor for the collector.
+        """
+        self.close()
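
Below is a minimal usage sketch of the new collector, not part of the commit: the model_path and dataset values are hypothetical placeholders, the config keys mirror VllmCollector.config, and collect() is assumed to return (generated_text, confidence_score) pairs as produced by VllmActor.generate.

from easydict import EasyDict

from ding.worker.collector.vllm_collector import VllmCollector

# Hypothetical example config; model_path and dataset are placeholders.
cfg = EasyDict(VllmCollector.config)
cfg.model_path = '/path/to/local/llm'        # placeholder model path
cfg.dataset = 'placeholder/prompt_dataset'   # placeholder dataset passed through to OnlineRLDataset
cfg.max_tokens = 512
cfg.temperature = 0.7

collector = VllmCollector(cfg)
# Request 8 prompts with 2 samples each; results are (generated_text, confidence_score) pairs.
responses = collector.collect(n_samples=8, num_samples_per_prompt=2)
for text, score in responses:
    print(f"{score:.3f} {text[:60]}")
print(collector.envstep)  # 8 prompts have been consumed

Since collect() rotates the dataset index cyclically, repeated calls walk through the whole dataset; calling reset() reshuffles the index (when shuffle=True) and restarts the traversal.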

0 commit comments
