
Commit 2caa7c7

formatted test_vllm_collector_multigpu

1 parent 44ec9ce · commit 2caa7c7

File tree: 1 file changed, +57 −49 lines


ding/worker/collector/tests/test_vllm_collector__multigpu.py renamed to ding/worker/collector/tests/test_vllm_collector_multigpu.py

Lines changed: 57 additions & 49 deletions
@@ -6,7 +6,8 @@
 
 
 class VllmActor:
-    def __init__(self, model_path: str,mm_processor_kwargs: dict,free_gpus:list) -> None:
+
+    def __init__(self, model_path: str, mm_processor_kwargs: dict, free_gpus: list) -> None:
         """
         Overview:
             Initialize the vLLM actor. For more details, please refer to https://docs.vllm.ai/en/stable.
@@ -19,7 +20,7 @@ def __init__(self, model_path: str,mm_processor_kwargs: dict,free_gpus:list) ->
         # Set CUDA_VISIBLE_DEVICES to use only free GPUs
         os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, self.free_gpus))
         self.model_path = model_path
-        self.mm_processor_kwargs=mm_processor_kwargs
+        self.mm_processor_kwargs = mm_processor_kwargs
         self._initialize()
 
     def _initialize(self) -> None:
@@ -58,7 +59,7 @@ async def generate(self, prompt, num_samples: int, max_tokens: int, temperature:
             max_tokens=max_tokens,
             temperature=temperature,
         )
-
+
         # Using async iterator to handle vLLM's generation process
         # 1. vLLM's generate method is asynchronous to prevent blocking while waiting for model outputs
         # 2. async for allows streaming the generated outputs incrementally instead of waiting for all results
@@ -77,11 +78,17 @@ class HuggingFaceModelGenerator:
     A LLM/VLM generator that uses Hugging Face models with vLLM as the backend.
     """
 
-    def __init__(self, model_path: str, free_gpus:list,
-                 max_tokens: int = 1024, temperature: float = 0, mm_processor_kwargs:dict = {
+    def __init__(
+            self,
+            model_path: str,
+            free_gpus: list,
+            max_tokens: int = 1024,
+            temperature: float = 0,
+            mm_processor_kwargs: dict = {
                 "min_pixels": 28 * 28,
                 "max_pixels": 1280 * 28 * 28,
-                 }) -> None:
+            }
+    ) -> None:
         """
         Overview:
             Initialize the Hugging Face model generator.
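Review note on the reformatted signature: mm_processor_kwargs still uses a mutable dict as a default argument. It is only read here, so the shared default is harmless, but the conventional defensive idiom is a None default. A sketch of that idiom, using a hypothetical helper not present in this commit:

from typing import Optional

def resolve_mm_processor_kwargs(mm_processor_kwargs: Optional[dict] = None) -> dict:
    # Hypothetical helper showing the None-default idiom: avoids sharing one
    # mutable dict object across all calls
    if mm_processor_kwargs is None:
        mm_processor_kwargs = {"min_pixels": 28 * 28, "max_pixels": 1280 * 28 * 28}
    return mm_processor_kwargs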
@@ -90,14 +97,14 @@ def __init__(self, model_path: str, free_gpus:list,
             - max_tokens (int): The maximum number of tokens to generate, default to 1024.
             - temperature (float): The temperature for the language model, default to 0.
         """
-        self.vllm_actor = VllmActor(model_path,mm_processor_kwargs,free_gpus)
+        self.vllm_actor = VllmActor(model_path, mm_processor_kwargs, free_gpus)
         self.max_tokens = max_tokens
         self.temperature = temperature
 
     async def generate(
-            self,
-            prompt,
-            num_samples: int,
+        self,
+        prompt,
+        num_samples: int,
     ) -> List[Tuple[str, float]]:
         """
         Overview:
@@ -114,11 +121,8 @@ async def generate(
         response = await self.vllm_actor.generate(prompt, num_samples, self.max_tokens, self.temperature)
         # Use raw logprobs as confidence scores
         confidence_scores = [x.cumulative_logprob for x in response.outputs]
-        return [
-            (x.text.strip(), conf)
-            for x, conf in zip(response.outputs, confidence_scores)
-        ]
-
+        return [(x.text.strip(), conf) for x, conf in zip(response.outputs, confidence_scores)]
+
 
 def get_free_gpus() -> List[int]:
     """
@@ -144,7 +148,8 @@ def get_free_gpus() -> List[int]:
         logger.warning("Failed to get GPU stats, defaulting to GPU 0")
         return [0]
 
-def chunk_list(original_list:list, t:int) -> List[list]:
+
+def chunk_list(original_list: list, t: int) -> List[list]:
     # chunk the list into sub_lists
     new_list = [original_list[i:i + t] for i in range(0, len(original_list), t)]
     return new_list
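For reference, chunk_list splits a list into consecutive chunks of size t, leaving any remainder in a shorter final chunk:

# Illustrative calls to chunk_list as defined in the hunk above
chunk_list([1, 2, 3, 4, 5], 2)   # [[1, 2], [3, 4], [5]]
chunk_list(list('abcdef'), 3)    # [['a', 'b', 'c'], ['d', 'e', 'f']]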
@@ -156,12 +161,15 @@ def chunk_list(original_list:list, t:int) -> List[list]:
 from vllm.assets.image import ImageAsset
 from enum import Enum
 import concurrent.futures
+
+
 class Modality(Enum):
     IMAGE = "image"
     TEXT = "text"
     VIDEO = "video"
 
-def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str],Optional[List[int]]]:
+
+def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str], Optional[List[int]]]:
     if modality == Modality.IMAGE:
         placeholder = "<|image_pad|>"
     elif modality == Modality.VIDEO:
@@ -179,7 +187,7 @@ def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str],Opt
         ) for question in questions
     ]
     stop_token_ids = None
-    return prompts,stop_token_ids
+    return prompts, stop_token_ids
 
 
 def get_multi_modal_input(modality: Modality, filenames: list, questions: list) -> dict:
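The per-question prompt built just above this hunk's context follows the Qwen2-VL chat template around the "<|image_pad|>" placeholder. The exact f-string is on lines the diff does not show, so treat this reconstruction as an assumption:

# Hypothetical reconstruction of the Modality.IMAGE prompt template
questions = ["What is in the image?"]  # example input
placeholder = "<|image_pad|>"
prompts = [
    (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
        f"{question}<|im_end|>\n<|im_start|>assistant\n"
    ) for question in questions
]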
@@ -205,11 +213,11 @@ def get_multi_modal_input(modality: Modality, filenames: list, questions: list)
     return ret
 
 
-async def run_vllm_collector(gpu_id:int, prompts:List, model_path:str,temperature:float) ->List[str]:
+async def run_vllm_collector(gpu_id: int, prompts: List, model_path: str, temperature: float) -> List[str]:
     # set visible gpu
     os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
     # get a model on a single gpu
-    model = HuggingFaceModelGenerator(model_path,free_gpus=[gpu_id],temperature=temperature)
+    model = HuggingFaceModelGenerator(model_path, free_gpus=[gpu_id], temperature=temperature)
 
     responses_list = []
     for prompt in prompts:
222230

231+
223232
import asyncio
224-
def start_collector(gpu_id:int, prompts:list, model_path:str,temperature:float) ->List[str]:
225-
# event loop in a process
226-
results = asyncio.run(run_vllm_collector(gpu_id, prompts, model_path,temperature))
233+
234+
235+
def start_collector(gpu_id: int, prompts: list, model_path: str, temperature: float) -> List[str]:
236+
# event loop in a process
237+
results = asyncio.run(run_vllm_collector(gpu_id, prompts, model_path, temperature))
227238
return results
228239

229-
def main(prompts:list, model_path:str, free_gpus:List[int],temperature:float) -> None:
230-
num_tot=len(prompts)
231-
num_gpu=len(free_gpus)
232-
num_per_gpu=num_tot//num_gpu
233-
prompts_per_gpu=chunk_list(prompts,num_per_gpu)
240+
241+
def main(prompts: list, model_path: str, free_gpus: List[int], temperature: float) -> None:
242+
num_tot = len(prompts)
243+
num_gpu = len(free_gpus)
244+
num_per_gpu = num_tot // num_gpu
245+
prompts_per_gpu = chunk_list(prompts, num_per_gpu)
234246
with concurrent.futures.ProcessPoolExecutor(max_workers=len(free_gpus)) as executor:
235247
futures = []
236-
for gpu_id,prompts_gpu in zip(free_gpus,prompts_per_gpu):
237-
futures.append(executor.submit(start_collector, gpu_id, prompts_gpu, model_path,temperature))
248+
for gpu_id, prompts_gpu in zip(free_gpus, prompts_per_gpu):
249+
futures.append(executor.submit(start_collector, gpu_id, prompts_gpu, model_path, temperature))
238250

239251
# get all results
240252
all_results = []
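One behavioral caveat in main, unchanged by this formatting commit: num_per_gpu is floor division, so whenever len(prompts) is not a multiple of len(free_gpus), chunk_list yields more chunks than there are GPUs and the zip silently drops the surplus prompts. A small example using the file's own chunk_list:

# 10 prompts across 4 GPUs: 10 // 4 == 2, so chunk_list yields 5 chunks of 2,
# and zip over 4 GPUs assigns only the first 4 chunks; prompts 8-9 never run
prompts = list(range(10))
free_gpus = [0, 1, 2, 3]
num_per_gpu = len(prompts) // len(free_gpus)        # 2
prompts_per_gpu = chunk_list(prompts, num_per_gpu)  # 5 chunks
assigned = list(zip(free_gpus, prompts_per_gpu))    # 4 chunks assigned, 1 dropped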
@@ -245,23 +257,19 @@ def main(prompts:list, model_path:str, free_gpus:List[int],temperature:float) -
     with open("/mnt/afs/wangqijian/tests/vllm_multi_gpu.txt", "w") as f:
         for response in all_results:
             f.write(f"{response}\n")
-
-
 
 
 if __name__ == "__main__":
-    questions=['Please describe the image.','Please describe the image.',
-               'What\'s the text in the image?','What\'s the text in the image?',
-               'What is in the image?','What is in the image?',
-               'How many people are in the image?','How many people are in the image?',
-               'What is the emotion of the main character of the image?',
-               'What is the emotion of the main character of the image?',
-               'How many animals are in the image?',
-               'How many animals are in the image?',
-               'What is the place of the image?','What is the place of the image?',
-               'What is the peroson doing?','What is the peroson doing?'
-               ]
-    img_names=[
+    questions = [
+        'Please describe the image.', 'Please describe the image.', 'What\'s the text in the image?',
+        'What\'s the text in the image?', 'What is in the image?', 'What is in the image?',
+        'How many people are in the image?', 'How many people are in the image?',
+        'What is the emotion of the main character of the image?',
+        'What is the emotion of the main character of the image?', 'How many animals are in the image?',
+        'How many animals are in the image?', 'What is the place of the image?', 'What is the place of the image?',
+        'What is the peroson doing?', 'What is the peroson doing?'
+    ]
+    img_names = [
         '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2127)',
         '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5394)',
         '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(1160)',
@@ -278,13 +286,13 @@ def main(prompts:list, model_path:str, free_gpus:List[int],temperature:float) -
         '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2284)',
         '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4533)',
         '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5495)'
-        ]
-    free_gpus=get_free_gpus()
+    ]
+    free_gpus = get_free_gpus()
     modality = Modality.IMAGE
     mm_input = get_multi_modal_input(modality, img_names, questions)
     data = mm_input["data"]
     question = mm_input["question"]
     prompts, stop_token_ids = get_prompts_qwen(question, modality)
-    model_path='/mnt/afs/share/Qwen2-VL-7B'
-    temperature=0.5
-    main(prompts,model_path,free_gpus,temperature)
+    model_path = '/mnt/afs/share/Qwen2-VL-7B'
+    temperature = 0.5
+    main(prompts, model_path, free_gpus, temperature)
