Skip to content

Commit cf58b9c

Browse files
authored
[MISC] Remove model input dumping when exception (#12582)
Signed-off-by: Cody Yu <[email protected]>
1 parent 4797dad commit cf58b9c

File tree

4 files changed

+3
-128
lines changed

4 files changed

+3
-128
lines changed

.github/ISSUE_TEMPLATE/400-bug-report.yml

-9
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,6 @@ body:
3030
</details>
3131
validations:
3232
required: true
33-
- type: textarea
34-
attributes:
35-
label: Model Input Dumps
36-
description: |
37-
If you are facing crashing due to illegal memory access or other issues with model execution, vLLM may dump the problematic input of the model. In this case, you will see the message `Error in model execution (input dumped to /tmp/err_xxx.pkl)`. If you see this message, please zip the file (because GitHub doesn't support .pkl file format) and upload it here. This will help us to reproduce the issue and facilitate the debugging process.
38-
placeholder: |
39-
Upload the dumped input file.
40-
validations:
41-
required: false
4233
- type: textarea
4334
attributes:
4435
label: 🐛 Describe the bug

tests/basic_correctness/test_basic_correctness.py

-58
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,12 @@
44
Run `pytest tests/basic_correctness/test_basic_correctness.py`.
55
"""
66
import os
7-
import pickle
8-
import re
97
import weakref
10-
from unittest.mock import patch
118

129
import pytest
1310

1411
from vllm import LLM
1512
from vllm.platforms import current_platform
16-
from vllm.worker.model_runner import ModelInputForGPUWithSamplingMetadata
1713

1814
from ..conftest import VllmRunner
1915
from ..models.utils import check_outputs_equal
@@ -151,57 +147,3 @@ def test_models_distributed(
151147
name_0="hf",
152148
name_1="vllm",
153149
)
154-
155-
156-
@pytest.mark.skip_v1
157-
def test_model_with_failure(vllm_runner) -> None:
158-
try:
159-
with patch("vllm.model_executor.models.opt.OPTForCausalLM.forward",
160-
side_effect=ValueError()):
161-
with pytest.raises(ValueError) as exc_info:
162-
vllm_runner("facebook/opt-125m",
163-
dtype="half",
164-
enforce_eager=False,
165-
gpu_memory_utilization=0.7)
166-
matches = re.search(r"input dumped to (.+).pkl",
167-
str(exc_info.value))
168-
assert matches is not None
169-
filename = f"{matches.group(1)}.pkl"
170-
171-
with open(filename, "rb") as filep:
172-
inputs = pickle.load(filep)
173-
174-
if any(key not in inputs for key in ("arg_1", "arg_2", "arg_3")):
175-
raise AssertionError("Missing keys in dumped inputs. Dumped keys: "
176-
f"{list(inputs.keys())}")
177-
assert isinstance(inputs["arg_1"],
178-
ModelInputForGPUWithSamplingMetadata)
179-
finally:
180-
os.remove(filename)
181-
182-
183-
@pytest.mark.skip_v1
184-
def test_failure_with_async_out_proc(vllm_runner) -> None:
185-
186-
filename = None
187-
try:
188-
with vllm_runner("facebook/opt-125m",
189-
dtype="half",
190-
enforce_eager=False,
191-
gpu_memory_utilization=0.7) as vllm_model,\
192-
patch("vllm.model_executor.models.opt.OPTForCausalLM.forward",
193-
side_effect=ValueError()):
194-
model_config = vllm_model.model.llm_engine.model_config
195-
assert model_config.use_async_output_proc
196-
with pytest.raises(ValueError) as exc_info:
197-
vllm_model.generate_greedy('how to make pizza?', 250)
198-
matches = re.search(r"input dumped to (.+).pkl",
199-
str(exc_info.value))
200-
assert matches is not None
201-
202-
filename = f"{matches.group(1)}.pkl"
203-
finally:
204-
# Clean up
205-
if filename is not None:
206-
os.remove(filename)
207-
pass

vllm/worker/model_runner.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
_add_attn_metadata_broadcastable_dict,
5858
_add_sampling_metadata_broadcastable_dict,
5959
_init_attn_metadata_from_tensor_dict,
60-
_init_sampling_metadata_from_tensor_dict, dump_input_when_exception)
60+
_init_sampling_metadata_from_tensor_dict)
6161

6262
if TYPE_CHECKING:
6363
from vllm.attention.backends.abstract import AttentionBackend
@@ -1647,7 +1647,6 @@ def prepare_model_input(
16471647
virtual_engine=virtual_engine)
16481648

16491649
@torch.inference_mode()
1650-
@dump_input_when_exception(exclude_args=[0], exclude_kwargs=["self"])
16511650
def execute_model(
16521651
self,
16531652
model_input: ModelInputForGPUWithSamplingMetadata,

vllm/worker/model_runner_base.py

+2-59
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,12 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
import dataclasses
4-
import pickle
54
from abc import ABC, abstractmethod
6-
from datetime import datetime
7-
from functools import wraps
8-
from typing import (TYPE_CHECKING, Any, Dict, Generic, Iterable, List,
9-
Optional, Type, TypeVar)
5+
from typing import (TYPE_CHECKING, Any, Dict, Generic, List, Optional, Type,
6+
TypeVar)
107

118
import torch
129
import torch.nn as nn
13-
from torch import is_tensor
1410

1511
from vllm.config import VllmConfig
1612
from vllm.logger import init_logger
@@ -107,59 +103,6 @@ def _init_frozen_model_input_from_tensor_dict(
107103
return tensor_dict
108104

109105

110-
def dump_input_when_exception(exclude_args: Optional[List[int]] = None,
111-
exclude_kwargs: Optional[List[str]] = None):
112-
113-
def _inner(func):
114-
115-
@wraps(func)
116-
def _wrapper(*args, **kwargs):
117-
try:
118-
return func(*args, **kwargs)
119-
except Exception as err:
120-
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
121-
filename = f"/tmp/err_{func.__name__}_input_{timestamp}.pkl"
122-
logger.info("Writing input of failed execution to %s...",
123-
filename)
124-
with open(filename, "wb") as filep:
125-
dumped_inputs = {
126-
k: v
127-
for k, v in kwargs.items()
128-
if k not in (exclude_kwargs or [])
129-
}
130-
for i, arg in enumerate(args):
131-
if i not in (exclude_args or []):
132-
dumped_inputs[f"arg_{i}"] = arg
133-
134-
# Only persist dtype and shape for kvcache tensors
135-
# (can be way to big otherwise)
136-
if (kv_caches := dumped_inputs.get("kv_caches")) \
137-
and isinstance(kv_caches, Iterable):
138-
dumped_inputs["kv_caches"] = [(t.dtype, t.shape)
139-
for t in kv_caches
140-
if is_tensor(t)]
141-
142-
try:
143-
pickle.dump(dumped_inputs, filep)
144-
except Exception as pickle_err:
145-
logger.warning(
146-
"Failed to pickle inputs of failed execution: %s",
147-
str(pickle_err))
148-
raise type(err)(f"Error in model execution: "
149-
f"{str(err)}") from err
150-
151-
logger.info(
152-
"Completed writing input of failed execution to %s.",
153-
filename)
154-
raise type(err)(
155-
f"Error in model execution (input dumped to {filename}): "
156-
f"{str(err)}") from err
157-
158-
return _wrapper
159-
160-
return _inner
161-
162-
163106
class BroadcastableModelInput(ABC):
164107

165108
@abstractmethod

0 commit comments

Comments
 (0)