Skip to content

Commit 6dd5e52

Browse files
authored
Squelch MLA warning for Compressed-Tensors Models (#12704)
Signed-off-by: Kyle Sayers <[email protected]>
1 parent c11de33 commit 6dd5e52

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

vllm/config.py

+4 −2
Original file line numberDiff line numberDiff line change
@@ -986,6 +986,9 @@ def is_cross_encoder(self) -> bool:
986986

987987
@property
988988
def use_mla(self) -> bool:
989+
if not self.is_deepseek_mla or envs.VLLM_MLA_DISABLE:
990+
return False
991+
989992
if self.quantization is not None and self.quantization not in [\
990993
"fp8", "compressed-tensors"]:
991994
logger.warning(
@@ -1012,8 +1015,7 @@ def use_mla(self) -> bool:
10121015
quant_config)
10131016
return False
10141017

1015-
use_mla = (self.is_deepseek_mla and not envs.VLLM_MLA_DISABLE)
1016-
return use_mla
1018+
return True
10171019

10181020
@property
10191021
def supported_runner_types(self) -> Set[RunnerType]:

0 commit comments

Comments (0)