Skip to content

Commit 4cb6fa0

Browse files
wallashss and joerunde
authored Feb 26, 2025
[Bugfix] Backend option to disable xgrammar any_whitespace (#12744)
Signed-off-by: Wallas Santos <wallashss@ibm.com> Signed-off-by: Joe Runde <Joseph.Runde@ibm.com> Co-authored-by: Joe Runde <Joseph.Runde@ibm.com>
1 parent d08b285 commit 4cb6fa0

File tree

3 files changed

+88
-3
lines changed

3 files changed

+88
-3
lines changed
 

‎tests/entrypoints/llm/test_guided_generate.py

+54
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import jsonschema
88
import pytest
9+
from pydantic import BaseModel
910

1011
from vllm.distributed import cleanup_dist_env_and_memory
1112
from vllm.entrypoints.llm import LLM
@@ -322,3 +323,56 @@ def test_guided_json_object(llm, guided_decoding_backend: str):
322323
# Parse to verify it is valid JSON
323324
parsed_json = json.loads(generated_text)
324325
assert isinstance(parsed_json, dict)
326+
327+
328+
@pytest.mark.skip_global_cleanup
329+
def test_json_with_any_whitespace_disabled(llm):
330+
331+
class ResponseSchema(BaseModel):
332+
clarifying_question: str
333+
cost_per_serving: str
334+
calories: str
335+
type_dish_ids: str
336+
type_meal_ids: str
337+
product_ids: list[str]
338+
exclude_product_ids: list[str]
339+
allergen_ids: list[str]
340+
total_cooking_time: str
341+
kitchen_ids: str
342+
holiday_ids: str
343+
344+
# Note: Without this setting, the response is sometimes full of `\n`
345+
# for some models. This option prevents that.
346+
guided_decoding_backend = 'xgrammar:disable-any-whitespace'
347+
348+
schema = ResponseSchema.model_json_schema()
349+
guided_params = GuidedDecodingParams(json=schema,
350+
backend=\
351+
guided_decoding_backend)
352+
sampling_params = SamplingParams(max_tokens=2000,
353+
frequency_penalty=0,
354+
presence_penalty=-1.1,
355+
repetition_penalty=1.3,
356+
guided_decoding=guided_params)
357+
358+
prompt = ("<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You"
359+
"are a helpful assistant.<|im_end|>\n<|im_start|>user\nI want a "
360+
"quick launch fast with $10.<|im_end|>\n<|im_start|>assistant\n")
361+
outputs = llm.generate(prompts=prompt,
362+
sampling_params=sampling_params,
363+
use_tqdm=True)
364+
365+
assert outputs is not None
366+
367+
for output in outputs:
368+
assert output is not None
369+
assert isinstance(output, RequestOutput)
370+
371+
generated_text = output.outputs[0].text
372+
assert generated_text is not None
373+
assert "\n" not in generated_text
374+
375+
# Parse to verify it is valid JSON
376+
parsed_json = json.loads(generated_text)
377+
assert isinstance(parsed_json, dict)
378+
jsonschema.validate(instance=parsed_json, schema=schema)

‎vllm/engine/arg_utils.py

+1
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
385385
'Backend-specific options can be supplied in a comma-separated '
386386
'list following a colon after the backend name. Valid backends and '
387387
'all available options are: [xgrammar:no-fallback, '
388+
'xgrammar:disable-any-whitespace, '
388389
'outlines:no-fallback, lm-format-enforcer:no-fallback]')
389390
parser.add_argument(
390391
'--logits-processor-pattern',

‎vllm/model_executor/guided_decoding/xgrammar_decoding.py

+33-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
xgr_installed = False
2020
pass
2121

22+
from vllm.logger import init_logger
2223
from vllm.model_executor.guided_decoding.utils import (convert_lark_to_gbnf,
2324
grammar_is_likely_lark)
2425
from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
@@ -29,6 +30,8 @@
2930
from vllm.config import ModelConfig
3031
from vllm.sampling_params import GuidedDecodingParams
3132

33+
logger = init_logger(__name__)
34+
3235

3336
# TODO: passing batch size to max threads here
3437
def get_local_xgrammar_guided_decoding_logits_processor(
@@ -161,6 +164,7 @@ class GrammarConfig:
161164
json_str: str | None = None
162165
grammar_str: str | None = None
163166
json_object: bool | None = None
167+
any_whitespace: bool = True
164168
max_threads: int = 8
165169
tokenizer_data: TokenizerData | None = None
166170

@@ -180,19 +184,42 @@ def from_guided_params(cls,
180184
else:
181185
json_str = guided_params.json
182186

187+
any_whitespace = 'disable-any-whitespace' not in \
188+
guided_params.backend_options()
189+
190+
# Check and log if model with xgrammar and whitespace have history
191+
# of runaway generation of whitespaces.
192+
# References:
193+
# https://github.com/vllm-project/vllm/pull/12744
194+
# https://github.com/mlc-ai/xgrammar/issues/212
195+
model_with_warn = None
196+
197+
if 'Mistral' in model_config.model:
198+
model_with_warn = 'Mistral'
199+
elif 'Qwen' in model_config.model:
200+
model_with_warn = 'Qwen'
201+
202+
if model_with_warn is not None and any_whitespace:
203+
msg = (f"{model_with_warn} "
204+
f"model detected, consider set "
205+
f"`guided_backend=xgrammar:disable-any-whitespace` "
206+
f"to prevent runaway generation of whitespaces.")
207+
logger.info_once(msg)
183208
# Validate the schema and raise ValueError here if it is invalid.
184209
# This is to avoid exceptions in model execution, which will crash
185210
# the engine worker process.
186211
try:
187-
xgr.Grammar.from_json_schema(json_str)
212+
xgr.Grammar.from_json_schema(json_str,
213+
any_whitespace=any_whitespace)
188214
except RuntimeError as err:
189215
raise ValueError(str(err)) from err
190216

191217
return cls(json_str=json_str,
192218
vocab_size=model_config.hf_text_config.vocab_size,
193219
tokenizer_hash=tokenizer_hash,
194220
max_threads=max_threads,
195-
tokenizer_data=tokenizer_data)
221+
tokenizer_data=tokenizer_data,
222+
any_whitespace=any_whitespace)
196223
elif guided_params.grammar:
197224
# XGrammar only supports GBNF grammars, so we must convert Lark
198225
if grammar_is_likely_lark(guided_params.grammar):
@@ -290,7 +317,10 @@ def _ensure_ctx(self):
290317
if self.ctx is None:
291318
compiler = GrammarCompilerCache.get_compiler(self.config)
292319
if self.config.json_str is not None:
293-
self.ctx = compiler.compile_json_schema(self.config.json_str)
320+
any_whitespace = self.config.any_whitespace
321+
self.ctx = compiler\
322+
.compile_json_schema(self.config.json_str,
323+
any_whitespace=any_whitespace)
294324
elif self.config.grammar_str is not None:
295325
self.ctx = compiler.compile_grammar(self.config.grammar_str)
296326
elif self.config.json_object:

0 commit comments

Comments
 (0)
Please sign in to comment.