[Trt-llm] always perform verbose dump of defaults for forward compatibility #1338
GitHub Actions / JUnit Test Report
failed
Jan 24, 2025 in 0s
465 tests run, 462 passed, 2 skipped, 1 failed.
Annotations
Check failure on line 471 in truss/tests/test_config.py
github-actions / JUnit Test Report
test_config.test_to_dict_trtllm[False-True]
AssertionError: assert ({'build_comma...ata': {}, ...} == {'build_comma...ata': {}, ...}
Omitting 10 identical items, use -vv to show
Differing items:
{'trt_llm': {'build': {'base_model': 'llama', 'checkpoint_repository': {'repo': 'meta/llama4-500B', 'revision': None, ...ict', 'enable_chunked_context': True, 'kv_cache_free_gpu_mem_fraction': 0.9, 'kv_cache_host_memory_bytes': None, ...}}} != {'trt_llm': {'build': {'base_model': 'llama', 'checkpoint_repository': {'repo': 'meta/llama4-500B', 'source': 'HF'}, 'gather_all_token_logits': False, 'max_batch_size': 512, ...}, 'runtime': {}}}
Full diff:
{
'build_commands': [],
'environment_variables': {},
'external_package_dirs': [],
'model_metadata': {},
'model_name': None,
'python_version': 'py39',
'requirements': [],
'resources': {'accelerator': 'L4',
'cpu': '1',
'memory': '24Gi',
'use_gpu': True},
'secrets': {},
'system_packages': [],
'trt_llm': {'build': {'base_model': 'llama',
'checkpoint_repository': {'repo': 'meta/llama4-500B',
+ 'revision': None,
'source': 'HF'},
'gather_all_token_logits': False,
'max_batch_size': 512,
+ 'max_beam_width': 1,
+ 'max_num_tokens': 8192,
+ 'max_prompt_embedding_table_size': 0,
- 'max_seq_len': 2048},
? -
+ 'max_seq_len': 2048,
- 'runtime': {}},
+ 'num_builder_gpus': None,
+ 'pipeline_parallel_count': 1,
+ 'plugin_configuration': {'gemm_plugin': 'auto',
+ 'paged_kv_cache': True,
+ 'use_fp8_context_fmha': False,
+ 'use_paged_context_fmha': True},
+ 'quantization_type': 'no_quant',
+ 'speculator': None,
+ 'strongly_typed': False,
+ 'tensor_parallel_count': 1},
+ 'runtime': {'batch_scheduler_policy': 'guaranteed_no_evict',
+ 'enable_chunked_context': True,
+ 'kv_cache_free_gpu_mem_fraction': 0.9,
+ 'kv_cache_host_memory_bytes': None,
+ 'request_default_max_tokens': None,
+ 'total_token_limit': 500000}},
}) == True
Raw output
verbose = False, expect_equal = True
trtllm_config = {'build_commands': [], 'environment_variables': {}, 'external_package_dirs': [], 'model_metadata': {}, ...}
trtllm_spec_dec_config = {'build_commands': [], 'environment_variables': {}, 'external_package_dirs': [], 'model_metadata': {}, ...}
trtllm_spec_dec_config_full = {'build_commands': [], 'environment_variables': {}, 'external_package_dirs': [], 'model_metadata': {}, ...}
@pytest.mark.parametrize("verbose, expect_equal", [(False, True), (True, False)])
def test_to_dict_trtllm(
verbose,
expect_equal,
trtllm_config,
trtllm_spec_dec_config,
trtllm_spec_dec_config_full,
):
> assert (
TrussConfig.from_dict(trtllm_config).to_dict(verbose=verbose) == trtllm_config
) == expect_equal
E AssertionError: assert ({'build_comma...ata': {}, ...} == {'build_comma...ata': {}, ...}
E Omitting 10 identical items, use -vv to show
E Differing items:
E {'trt_llm': {'build': {'base_model': 'llama', 'checkpoint_repository': {'repo': 'meta/llama4-500B', 'revision': None, ...ict', 'enable_chunked_context': True, 'kv_cache_free_gpu_mem_fraction': 0.9, 'kv_cache_host_memory_bytes': None, ...}}} != {'trt_llm': {'build': {'base_model': 'llama', 'checkpoint_repository': {'repo': 'meta/llama4-500B', 'source': 'HF'}, 'gather_all_token_logits': False, 'max_batch_size': 512, ...}, 'runtime': {}}}
E Full diff:
E {
E 'build_commands': [],
E 'environment_variables': {},
E 'external_package_dirs': [],
E 'model_metadata': {},
E 'model_name': None,
E 'python_version': 'py39',
E 'requirements': [],
E 'resources': {'accelerator': 'L4',
E 'cpu': '1',
E 'memory': '24Gi',
E 'use_gpu': True},
E 'secrets': {},
E 'system_packages': [],
E 'trt_llm': {'build': {'base_model': 'llama',
E 'checkpoint_repository': {'repo': 'meta/llama4-500B',
E + 'revision': None,
E 'source': 'HF'},
E 'gather_all_token_logits': False,
E 'max_batch_size': 512,
E + 'max_beam_width': 1,
E + 'max_num_tokens': 8192,
E + 'max_prompt_embedding_table_size': 0,
E - 'max_seq_len': 2048},
E ? -
E + 'max_seq_len': 2048,
E - 'runtime': {}},
E + 'num_builder_gpus': None,
E + 'pipeline_parallel_count': 1,
E + 'plugin_configuration': {'gemm_plugin': 'auto',
E + 'paged_kv_cache': True,
E + 'use_fp8_context_fmha': False,
E + 'use_paged_context_fmha': True},
E + 'quantization_type': 'no_quant',
E + 'speculator': None,
E + 'strongly_typed': False,
E + 'tensor_parallel_count': 1},
E + 'runtime': {'batch_scheduler_policy': 'guaranteed_no_evict',
E + 'enable_chunked_context': True,
E + 'kv_cache_free_gpu_mem_fraction': 0.9,
E + 'kv_cache_host_memory_bytes': None,
E + 'request_default_max_tokens': None,
E + 'total_token_limit': 500000}},
E }) == True
truss/tests/test_config.py:471: AssertionError
Loading