[Trt-llm] always perform verbose dump of defaults for forward compatibility #1338

Draft · wants to merge 2 commits into base: main from ensure_dump · commit 4455b50
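For context on the PR title, here is a minimal illustration of why always dumping defaults helps forward compatibility. This uses plain dataclasses as a stand-in, not the actual truss TRT-LLM config classes:

```python
from dataclasses import asdict, dataclass, fields

# Illustrative stand-in only; not the actual truss config classes.
@dataclass
class BuildConfig:
    max_seq_len: int = 2048   # today's default
    max_beam_width: int = 1   # today's default

cfg = BuildConfig()

# Sparse dump: omits fields still equal to their defaults. If a future
# release ships a different default, re-parsing this dict silently
# changes the effective value.
sparse = {
    f.name: getattr(cfg, f.name)
    for f in fields(cfg)
    if getattr(cfg, f.name) != f.default
}
assert sparse == {}

# Verbose dump: pins every value, so the config re-parses to exactly
# what it was built with, regardless of future default changes.
verbose = asdict(cfg)
assert verbose == {"max_seq_len": 2048, "max_beam_width": 1}
```

This matches the test diff below, where fields like `max_beam_width`, `plugin_configuration`, and the full `runtime` block now appear even in the non-verbose dump.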
GitHub Actions / JUnit Test Report failed Jan 24, 2025 in 0s

465 tests run, 462 passed, 2 skipped, 1 failed.

Annotations

Check failure on line 471 in truss/tests/test_config.py


github-actions / JUnit Test Report

test_config.test_to_dict_trtllm[False-True]

Raw output
verbose = False, expect_equal = True
trtllm_config = {'build_commands': [], 'environment_variables': {}, 'external_package_dirs': [], 'model_metadata': {}, ...}
trtllm_spec_dec_config = {'build_commands': [], 'environment_variables': {}, 'external_package_dirs': [], 'model_metadata': {}, ...}
trtllm_spec_dec_config_full = {'build_commands': [], 'environment_variables': {}, 'external_package_dirs': [], 'model_metadata': {}, ...}

    @pytest.mark.parametrize("verbose, expect_equal", [(False, True), (True, False)])
    def test_to_dict_trtllm(
        verbose,
        expect_equal,
        trtllm_config,
        trtllm_spec_dec_config,
        trtllm_spec_dec_config_full,
    ):
>       assert (
            TrussConfig.from_dict(trtllm_config).to_dict(verbose=verbose) == trtllm_config
        ) == expect_equal
E       AssertionError: assert ({'build_comma...ata': {}, ...} == {'build_comma...ata': {}, ...}
E         Omitting 10 identical items, use -vv to show
E         Differing items:
E         {'trt_llm': {'build': {'base_model': 'llama', 'checkpoint_repository': {'repo': 'meta/llama4-500B', 'revision': None, ...ict', 'enable_chunked_context': True, 'kv_cache_free_gpu_mem_fraction': 0.9, 'kv_cache_host_memory_bytes': None, ...}}} != {'trt_llm': {'build': {'base_model': 'llama', 'checkpoint_repository': {'repo': 'meta/llama4-500B', 'source': 'HF'}, 'gather_all_token_logits': False, 'max_batch_size': 512, ...}, 'runtime': {}}}
E         Full diff:
E           {
E            'build_commands': [],
E            'environment_variables': {},
E            'external_package_dirs': [],
E            'model_metadata': {},
E            'model_name': None,
E            'python_version': 'py39',
E            'requirements': [],
E            'resources': {'accelerator': 'L4',
E                          'cpu': '1',
E                          'memory': '24Gi',
E                          'use_gpu': True},
E            'secrets': {},
E            'system_packages': [],
E            'trt_llm': {'build': {'base_model': 'llama',
E                                  'checkpoint_repository': {'repo': 'meta/llama4-500B',
E         +                                                  'revision': None,
E                                                            'source': 'HF'},
E                                  'gather_all_token_logits': False,
E                                  'max_batch_size': 512,
E         +                        'max_beam_width': 1,
E         +                        'max_num_tokens': 8192,
E         +                        'max_prompt_embedding_table_size': 0,
E         -                        'max_seq_len': 2048},
E         ?                                           -
E         +                        'max_seq_len': 2048,
E         -              'runtime': {}},
E         +                        'num_builder_gpus': None,
E         +                        'pipeline_parallel_count': 1,
E         +                        'plugin_configuration': {'gemm_plugin': 'auto',
E         +                                                 'paged_kv_cache': True,
E         +                                                 'use_fp8_context_fmha': False,
E         +                                                 'use_paged_context_fmha': True},
E         +                        'quantization_type': 'no_quant',
E         +                        'speculator': None,
E         +                        'strongly_typed': False,
E         +                        'tensor_parallel_count': 1},
E         +              'runtime': {'batch_scheduler_policy': 'guaranteed_no_evict',
E         +                          'enable_chunked_context': True,
E         +                          'kv_cache_free_gpu_mem_fraction': 0.9,
E         +                          'kv_cache_host_memory_bytes': None,
E         +                          'request_default_max_tokens': None,
E         +                          'total_token_limit': 500000}},
E           }) == True

truss/tests/test_config.py:471: AssertionError
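The failure follows directly from this PR's change. The failing parametrization ID `[False-True]` is pytest's encoding of `verbose=False, expect_equal=True`: the test expected the non-verbose `to_dict` output to round-trip exactly to the sparse input config, but with defaults now always dumped, `to_dict(verbose=False)` also emits every TRT-LLM default, so equality no longer holds. (Running `pytest truss/tests/test_config.py::test_to_dict_trtllm -vv` locally shows the items omitted from the diff above.) A minimal sketch of one way to realign the test, assuming the new always-verbose behavior is intended; the sketch keeps only the failing assertion and assumes `TrussConfig` and the `trtllm_config` fixture are already defined in truss/tests/test_config.py:

```python
import pytest

# Sketch only: with TRT-LLM defaults now always dumped, the sparse input
# config no longer round-trips unchanged in either verbose mode, so both
# parametrizations expect inequality. The original test also took the
# trtllm_spec_dec_config fixtures, trimmed here for brevity.
@pytest.mark.parametrize("verbose, expect_equal", [(False, False), (True, False)])
def test_to_dict_trtllm(verbose, expect_equal, trtllm_config):
    assert (
        TrussConfig.from_dict(trtllm_config).to_dict(verbose=verbose) == trtllm_config
    ) == expect_equal
```

An alternative, depending on the maintainers' intent, is to update the `trtllm_config` fixture to the fully populated form shown in the diff's `+` lines, keeping the original `(False, True)` expectation.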