
Commit 4a1357d

add a docstring and remove a commented out line
1 parent a0a589e commit 4a1357d

File tree

1 file changed: +7 -1 lines changed

sharktank/sharktank/examples/export_paged_llm_v1.py

@@ -90,11 +90,17 @@ def main():
 def generate_params_json(
     hp: LlamaHParams, prefill_bs: list[int], decode_bs: list[int]
 ) -> Dict[str, Any]:
+    """
+    Generate config.json for shortfin.
+
+
+    For shortfin, we only write attention_head_count_kv because that's all shortfin needs.
+    Note that this is different from hp.attn_head_count when grouped attention shares kvcache between heads.
+    """
     return {
         "module_name": "module",
         "module_abi_version": 1,
         "max_seq_len": hp.context_length,
-        # "attn_head_count": hp.attention_head_count, # we don't need the attention head count we just need the kvcache attention head count for shortfin
         "attn_head_dim": hp.attn_head_dim,
         "prefill_batch_sizes": prefill_bs,
         "decode_batch_sizes": decode_bs,
