File tree Expand file tree Collapse file tree 1 file changed +7
-1
lines changed
sharktank/sharktank/examples Expand file tree Collapse file tree 1 file changed +7
-1
lines changed Original file line number Diff line number Diff line change @@ -90,11 +90,17 @@ def main():
9090 def generate_params_json (
9191 hp : LlamaHParams , prefill_bs : list [int ], decode_bs : list [int ]
9292 ) -> Dict [str , Any ]:
93+ """
94+ Generate config.json for shortfin.
95+
96+
97+ For shortfin, we only write attention_head_count_kv because that's all shortfin needs.
98+ Note that this is different from hp.attention_head_count when grouped attention shares the KV cache between heads.
99+ """
93100 return {
94101 "module_name" : "module" ,
95102 "module_abi_version" : 1 ,
96103 "max_seq_len" : hp .context_length ,
97- # "attn_head_count": hp.attention_head_count, # we don't need the attention head count we just need the kvcache attention head count for shortfin
98104 "attn_head_dim" : hp .attn_head_dim ,
99105 "prefill_batch_sizes" : prefill_bs ,
100106 "decode_batch_sizes" : decode_bs ,
You can’t perform that action at this time.
0 commit comments