4 files changed: +12 -8 lines changed
Original file line number Diff line number Diff line change
@@ -21,9 +21,10 @@ class Keys:
2121 PER_ITERATION_LATENCY = "per_iteration_latency"
2222 MEAN_LATENCY = "mean_latency"
2323 STD_DEV_LATENCY = "std_dev_latency"
24- MEAN_TOKENS_PER_SECOND = "mean_tokens_per_second "
24+ TOKEN_GENERATION_TOKENS_PER_SECOND = "token_generation_tokens_per_second "
2525 STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
2626 SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
27+ PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
2728 STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
2829 CHECKPOINT = "checkpoint"
2930 DTYPE = "dtype"
Original file line number Diff line number Diff line change @@ -110,7 +110,7 @@ class HuggingfaceBench(Tool):
110110 def __init__ (self ):
111111 super ().__init__ (monitor_message = "Benchmarking Huggingface LLM" )
112112
113- self .status_stats = [Keys .SECONDS_TO_FIRST_TOKEN , Keys .MEAN_TOKENS_PER_SECOND ]
113+ self .status_stats = [Keys .SECONDS_TO_FIRST_TOKEN , Keys .TOKEN_GENERATION_TOKENS_PER_SECOND ]
114114
115115 @staticmethod
116116 def parser (parser : argparse .ArgumentParser = None , add_help : bool = True ):
@@ -283,11 +283,11 @@ def run(
283283 [token_len for _ , token_len in decode_per_iteration_result ]
284284 )
285285 # Subtract 1 so that we don't count the prefill token
286- mean_tokens_per_second = (mean_token_len - 1 ) / mean_decode_latency
286+ token_generation_tokens_per_second = (mean_token_len - 1 ) / mean_decode_latency
287287
288288 # Save performance data to stats
289289 state .save_stat (Keys .SECONDS_TO_FIRST_TOKEN , mean_time_to_first_token )
290- state .save_stat (Keys .MEAN_TOKENS_PER_SECOND , mean_tokens_per_second )
290+ state .save_stat (Keys .TOKEN_GENERATION_TOKENS_PER_SECOND , token_generation_tokens_per_second )
291291 state .save_stat (Keys .PROMPT_TOKENS , input_ids .shape [1 ])
292292
293293 return state
Original file line number Diff line number Diff line change @@ -32,7 +32,8 @@ def __init__(self):
3232
3333 self .status_stats = [
3434 Keys .SECONDS_TO_FIRST_TOKEN ,
35- Keys .MEAN_TOKENS_PER_SECOND ,
35+ Keys .PREFILL_TOKENS_PER_SECOND ,
36+ Keys .TOKEN_GENERATION_TOKENS_PER_SECOND ,
3637 Keys .PROMPT_TOKENS ,
3738 ]
3839
@@ -144,10 +145,12 @@ def run(
144145 per_iteration_tokens_per_second .append (model .tokens_per_second )
145146
146147 mean_time_to_first_token = statistics .mean (per_iteration_time_to_first_token )
147- mean_tokens_per_second = statistics .mean (per_iteration_tokens_per_second )
148+ prefill_tokens_per_second = input_ids_len / mean_time_to_first_token
149+ token_generation_tokens_per_second = statistics .mean (per_iteration_tokens_per_second )
148150
149151 state .save_stat (Keys .SECONDS_TO_FIRST_TOKEN , mean_time_to_first_token )
150- state .save_stat (Keys .MEAN_TOKENS_PER_SECOND , mean_tokens_per_second )
152+ state .save_stat (Keys .PREFILL_TOKENS_PER_SECOND , prefill_tokens_per_second )
153+ state .save_stat (Keys .TOKEN_GENERATION_TOKENS_PER_SECOND , token_generation_tokens_per_second )
151154 state .save_stat (Keys .PROMPT_TOKENS , input_ids_len )
152155
153156 return state
Original file line number Diff line number Diff line change @@ -78,7 +78,7 @@ def test_001_huggingface_bench(self):
7878
7979 stats = fs .Stats (state .cache_dir , state .build_name ).stats
8080
81- assert stats [Keys .MEAN_TOKENS_PER_SECOND ] > 0
81+ assert stats [Keys .TOKEN_GENERATION_TOKENS_PER_SECOND ] > 0
8282
8383
8484if __name__ == "__main__" :
You can’t perform that action at this time.
0 commit comments