File tree 4 files changed +12
-8
lines changed
4 files changed +12
-8
lines changed Original file line number Diff line number Diff line change @@ -21,9 +21,10 @@ class Keys:
21
21
PER_ITERATION_LATENCY = "per_iteration_latency"
22
22
MEAN_LATENCY = "mean_latency"
23
23
STD_DEV_LATENCY = "std_dev_latency"
24
- MEAN_TOKENS_PER_SECOND = "mean_tokens_per_second"
24
+ TOKEN_GENERATION_TOKENS_PER_SECOND = "token_generation_tokens_per_second"
25
25
STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
26
26
SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
27
+ PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
27
28
STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
28
29
CHECKPOINT = "checkpoint"
29
30
DTYPE = "dtype"
Original file line number Diff line number Diff line change @@ -110,7 +110,7 @@ class HuggingfaceBench(Tool):
110
110
def __init__ (self ):
111
111
super ().__init__ (monitor_message = "Benchmarking Huggingface LLM" )
112
112
113
- self .status_stats = [Keys .SECONDS_TO_FIRST_TOKEN , Keys .MEAN_TOKENS_PER_SECOND ]
113
+ self .status_stats = [Keys .SECONDS_TO_FIRST_TOKEN , Keys .TOKEN_GENERATION_TOKENS_PER_SECOND ]
114
114
115
115
@staticmethod
116
116
def parser (parser : argparse .ArgumentParser = None , add_help : bool = True ):
@@ -283,11 +283,11 @@ def run(
283
283
[token_len for _ , token_len in decode_per_iteration_result ]
284
284
)
285
285
# Subtract 1 so that we don't count the prefill token
286
- mean_tokens_per_second = (mean_token_len - 1 ) / mean_decode_latency
286
+ token_generation_tokens_per_second = (mean_token_len - 1 ) / mean_decode_latency
287
287
288
288
# Save performance data to stats
289
289
state .save_stat (Keys .SECONDS_TO_FIRST_TOKEN , mean_time_to_first_token )
290
- state .save_stat (Keys .MEAN_TOKENS_PER_SECOND , mean_tokens_per_second )
290
+ state .save_stat (Keys .TOKEN_GENERATION_TOKENS_PER_SECOND , token_generation_tokens_per_second )
291
291
state .save_stat (Keys .PROMPT_TOKENS , input_ids .shape [1 ])
292
292
293
293
return state
Original file line number Diff line number Diff line change @@ -32,7 +32,8 @@ def __init__(self):
32
32
33
33
self .status_stats = [
34
34
Keys .SECONDS_TO_FIRST_TOKEN ,
35
- Keys .MEAN_TOKENS_PER_SECOND ,
35
+ Keys .PREFILL_TOKENS_PER_SECOND ,
36
+ Keys .TOKEN_GENERATION_TOKENS_PER_SECOND ,
36
37
Keys .PROMPT_TOKENS ,
37
38
]
38
39
@@ -144,10 +145,12 @@ def run(
144
145
per_iteration_tokens_per_second .append (model .tokens_per_second )
145
146
146
147
mean_time_to_first_token = statistics .mean (per_iteration_time_to_first_token )
147
- mean_tokens_per_second = statistics .mean (per_iteration_tokens_per_second )
148
+ prefill_tokens_per_second = input_ids_len / mean_time_to_first_token
149
+ token_generation_tokens_per_second = statistics .mean (per_iteration_tokens_per_second )
148
150
149
151
state .save_stat (Keys .SECONDS_TO_FIRST_TOKEN , mean_time_to_first_token )
150
- state .save_stat (Keys .MEAN_TOKENS_PER_SECOND , mean_tokens_per_second )
152
+ state .save_stat (Keys .PREFILL_TOKENS_PER_SECOND , prefill_tokens_per_second )
153
+ state .save_stat (Keys .TOKEN_GENERATION_TOKENS_PER_SECOND , token_generation_tokens_per_second )
151
154
state .save_stat (Keys .PROMPT_TOKENS , input_ids_len )
152
155
153
156
return state
Original file line number Diff line number Diff line change @@ -78,7 +78,7 @@ def test_001_huggingface_bench(self):
78
78
79
79
stats = fs .Stats (state .cache_dir , state .build_name ).stats
80
80
81
- assert stats [Keys .MEAN_TOKENS_PER_SECOND ] > 0
81
+ assert stats [Keys .TOKEN_GENERATION_TOKENS_PER_SECOND ] > 0
82
82
83
83
84
84
if __name__ == "__main__" :
You can’t perform that action at this time.
0 commit comments