4 files changed (+21 −8 lines changed)

@@ -21,9 +21,10 @@ class Keys:
     PER_ITERATION_LATENCY = "per_iteration_latency"
     MEAN_LATENCY = "mean_latency"
     STD_DEV_LATENCY = "std_dev_latency"
-    MEAN_TOKENS_PER_SECOND = "mean_tokens_per_second"
+    TOKEN_GENERATION_TOKENS_PER_SECOND = "token_generation_tokens_per_second"
     STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
     SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
+    PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
     STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
     CHECKPOINT = "checkpoint"
     DTYPE = "dtype"
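The rename splits throughput reporting into prompt processing (prefill) and decode. Any consumer of these stats has to look them up under the new names; a minimal sketch of such a lookup, assuming the same fs.Stats access pattern used by the test at the bottom of this diff:

    # Sketch only: stats is the dict read back from the cache, as in the test below.
    stats = fs.Stats(state.cache_dir, state.build_name).stats
    # Not every benchmark saves the prefill key, so use .get() for the optional one.
    prefill_tps = stats.get(Keys.PREFILL_TOKENS_PER_SECOND)      # prompt-processing throughput
    decode_tps = stats[Keys.TOKEN_GENERATION_TOKENS_PER_SECOND]  # post-prefill generation throughput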
@@ -110,7 +110,10 @@ class HuggingfaceBench(Tool):
     def __init__(self):
         super().__init__(monitor_message="Benchmarking Huggingface LLM")
 
-        self.status_stats = [Keys.SECONDS_TO_FIRST_TOKEN, Keys.MEAN_TOKENS_PER_SECOND]
+        self.status_stats = [
+            Keys.SECONDS_TO_FIRST_TOKEN,
+            Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
+        ]
 
     @staticmethod
     def parser(parser: argparse.ArgumentParser = None, add_help: bool = True):
@@ -283,11 +286,13 @@ def run(
             [token_len for _, token_len in decode_per_iteration_result]
         )
         # Subtract 1 so that we don't count the prefill token
-        mean_tokens_per_second = (mean_token_len - 1) / mean_decode_latency
+        token_generation_tokens_per_second = (mean_token_len - 1) / mean_decode_latency
 
         # Save performance data to stats
         state.save_stat(Keys.SECONDS_TO_FIRST_TOKEN, mean_time_to_first_token)
-        state.save_stat(Keys.MEAN_TOKENS_PER_SECOND, mean_tokens_per_second)
+        state.save_stat(
+            Keys.TOKEN_GENERATION_TOKENS_PER_SECOND, token_generation_tokens_per_second
+        )
         state.save_stat(Keys.PROMPT_TOKENS, input_ids.shape[1])
 
         return state
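The subtract-one in the throughput math can be sanity-checked with made-up numbers; this sketch is illustrative only and not part of the change:

    # Hypothetical values: each run returns 65 tokens and the decode phase takes 1.6 s on average.
    mean_token_len = 65
    mean_decode_latency = 1.6
    # The first returned token is attributed to prefill, so only 64 tokens count as decode output.
    token_generation_tokens_per_second = (mean_token_len - 1) / mean_decode_latency  # 40.0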
@@ -32,7 +32,8 @@ def __init__(self):
 
         self.status_stats = [
             Keys.SECONDS_TO_FIRST_TOKEN,
-            Keys.MEAN_TOKENS_PER_SECOND,
+            Keys.PREFILL_TOKENS_PER_SECOND,
+            Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
             Keys.PROMPT_TOKENS,
         ]
 
@@ -144,10 +145,16 @@ def run(
             per_iteration_tokens_per_second.append(model.tokens_per_second)
 
         mean_time_to_first_token = statistics.mean(per_iteration_time_to_first_token)
-        mean_tokens_per_second = statistics.mean(per_iteration_tokens_per_second)
+        prefill_tokens_per_second = input_ids_len / mean_time_to_first_token
+        token_generation_tokens_per_second = statistics.mean(
+            per_iteration_tokens_per_second
+        )
 
         state.save_stat(Keys.SECONDS_TO_FIRST_TOKEN, mean_time_to_first_token)
-        state.save_stat(Keys.MEAN_TOKENS_PER_SECOND, mean_tokens_per_second)
+        state.save_stat(Keys.PREFILL_TOKENS_PER_SECOND, prefill_tokens_per_second)
+        state.save_stat(
+            Keys.TOKEN_GENERATION_TOKENS_PER_SECOND, token_generation_tokens_per_second
+        )
         state.save_stat(Keys.PROMPT_TOKENS, input_ids_len)
 
         return state
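Here the prefill figure is derived rather than measured separately: prompt length divided by the mean time to first token. A toy example with made-up values:

    # Hypothetical values for illustration only.
    input_ids_len = 128             # tokens in the prompt
    mean_time_to_first_token = 0.4  # seconds until the first generated token appears
    prefill_tokens_per_second = input_ids_len / mean_time_to_first_token  # 320.0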
@@ -78,7 +78,7 @@ def test_001_huggingface_bench(self):
 
         stats = fs.Stats(state.cache_dir, state.build_name).stats
 
-        assert stats[Keys.MEAN_TOKENS_PER_SECOND] > 0
+        assert stats[Keys.TOKEN_GENERATION_TOKENS_PER_SECOND] > 0
 
 
 if __name__ == "__main__":