Skip to content

Commit eaaaf38

Browse files
committed
Add prompt-processing tokens per second (TPS) to oga-bench
Signed-off-by: David Fan <[email protected]>
1 parent 63f11db commit eaaaf38

File tree

2 files changed

+4
-0
lines changed

2 files changed

+4
-0
lines changed

src/turnkeyml/llm/cache.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ class Keys:
2424
MEAN_TOKENS_PER_SECOND = "mean_tokens_per_second"
2525
STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
2626
SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
27+
PROMPT_PROCESSING_TOKENS_PER_SECOND = "prompt_processing_tokens_per_second"
2728
STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
2829
CHECKPOINT = "checkpoint"
2930
DTYPE = "dtype"

src/turnkeyml/llm/tools/ort_genai/oga_bench.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ def __init__(self):
3232

3333
self.status_stats = [
3434
Keys.SECONDS_TO_FIRST_TOKEN,
35+
Keys.PROMPT_PROCESSING_TOKENS_PER_SECOND,
3536
Keys.MEAN_TOKENS_PER_SECOND,
3637
Keys.PROMPT_TOKENS,
3738
]
@@ -144,9 +145,11 @@ def run(
144145
per_iteration_tokens_per_second.append(model.tokens_per_second)
145146

146147
mean_time_to_first_token = statistics.mean(per_iteration_time_to_first_token)
148+
prompt_processing_tokens_per_second = input_ids_len / mean_time_to_first_token
147149
mean_tokens_per_second = statistics.mean(per_iteration_tokens_per_second)
148150

149151
state.save_stat(Keys.SECONDS_TO_FIRST_TOKEN, mean_time_to_first_token)
152+
state.save_stat(Keys.PROMPT_PROCESSING_TOKENS_PER_SECOND, prompt_processing_tokens_per_second)
150153
state.save_stat(Keys.MEAN_TOKENS_PER_SECOND, mean_tokens_per_second)
151154
state.save_stat(Keys.PROMPT_TOKENS, input_ids_len)
152155

0 commit comments

Comments
 (0)