@@ -112,26 +112,77 @@ async def get_best_benchmarks():
     return best_results

-async def send_discord_notification(benchmark_data):
+async def get_previous_benchmark(config_data, config_name, current_timestamp):
+    """Get the previous benchmark for a given configuration."""
+    benchmarks = config_data.get(config_name, [])
+    # Sort by timestamp and find the most recent benchmark before current_timestamp
+    previous = None
+    for b in sorted(benchmarks, key=lambda x: x['timestamp']):
+        if b['timestamp'] < current_timestamp:
+            previous = b
+        else:
+            break
+    return previous
+
+async def format_metric_comparison(current, previous, metric, format_str=".2f", lower_is_better=False):
+    """Format a metric with a trend indicator."""
+    current_val = current.get(metric, 0)
+    if not previous:
+        return f"**{current_val:{format_str}}**"
+
+    prev_val = previous.get(metric, 0)
+    diff = current_val - prev_val
+
+    # Invert the comparison logic if lower values are better
+    if lower_is_better:
+        diff = -diff  # a drop in the raw value now counts as an improvement
+
+    if diff > 0:
+        return f"**{current_val:{format_str}}** 🟢↑ ({'-' if lower_is_better else '+'}{abs(current_val - prev_val):{format_str}})"
+    elif diff < 0:
+        return f"**{current_val:{format_str}}** 🔴↓ ({'+' if lower_is_better else '-'}{abs(current_val - prev_val):{format_str}})"
+    else:
+        return f"**{current_val:{format_str}}** ⚪"
+
+async def send_discord_notification(benchmark_data, config_data):
     if not DISCORD_WEBHOOK_URL:
         print("Discord webhook URL not configured, skipping notification")
         return

     # Create a formatted message
     config_name = f"{benchmark_data['config']}/{benchmark_data['model']}"

+    # Use the passed config_data instead of fetching again
+    previous_benchmark = await get_previous_benchmark(
+        config_data,
+        f"{benchmark_data['config']}/{benchmark_data['model']}",
+        benchmark_data['timestamp']
+    )
+
+    # Format metrics with comparisons
+    gen_tps = await format_metric_comparison(benchmark_data, previous_benchmark, 'generation_tps')
+    prompt_tps = await format_metric_comparison(benchmark_data, previous_benchmark, 'prompt_tps')
+    ttft = await format_metric_comparison(
+        {'ttft': benchmark_data['ttft'] * 1000},
+        {'ttft': previous_benchmark['ttft'] * 1000} if previous_benchmark else None,
+        'ttft',
+        lower_is_better=True
+    )
+    prompt_len = await format_metric_comparison(benchmark_data, previous_benchmark, 'prompt_len', "d")
+    response_len = await format_metric_comparison(benchmark_data, previous_benchmark, 'response_len', "d")
+
     # Create a simple JSON string of the topology
     topology = benchmark_data.get('configuration', {})
     topology_str = "```json\n" + json.dumps(topology, indent=2) + "\n```"

     message = (
         f"🚀 New Benchmark Result for **{config_name}**\n\n"
         f"📊 Performance Metrics:\n"
-        f"• Generation TPS: **{benchmark_data['generation_tps']:.2f}**\n"
-        f"• Prompt TPS: **{benchmark_data['prompt_tps']:.2f}**\n"
-        f"• TTFT: **{benchmark_data['ttft'] * 1000:.2f}ms**\n"
-        f"• Prompt Length: {benchmark_data['prompt_len']}\n"
-        f"• Response Length: {benchmark_data['response_len']}\n\n"
+        f"• Generation TPS: {gen_tps}\n"
+        f"• Prompt TPS: {prompt_tps}\n"
+        f"• TTFT: {ttft}ms\n"
+        f"• Prompt Length: {prompt_len}\n"
+        f"• Response Length: {response_len}\n\n"
         f"🔍 Run Details:\n"
         f"• Commit: {benchmark_data['commit'][:7]}\n"
         f"• Branch: {benchmark_data['branch']}\n"
@@ -165,7 +216,7 @@ async def generate_best():
     print(f"Last processed timestamp: {last_processed}")

     async with session.client('s3') as s3:
-        # Load all benchmark data
+        # Load all benchmark data once
         config_data = await load_data_from_s3()
         best_benchmarks = await get_best_benchmarks()
@@ -185,7 +236,8 @@ async def generate_best():
                 'config': config,
                 'model': model,
             })
-            await send_discord_notification(benchmark_with_info)
+            # Pass the already loaded config_data to avoid refetching
+            await send_discord_notification(benchmark_with_info, config_data)

             # Update the latest timestamp if this is the newest we've seen
             if timestamp > new_latest:
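
The `lower_is_better` flag exists for TTFT, where a smaller number is the win. A quick sketch of that path (values are made up, in milliseconds), assuming the helpers above are in scope:

```python
import asyncio

async def demo_ttft():
    # A drop from 150 ms to 120 ms should display green with a negative delta
    print(await format_metric_comparison(
        {"ttft": 120.0}, {"ttft": 150.0}, "ttft", lower_is_better=True
    ))
    # -> **120.00** 🟢↑ (-30.00)

asyncio.run(demo_ttft())
```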