Commit 149849f

format metric changes nicely in discord
1 parent 0fa8f1f commit 149849f

File tree: 1 file changed, +60 -8 lines

extra/dashboard/dashboard.py (+60 -8)
@@ -112,26 +112,77 @@ async def get_best_benchmarks():
 
   return best_results
 
-async def send_discord_notification(benchmark_data):
+async def get_previous_benchmark(config_data, config_name, current_timestamp):
+  """Get the previous benchmark for a given configuration."""
+  benchmarks = config_data.get(config_name, [])
+  # Sort by timestamp and find the most recent benchmark before current_timestamp
+  previous = None
+  for b in sorted(benchmarks, key=lambda x: x['timestamp']):
+    if b['timestamp'] < current_timestamp:
+      previous = b
+    else:
+      break
+  return previous
+
+async def format_metric_comparison(current, previous, metric, format_str=".2f", lower_is_better=False):
+  """Format a metric with trend indicator."""
+  current_val = current.get(metric, 0)
+  if not previous:
+    return f"**{current_val:{format_str}}**"
+
+  prev_val = previous.get(metric, 0)
+  diff = current_val - prev_val
+
+  # Invert the comparison logic if lower values are better
+  if lower_is_better:
+    diff = -diff  # This makes negative diffs good and positive diffs bad
+
+  if diff > 0:
+    return f"**{current_val:{format_str}}** 🟢↑ ({'-' if lower_is_better else '+'}{abs(current_val - prev_val):{format_str}})"
+  elif diff < 0:
+    return f"**{current_val:{format_str}}** 🔴↓ ({'+' if lower_is_better else '-'}{abs(current_val - prev_val):{format_str}})"
+  else:
+    return f"**{current_val:{format_str}}** ⚪"
+
+async def send_discord_notification(benchmark_data, config_data):
   if not DISCORD_WEBHOOK_URL:
     print("Discord webhook URL not configured, skipping notification")
     return
 
   # Create a formatted message
   config_name = f"{benchmark_data['config']}/{benchmark_data['model']}"
 
+  # Use the passed config_data instead of fetching again
+  previous_benchmark = await get_previous_benchmark(
+    config_data,
+    f"{benchmark_data['config']}/{benchmark_data['model']}",
+    benchmark_data['timestamp']
+  )
+
+  # Format metrics with comparisons
+  gen_tps = await format_metric_comparison(benchmark_data, previous_benchmark, 'generation_tps')
+  prompt_tps = await format_metric_comparison(benchmark_data, previous_benchmark, 'prompt_tps')
+  ttft = await format_metric_comparison(
+    {'ttft': benchmark_data['ttft'] * 1000},
+    {'ttft': previous_benchmark['ttft'] * 1000} if previous_benchmark else None,
+    'ttft',
+    lower_is_better=True
+  )
+  prompt_len = await format_metric_comparison(benchmark_data, previous_benchmark, 'prompt_len', "d")
+  response_len = await format_metric_comparison(benchmark_data, previous_benchmark, 'response_len', "d")
+
   # Create a simple JSON string of the topology
   topology = benchmark_data.get('configuration', {})
   topology_str = "```json\n" + json.dumps(topology, indent=2) + "\n```"
 
   message = (
     f"🚀 New Benchmark Result for **{config_name}**\n\n"
     f"📊 Performance Metrics:\n"
-    f"• Generation TPS: **{benchmark_data['generation_tps']:.2f}**\n"
-    f"• Prompt TPS: **{benchmark_data['prompt_tps']:.2f}**\n"
-    f"• TTFT: **{benchmark_data['ttft'] * 1000:.2f}ms**\n"
-    f"• Prompt Length: {benchmark_data['prompt_len']}\n"
-    f"• Response Length: {benchmark_data['response_len']}\n\n"
+    f"• Generation TPS: {gen_tps}\n"
+    f"• Prompt TPS: {prompt_tps}\n"
+    f"• TTFT: {ttft}ms\n"
+    f"• Prompt Length: {prompt_len}\n"
+    f"• Response Length: {response_len}\n\n"
     f"🔍 Run Details:\n"
     f"• Commit: {benchmark_data['commit'][:7]}\n"
     f"• Branch: {benchmark_data['branch']}\n"
@@ -165,7 +216,7 @@ async def generate_best():
   print(f"Last processed timestamp: {last_processed}")
 
   async with session.client('s3') as s3:
-    # Load all benchmark data
+    # Load all benchmark data once
     config_data = await load_data_from_s3()
     best_benchmarks = await get_best_benchmarks()
 
@@ -185,7 +236,8 @@ async def generate_best():
         'config': config,
        'model': model,
       })
-      await send_discord_notification(benchmark_with_info)
+      # Pass the already loaded config_data to avoid refetching
+      await send_discord_notification(benchmark_with_info, config_data)
 
       # Update the latest timestamp if this is the newest we've seen
       if timestamp > new_latest:

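For a quick sense of what the new formatting produces, here is a minimal standalone sketch: it copies `format_metric_comparison` as it appears in the diff above (docstring omitted) and feeds it hypothetical benchmark values; the sample numbers and the `main` driver are illustrative only and not part of the commit.

```python
import asyncio

# Copy of format_metric_comparison as introduced in this commit (docstring omitted).
async def format_metric_comparison(current, previous, metric, format_str=".2f", lower_is_better=False):
  current_val = current.get(metric, 0)
  if not previous:
    return f"**{current_val:{format_str}}**"

  prev_val = previous.get(metric, 0)
  diff = current_val - prev_val
  if lower_is_better:
    diff = -diff  # negative diffs become good, positive diffs bad

  if diff > 0:
    return f"**{current_val:{format_str}}** 🟢↑ ({'-' if lower_is_better else '+'}{abs(current_val - prev_val):{format_str}})"
  elif diff < 0:
    return f"**{current_val:{format_str}}** 🔴↓ ({'+' if lower_is_better else '-'}{abs(current_val - prev_val):{format_str}})"
  else:
    return f"**{current_val:{format_str}}** ⚪"

async def main():
  # Hypothetical benchmark rows, for illustration only.
  current = {'generation_tps': 52.3, 'ttft': 180.0}
  previous = {'generation_tps': 48.7, 'ttft': 210.0}

  # Higher generation TPS is better: green up arrow with a "+" delta.
  print(await format_metric_comparison(current, previous, 'generation_tps'))
  # -> **52.30** 🟢↑ (+3.60)

  # Lower TTFT is better: the drop also renders green, with a "-" delta.
  print(await format_metric_comparison(current, previous, 'ttft', lower_is_better=True))
  # -> **180.00** 🟢↑ (-30.00)

  # No previous benchmark: plain bold value, no trend indicator.
  print(await format_metric_comparison(current, None, 'generation_tps'))
  # -> **52.30**

asyncio.run(main())
```

In the committed code, the `previous` argument comes from `get_previous_benchmark`, which scans the configuration's entries in timestamp order and returns the most recent one before the current run; the TTFT call passes `lower_is_better=True`, so a latency drop renders with the green indicator and a negative delta, as in the second line above.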