
Commit 33bd3d9

Revert "Add edit leaderboard + citation" (#3691)

1 parent: bfc26a0
3 files changed: +43 -124 lines

fastchat/serve/gradio_web_server.py (-13 lines)

@@ -66,19 +66,6 @@
 enable_moderation = False
 use_remote_storage = False

-default_citation_md = """
-### Citation
-Please cite the following paper if you find our leaderboard or dataset helpful.
-```
-@misc{chiang2024chatbot,
-    title={Chatbot Arena: An Open Platform for Evaluating LLMs by Human Preference},
-    author={Wei-Lin Chiang and Lianmin Zheng and Ying Sheng and Anastasios Nikolas Angelopoulos and Tianle Li and Dacheng Li and Hao Zhang and Banghua Zhu and Michael Jordan and Joseph E. Gonzalez and Ion Stoica},
-    year={2024},
-    eprint={2403.04132},
-    archivePrefix={arXiv},
-    primaryClass={cs.AI}
-}
-"""
 acknowledgment_md = """
 ### Terms of Service

fastchat/serve/monitor/copilot_arena.py (+27 -90 lines)

@@ -6,16 +6,6 @@
 from fastchat.serve.monitor.monitor import recompute_final_ranking

 copilot_arena_leaderboard_url = os.getenv("COPILOT_ARENA_LEADERBOARD_URL")
-copilot_arena_citation_md = """
-### Citation
-Please cite the following paper if you find our leaderboard helpful.
-```
-@misc{chi2025copilot,
-    title={Copilot Arena: A Platform for Code LLM Evaluation in the Wild},
-    author={Wayne Chi and Valerie Chen and Wei-Lin Chiang and Anastasios N. Angelopoulos and Aditya Mittal and Naman Jain and Tianjun Zhang and Ion Stoica and Chris Donahue and Ameet Talwalkar}
-    year={2025},
-}
-"""


 def process_copilot_arena_leaderboard(leaderboard):

@@ -41,88 +31,41 @@ def process_copilot_arena_leaderboard(leaderboard):
         by=["Rank* (UB)", "score"], ascending=[True, False]
     )

-    leaderboard = leaderboard.rename(
-        columns={
-            "name": "Model",
-            "confidence_interval": "Confidence Interval",
-            "score": "Arena Score",
-            "organization": "Organization",
-            "votes": "Votes",
-        }
-    )
-
-    column_order = [
-        "Rank* (UB)",
-        "Model",
-        "Arena Score",
-        "Confidence Interval",
-        "Votes",
-        "Organization",
-    ]
-    leaderboard = leaderboard[column_order]
-
     return leaderboard


-def make_copilot_arena_leaderboard_md(leaderboard, interaction_mode):
-    num_models = len(leaderboard)
-    total_battles = int(leaderboard["Votes"].sum()) // 2
-    space = "&nbsp;&nbsp;&nbsp;"
-    leaderboard_md = f"""### {interaction_mode}
-#### {space} #models: **{num_models}** {space} #votes: **{"{:,}".format(total_battles)}** {space}
-"""
-    return leaderboard_md
-
-
 def build_copilot_arena_tab():
     response = requests.get(copilot_arena_leaderboard_url)
     if response.status_code == 200:
-        response_json = response.json()
-
-        def update_copilot_arena_leaderboard(interaction_mode):
-            if interaction_mode == "Code Completion":
-                leaderboard = pd.DataFrame(response_json["elo_data"])
-            else:
-                leaderboard = pd.DataFrame(response_json["edit_elo_data"])
-            leaderboard = process_copilot_arena_leaderboard(leaderboard)
-            leaderboard_df = gr.DataFrame(
-                leaderboard,
-                datatype=["str" for _ in leaderboard.columns],
-                elem_id="arena_hard_leaderboard",
-                height=600,
-                wrap=True,
-                interactive=False,
-                column_widths=[70, 130, 60, 80, 50, 80],
-            )
-
-            md = make_copilot_arena_leaderboard_md(leaderboard, interaction_mode)
-            leaderboard_md = gr.Markdown(md, elem_id="leaderboard_markdown")
-
-            return leaderboard_df, leaderboard_md
-
-        gr.Markdown(
-            "[Copilot Arena](https://blog.lmarena.ai/blog/2024/copilot-arena/) is a free AI coding assistant that provides paired responses from different state-of-the-art LLMs.",
-            elem_id="copilot_arena_introduction",
-        )
-
-        leaderboard = pd.DataFrame(response_json["elo_data"])
+        leaderboard = pd.DataFrame(response.json()["elo_data"])
         leaderboard = process_copilot_arena_leaderboard(leaderboard)
-        with gr.Row():
-            with gr.Column(scale=2):
-                interaction_mode_dropdown = gr.Radio(
-                    choices=["Code Completion", "Code Edit"],
-                    label="Interaction Mode",
-                    value="Code Completion",
-                )
-                vote_data = make_copilot_arena_leaderboard_md(
-                    leaderboard, "Code Completion"
-                )
-            with gr.Column(scale=3, variant="panel"):
-                interaction_mode_details = gr.Markdown(
-                    vote_data, elem_id="interaction_mode_details"
-                )
+        leaderboard = leaderboard.rename(
+            columns={
+                "name": "Model",
+                "confidence_interval": "Confidence Interval",
+                "score": "Arena Score",
+                "organization": "Organization",
+                "votes": "Votes",
+            }
+        )

-        leaderboard_df = gr.DataFrame(
+        column_order = [
+            "Rank* (UB)",
+            "Model",
+            "Arena Score",
+            "Confidence Interval",
+            "Votes",
+            "Organization",
+        ]
+        leaderboard = leaderboard[column_order]
+        num_models = len(leaderboard)
+        total_battles = int(leaderboard["Votes"].sum()) // 2
+        md = f"""
+[Copilot Arena](https://blog.lmarena.ai/blog/2024/copilot-arena/) is a free AI coding assistant that provides paired responses from different state-of-the-art LLMs. This leaderboard contains the relative performance and ranking of {num_models} models over {total_battles} battles.
+"""
+
+        gr.Markdown(md, elem_id="leaderboard_markdown")
+        gr.DataFrame(
             leaderboard,
             datatype=["str" for _ in leaderboard.columns],
             elem_id="arena_hard_leaderboard",

@@ -140,11 +83,5 @@ def update_copilot_arena_leaderboard(interaction_mode):
 """,
             elem_id="leaderboard_markdown",
         )
-
-        interaction_mode_dropdown.change(
-            update_copilot_arena_leaderboard,
-            inputs=[interaction_mode_dropdown],
-            outputs=[leaderboard_df, interaction_mode_details],
-        )
     else:
         gr.Markdown("Error with fetching Copilot Arena data. Check back in later.")

fastchat/serve/monitor/monitor.py (+16 -21 lines)

@@ -1038,13 +1038,10 @@ def build_leaderboard_tab(
     from fastchat.serve.monitor.copilot_arena import (
         build_copilot_arena_tab,
         copilot_arena_leaderboard_url,
-        copilot_arena_citation_md,
     )

     if copilot_arena_leaderboard_url:
-        with gr.Tab(
-            "Copilot Arena Leaderboard", id=5
-        ) as copilot_arena_leaderboard_tab:
+        with gr.Tab("Copilot Arena Leaderboard", id=5):
             build_copilot_arena_tab()
     else:
         print(

@@ -1063,29 +1060,27 @@ def build_leaderboard_tab(
     else:
         pass

-    from fastchat.serve.gradio_web_server import default_citation_md, acknowledgment_md
+    from fastchat.serve.gradio_web_server import acknowledgment_md

     with gr.Accordion(
         "Citation",
         open=True,
     ):
-        leaderboard_citation_md = gr.Markdown(
-            default_citation_md, elem_id="leaderboard_markdown"
-        )
+        citation_md = """
+### Citation
+Please cite the following paper if you find our leaderboard or dataset helpful.
+```
+@misc{chiang2024chatbot,
+    title={Chatbot Arena: An Open Platform for Evaluating LLMs by Human Preference},
+    author={Wei-Lin Chiang and Lianmin Zheng and Ying Sheng and Anastasios Nikolas Angelopoulos and Tianle Li and Dacheng Li and Hao Zhang and Banghua Zhu and Michael Jordan and Joseph E. Gonzalez and Ion Stoica},
+    year={2024},
+    eprint={2403.04132},
+    archivePrefix={arXiv},
+    primaryClass={cs.AI}
+}
+"""
+        gr.Markdown(citation_md, elem_id="leaderboard_markdown")
     gr.Markdown(acknowledgment_md, elem_id="ack_markdown")
-    if copilot_arena_leaderboard_tab:
-        copilot_arena_leaderboard_tab.select(
-            fn=lambda: gr.Markdown(copilot_arena_citation_md),
-            inputs=[],
-            outputs=[leaderboard_citation_md],
-        )
-    for tab in tabs.children:
-        if (not copilot_arena_leaderboard_tab) or tab != copilot_arena_leaderboard_tab:
-            tab.select(
-                fn=lambda: gr.Markdown(default_citation_md),
-                inputs=[],
-                outputs=[leaderboard_citation_md],
-            )

     return [md_1] + gr_plots
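
The monitor.py hunk likewise removes the dynamic citation swap, where selecting a tab rewrote a shared Markdown component through each tab's select event. A stripped-down sketch of that mechanism (tab contents and citation strings are placeholders, not the repository's values):

```python
import gradio as gr

# Placeholder citations; the real strings are the BibTeX blocks shown above.
DEFAULT_CITATION = "### Citation\n@misc{chiang2024chatbot, ...}"
COPILOT_CITATION = "### Citation\n@misc{chi2025copilot, ...}"

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.Tab("Arena Leaderboard") as arena_tab:
            gr.Markdown("arena leaderboard content")
        with gr.Tab("Copilot Arena Leaderboard") as copilot_tab:
            gr.Markdown("copilot leaderboard content")
    # One shared Markdown component below the tabs holds the current citation.
    citation = gr.Markdown(DEFAULT_CITATION)
    # Selecting a tab swaps which citation is displayed.
    arena_tab.select(lambda: DEFAULT_CITATION, inputs=[], outputs=[citation])
    copilot_tab.select(lambda: COPILOT_CITATION, inputs=[], outputs=[citation])

demo.launch()
```

With the revert, the Chatbot Arena citation is inlined as a single static block inside the accordion, so no select handlers are needed.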
