Skip to content

Commit 2ddd45f

Browse files
committed
Add examples_v_models script
1 parent 81fcad2 commit 2ddd45f

File tree

1 file changed

+56
-0
lines changed

1 file changed

+56
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
# using existing swe-bench results logged to weave (see ingest dir),
2+
# produce a table with instance_id as rows, and models as columns.
3+
# useful for finding easy / hard examples
4+
5+
import sys
6+
import pandas as pd
7+
8+
import weave
9+
10+
from ...weave_next.weave_query import calls
11+
12+
13+
def main():
    """Build an instance-by-model table of SWE-bench "resolved" results.

    When any command-line argument is given, the
    ``Evaluation.predict_and_score`` calls are fetched from the
    ``weavedev-swebench5`` weave project and cached in ``verified.parquet``;
    with no arguments the cached parquet file is read instead.

    The rows are pivoted so that each instance id is a row and each model is
    a column. A ``models_resolved_true`` column (number of models that
    resolved the instance) is placed first, rows are ordered by that count
    and model columns by their own totals, both descending. The head of the
    table is printed and the full table is written to ``pivot_table.csv``.

    Raises:
        ValueError: propagated from ``DataFrame.pivot`` when the same
            (instance, model) pair appears more than once in the data.
    """
    count_col = "models_resolved_true"

    if len(sys.argv) > 1:
        # Any argument means "refresh": pull the calls from weave and cache
        # them locally so later runs can work offline.
        wc = weave.init("weavedev-swebench5")
        c = calls(wc, "Evaluation.predict_and_score", expand_refs=["inputs.example"])
        df = c.to_pandas()
        df.to_parquet("verified.parquet", engine="pyarrow")
    else:
        df = pd.read_parquet("verified.parquet")

    pivot_df = df.pivot(
        index="inputs.example.instance_id",
        columns="inputs.model",
        values="output.model_output.resolved",
    )

    # Shorten each model ref to the name between "object/" and the last ":".
    # Refs that do not match keep their original label instead of becoming
    # NaN column names.
    model_refs = pivot_df.columns
    short_names = model_refs.str.extract(r"object/(.+):", expand=False)
    pivot_df.columns = short_names.where(short_names.notna(), model_refs)

    # Rank the models BEFORE the count column exists; otherwise the count
    # column is ranked as if it were a model and ends up duplicated in the
    # final column selection.
    model_order = pivot_df.sum().sort_values(ascending=False).index.tolist()

    # Number of models that resolved each instance.
    pivot_df[count_col] = pivot_df.sum(axis=1)

    # Count column first, then models from most to least successful.
    pivot_df = pivot_df[[count_col] + model_order]

    # Easiest instances (resolved by the most models) at the top.
    pivot_df = pivot_df.sort_values(by=count_col, ascending=False)

    # Display the first few rows of the resulting table
    print(pivot_df.head())

    # Save the full table for later inspection
    pivot_df.to_csv("pivot_table.csv")
53+
54+
55+
if __name__ == "__main__":
    # Only build the table when executed as a script, not when imported.
    main()

0 commit comments

Comments
 (0)