|
| 1 | +# using existing swe-bench results logged to weave (see ingest dir), |
| 2 | +# produce a table with instance_id as rows, and models as columns. |
| 3 | +# useful for finding easy / hard examples |
| 4 | + |
| 5 | +import sys |
| 6 | +import pandas as pd |
| 7 | + |
| 8 | +import weave |
| 9 | + |
| 10 | +from ...weave_next.weave_query import calls |
| 11 | + |
| 12 | + |
def _build_pivot_table(df):
    """Reshape per-prediction rows into an instance-by-model table.

    Args:
        df: DataFrame with the columns ``inputs.example.instance_id``,
            ``inputs.model`` and ``output.model_output.resolved``.

    Returns:
        A DataFrame indexed by instance id. The first column,
        ``models_resolved_true``, is the row-wise sum of the resolved
        flags; the remaining columns are one per model, ordered from the
        model with the highest column sum to the lowest. Rows are sorted
        by ``models_resolved_true`` in descending order.

    Raises:
        ValueError: propagated from ``DataFrame.pivot`` when the same
            (instance id, model) pair occurs more than once in ``df``.
    """
    pivot_df = df.pivot(
        index="inputs.example.instance_id",
        columns="inputs.model",
        values="output.model_output.resolved",
    )

    # Keep only the text between "object/" and the last ":" of each model
    # reference as the column label.
    pivot_df.columns = pivot_df.columns.str.extract(r"object/(.+):", expand=False)

    # Rank the model columns *before* the count column exists. Summing after
    # adding it put "models_resolved_true" into the ranking as well, which
    # emitted that column twice in the final table.
    # Positions (not labels) drive the reorder so it still selects each
    # column exactly once if two references reduce to the same label.
    resolved_per_model = pivot_df.sum().reset_index(drop=True)
    column_order = resolved_per_model.sort_values(
        ascending=False, kind="stable"
    ).index.tolist()
    pivot_df = pivot_df.iloc[:, column_order]

    # Count, per instance, how many models resolved it; keep it leftmost.
    pivot_df.insert(0, "models_resolved_true", pivot_df.sum(axis=1))

    # A stable sort keeps tied instances in a deterministic order.
    return pivot_df.sort_values(
        by="models_resolved_true", ascending=False, kind="stable"
    )


def main():
    """Build the instance-by-model resolved table and write it to disk.

    When at least one command-line argument is present, the predictions
    are fetched through ``calls`` from the "weavedev-swebench5" weave
    project and cached to ``verified.parquet``; otherwise the cached
    ``verified.parquet`` is read. The first rows of the resulting table
    are printed and the full table is written to ``pivot_table.csv``.
    """
    if len(sys.argv) > 1:
        # Any extra argument means "refresh the local cache from weave".
        wc = weave.init("weavedev-swebench5")
        c = calls(wc, "Evaluation.predict_and_score", expand_refs=["inputs.example"])
        df = c.to_pandas()

        df.to_parquet("verified.parquet", engine="pyarrow")
    else:
        df = pd.read_parquet("verified.parquet")

    pivot_df = _build_pivot_table(df)

    # Display the first few rows of the resulting table
    print(pivot_df.head())

    # Save the full pivot table next to the parquet cache
    pivot_df.to_csv("pivot_table.csv")
| 53 | + |
| 54 | + |
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments