
Commit 4bde0b3

Merge pull request #179 from danmcp/handlenoresult
Handle no valid eval results for mt_bench
2 parents: 8e32704 + 6385e99

File tree

2 files changed: +22 −2 lines changed


src/instructlab/eval/exceptions.py (+12)
@@ -124,6 +124,18 @@ def __init__(self, tasks_dir) -> None:
         self.message = f"Invalid Tasks Dir: {tasks_dir}"


+class InvalidEvaluationResult(EvalError):
+    """
+    Error raised for invalid eval results
+    Attributes
+        message     error message to be printed on raise
+    """
+
+    def __init__(self, message) -> None:
+        super().__init__()
+        self.message = message
+
+
 class ModelServingAPIError(EvalError):
     """
     Error raised when reply retrieval from model serving fails.
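The new class follows the existing EvalError pattern in this module, so callers can catch either the specific exception or the shared base class. A minimal sketch (not part of this commit), assuming a version of instructlab-eval that includes this change:

from instructlab.eval import exceptions

try:
    # Simulate the failure path added in mt_bench_judgment.py below
    raise exceptions.InvalidEvaluationResult("Evaluation provided no result.")
except exceptions.EvalError as e:  # the base class also catches the new subclass
    print(e.message)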

src/instructlab/eval/mt_bench_judgment.py (+10 −2)
@@ -8,6 +8,9 @@
 import numpy as np
 import pandas as pd

+# First Party
+from instructlab.eval import exceptions
+
 # Local
 from .logger_config import setup_logger
 from .mt_bench_common import (

@@ -97,8 +100,13 @@ def make_judgment(
     turn_scores = []
     # First turn
     df_1 = judgment_df[judgment_df["turn"] == 1].groupby(["model", "turn"]).mean()
-    overall_score = df_1["score"].iloc[0]
-    turn_scores.append(overall_score)
+    if len(df_1.index) > 0:
+        overall_score = df_1["score"].iloc[0]
+        turn_scores.append(overall_score)
+    else:
+        raise exceptions.InvalidEvaluationResult(
+            "Evaluation provided no result. See logs for more details."
+        )

     if bench_name == "mt_bench":
         # Second turn
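The guard matters because judgment_df may contain no valid turn-1 scores at all: the filtered groupby then yields an empty frame, and the old df_1["score"].iloc[0] fails with a bare IndexError rather than a meaningful error. A small standalone pandas illustration (not from the commit; the empty judgment_df is made up to show the failure mode):

import pandas as pd

# No valid judgments at all: the turn == 1 filter selects nothing
judgment_df = pd.DataFrame({"model": [], "turn": [], "score": []})
df_1 = judgment_df[judgment_df["turn"] == 1].groupby(["model", "turn"]).mean()

print(len(df_1.index))  # 0, so the new code raises InvalidEvaluationResult
# df_1["score"].iloc[0] here would raise IndexError (the old behavior)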
