Skip to content

Commit dd95ee9

Browse files
committed
Changes to support NeMo Evaluator 25.06
* Add support for new evaluation images (BFCL, Agentic Evaluation)
* Remove the evaluation image validation markers (some images are optional across versions)
* Update unit tests

Signed-off-by: Shiva Krishna, Merla <[email protected]>
1 parent 5057724 commit dd95ee9

File tree

6 files changed

+44
-67
lines changed

6 files changed

+44
-67
lines changed

api/apps/v1alpha1/nemo_evaluator_types.go

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -119,21 +119,17 @@ type NemoEvaluatorSpec struct {
119119
EvaluationImages EvaluationImages `json:"evaluationImages"`
120120
}
121121

122+
// EvaluationImages for different evaluation targets
122123
type EvaluationImages struct {
123-
// +kubebuilder:validation:MinLength=1
124-
BigcodeEvalHarness string `json:"bigcodeEvalHarness"`
125-
// +kubebuilder:validation:MinLength=1
126-
LmEvalHarness string `json:"lmEvalHarness"`
127-
// +kubebuilder:validation:MinLength=1
128-
SimilarityMetrics string `json:"similarityMetrics"`
129-
// +kubebuilder:validation:MinLength=1
130-
LlmAsJudge string `json:"llmAsJudge"`
131-
// +kubebuilder:validation:MinLength=1
132-
MtBench string `json:"mtBench"`
133-
// +kubebuilder:validation:MinLength=1
134-
Retriever string `json:"retriever"`
135-
// +kubebuilder:validation:MinLength=1
136-
Rag string `json:"rag"`
124+
BigcodeEvalHarness string `json:"bigcodeEvalHarness,omitempty"`
125+
LmEvalHarness string `json:"lmEvalHarness,omitempty"`
126+
SimilarityMetrics string `json:"similarityMetrics,omitempty"`
127+
LlmAsJudge string `json:"llmAsJudge,omitempty"`
128+
MtBench string `json:"mtBench,omitempty"`
129+
Retriever string `json:"retriever,omitempty"`
130+
Rag string `json:"rag,omitempty"`
131+
BFCL string `json:"bfcl,omitempty"`
132+
AgenticEval string `json:"agenticEval,omitempty"`
137133
}
138134

139135
// NemoEvaluatorStatus defines the observed state of NemoEvaluator.
@@ -197,6 +193,14 @@ func (ei EvaluationImages) GetEvaluationImageEnv() []corev1.EnvVar {
197193
Name: "RAG",
198194
Value: ei.Rag,
199195
},
196+
{
197+
Name: "BFCL",
198+
Value: ei.BFCL,
199+
},
200+
{
201+
Name: "AGENTIC_EVAL",
202+
Value: ei.AgenticEval,
203+
},
200204
}
201205
}
202206

bundle/manifests/apps.nvidia.com_nemoevaluators.yaml

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -291,35 +291,24 @@ spec:
291291
description: EvaluationImages defines the external images used for
292292
evaluation
293293
properties:
294+
agenticEval:
295+
type: string
296+
bfcl:
297+
type: string
294298
bigcodeEvalHarness:
295-
minLength: 1
296299
type: string
297300
llmAsJudge:
298-
minLength: 1
299301
type: string
300302
lmEvalHarness:
301-
minLength: 1
302303
type: string
303304
mtBench:
304-
minLength: 1
305305
type: string
306306
rag:
307-
minLength: 1
308307
type: string
309308
retriever:
310-
minLength: 1
311309
type: string
312310
similarityMetrics:
313-
minLength: 1
314311
type: string
315-
required:
316-
- bigcodeEvalHarness
317-
- llmAsJudge
318-
- lmEvalHarness
319-
- mtBench
320-
- rag
321-
- retriever
322-
- similarityMetrics
323312
type: object
324313
expose:
325314
description: ExposeV1 defines attributes to expose the service.

config/crd/bases/apps.nvidia.com_nemoevaluators.yaml

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -291,35 +291,24 @@ spec:
291291
description: EvaluationImages defines the external images used for
292292
evaluation
293293
properties:
294+
agenticEval:
295+
type: string
296+
bfcl:
297+
type: string
294298
bigcodeEvalHarness:
295-
minLength: 1
296299
type: string
297300
llmAsJudge:
298-
minLength: 1
299301
type: string
300302
lmEvalHarness:
301-
minLength: 1
302303
type: string
303304
mtBench:
304-
minLength: 1
305305
type: string
306306
rag:
307-
minLength: 1
308307
type: string
309308
retriever:
310-
minLength: 1
311309
type: string
312310
similarityMetrics:
313-
minLength: 1
314311
type: string
315-
required:
316-
- bigcodeEvalHarness
317-
- llmAsJudge
318-
- lmEvalHarness
319-
- mtBench
320-
- rag
321-
- retriever
322-
- similarityMetrics
323312
type: object
324313
expose:
325314
description: ExposeV1 defines attributes to expose the service.

config/samples/nemo/latest/apps_v1alpha1_nemoevaluator.yaml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,18 @@ metadata:
55
namespace: nemo
66
spec:
77
evaluationImages:
8-
bigcodeEvalHarness: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-bigcode:0.12.13"
9-
lmEvalHarness: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-lm-eval-harness:0.12.15"
10-
similarityMetrics: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-custom-eval:0.12.13"
11-
llmAsJudge: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-llm-as-a-judge:0.12.15"
12-
mtBench: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-llm-as-a-judge:0.12.15"
13-
retriever: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-retriever:0.12.13"
14-
rag: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-rag:0.12.13"
8+
bigcodeEvalHarness: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-bigcode:0.12.21"
9+
lmEvalHarness: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-lm-eval-harness:0.12.21"
10+
similarityMetrics: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-custom-eval:0.12.21"
11+
llmAsJudge: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-llm-as-a-judge:0.12.21"
12+
mtBench: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-llm-as-a-judge:0.12.21"
13+
retriever: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-retriever:0.12.21"
14+
rag: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-rag:0.12.21"
15+
bfcl: "nvcr.io/nvidia/nemo-microservices/eval-factory-benchmark-bfcl:25.6.1"
16+
agenticEval: "nvcr.io/nvidia/nemo-microservices/eval-factory-benchmark-agentic-eval:25.6.1"
1517
image:
1618
repository: nvcr.io/nvidia/nemo-microservices/evaluator
17-
tag: "25.04"
19+
tag: "25.06"
1820
pullPolicy: IfNotPresent
1921
pullSecrets:
2022
- ngc-secret

deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoevaluators.yaml

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -291,35 +291,24 @@ spec:
291291
description: EvaluationImages defines the external images used for
292292
evaluation
293293
properties:
294+
agenticEval:
295+
type: string
296+
bfcl:
297+
type: string
294298
bigcodeEvalHarness:
295-
minLength: 1
296299
type: string
297300
llmAsJudge:
298-
minLength: 1
299301
type: string
300302
lmEvalHarness:
301-
minLength: 1
302303
type: string
303304
mtBench:
304-
minLength: 1
305305
type: string
306306
rag:
307-
minLength: 1
308307
type: string
309308
retriever:
310-
minLength: 1
311309
type: string
312310
similarityMetrics:
313-
minLength: 1
314311
type: string
315-
required:
316-
- bigcodeEvalHarness
317-
- llmAsJudge
318-
- lmEvalHarness
319-
- mtBench
320-
- rag
321-
- retriever
322-
- similarityMetrics
323312
type: object
324313
expose:
325314
description: ExposeV1 defines attributes to expose the service.

internal/controller/nemo_evaluator_controller_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,8 @@ var _ = Describe("NemoEvaluator Controller", func() {
206206
MtBench: "MtBench",
207207
Retriever: "Retriever",
208208
Rag: "Rag",
209+
BFCL: "BFCL",
210+
AgenticEval: "AgenticEval",
209211
},
210212
},
211213
Status: appsv1alpha1.NemoEvaluatorStatus{
@@ -462,6 +464,8 @@ var _ = Describe("NemoEvaluator Controller", func() {
462464
corev1.EnvVar{Name: "MT_BENCH", Value: nemoEvaluator.Spec.EvaluationImages.MtBench},
463465
corev1.EnvVar{Name: "RETRIEVER", Value: nemoEvaluator.Spec.EvaluationImages.Retriever},
464466
corev1.EnvVar{Name: "RAG", Value: nemoEvaluator.Spec.EvaluationImages.Rag},
467+
corev1.EnvVar{Name: "BFCL", Value: nemoEvaluator.Spec.EvaluationImages.BFCL},
468+
corev1.EnvVar{Name: "AGENTIC_EVAL", Value: nemoEvaluator.Spec.EvaluationImages.AgenticEval},
465469
))
466470
})
467471

0 commit comments

Comments
 (0)