Skip to content

Commit 9139f96

Browse files
committed
Update Evaluator spec and add unit tests
Signed-off-by: Shiva Krishna, Merla <[email protected]>
1 parent f7d09eb commit 9139f96

File tree

11 files changed

+1041
-179
lines changed

11 files changed

+1041
-179
lines changed

api/apps/v1alpha1/nemo_common_types.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,22 @@ limitations under the License.
1616

1717
package v1alpha1
1818

19-
type ArgoWorkFlows struct {
19+
// ArgoWorkflows defines configuration to connect to Argo Workflows service
20+
type ArgoWorkflows struct {
21+
// +kubebuilder:validation:MinLength=1
2022
Endpoint string `json:"endpoint"`
2123
ServiceAccount string `json:"serviceAccount"`
2224
}
2325

24-
type Milvus struct {
26+
// VectorDB defines configuration for connecting to external VectorDB
27+
type VectorDB struct {
28+
// +kubebuilder:validation:MinLength=1
2529
Endpoint string `json:"endpoint"`
2630
}
2731

28-
type DataStore struct {
32+
// Datastore defines configuration for connecting to NeMo Datastore service
33+
type Datastore struct {
34+
// +kubebuilder:validation:MinLength=1
2935
Endpoint string `json:"endpoint"`
3036
}
3137

api/apps/v1alpha1/nemo_evaluator_types.go

Lines changed: 141 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import (
2020
"fmt"
2121
"maps"
2222
"os"
23+
"strconv"
24+
"strings"
2325

2426
rendertypes "github.com/NVIDIA/k8s-nim-operator/internal/render/types"
2527
utils "github.com/NVIDIA/k8s-nim-operator/internal/utils"
@@ -74,14 +76,39 @@ type NemoEvaluatorSpec struct {
7476
GroupID *int64 `json:"groupID,omitempty"`
7577
RuntimeClass string `json:"runtimeClass,omitempty"`
7678

77-
// DatabaseConfig stores the database configuration for NEMO entitystore.
78-
// Required, must not be nil.
79-
//
80-
// +kubebuilder:validation:Required
81-
DatabaseConfig *DatabaseConfig `json:"databaseConfig,omitempty"`
82-
ArgoWorkFlows *ArgoWorkFlows `json:"argoWorkFlows,omitempty"`
83-
Milvus *Milvus `json:"milvus,omitempty"`
84-
DataStore *DataStore `json:"dataStore,omitempty"`
79+
// DatabaseConfig stores the database configuration for NeMo entitystore.
80+
DatabaseConfig *DatabaseConfig `json:"databaseConfig"`
81+
// ArgoWorkflows stores the Argo Workflows service endpoint and service account.
82+
ArgoWorkflows ArgoWorkflows `json:"argoWorkflows"`
83+
// VectorDB stores the vector db endpoint.
84+
VectorDB VectorDB `json:"vectorDB"`
85+
// Datastore stores the datastore endpoint.
86+
Datastore Datastore `json:"datastore"`
87+
88+
// OpenTelemetry Settings
89+
// +kubebuilder:validation:Optional
90+
OpenTelemetry OTelSpec `json:"otel,omitempty"`
91+
92+
// EvalLogLevel defines the evaluator log level (e.g., INFO, DEBUG).
93+
// +kubebuilder:validation:Optional
94+
// +kubebuilder:validation:Enum=INFO;DEBUG
95+
// +kubebuilder:default="INFO"
96+
EvalLogLevel string `json:"evalLogLevel,omitempty"`
97+
98+
// LogHandlers defines the log sink handlers (e.g., console, file).
99+
// +kubebuilder:validation:Optional
100+
// +kubebuilder:validation:Enum=console;file
101+
// +kubebuilder:default="console"
102+
LogHandlers string `json:"logHandlers,omitempty"`
103+
104+
// ConsoleLogLevel defines the console log level (e.g., INFO, DEBUG).
105+
// +kubebuilder:validation:Optional
106+
// +kubebuilder:validation:Enum=INFO;DEBUG
107+
// +kubebuilder:default="INFO"
108+
ConsoleLogLevel string `json:"consoleLogLevel,omitempty"`
109+
110+
// EnableValidation indicates whether the validation jobs are enabled; they are enabled when this field is unset.
111+
EnableValidation *bool `json:"enableValidation,omitempty"`
85112
}
86113

87114
// NemoEvaluatorStatus defines the observed state of NemoEvaluator
@@ -157,80 +184,141 @@ func (n *NemoEvaluator) GetStandardEnv() []corev1.EnvVar {
157184
},
158185
{
159186
Name: "EVALUATOR_PORT",
160-
Value: "7331",
161-
},
162-
{
163-
Name: "POSTGRES_DB_PASSWORD",
164-
ValueFrom: &corev1.EnvVarSource{
165-
SecretKeyRef: &corev1.SecretKeySelector{
166-
Key: n.Spec.DatabaseConfig.Credentials.PasswordKey,
167-
LocalObjectReference: corev1.LocalObjectReference{
168-
Name: n.Spec.DatabaseConfig.Credentials.SecretName,
169-
},
170-
},
171-
},
172-
},
173-
{
174-
Name: "POSTGRES_URI",
175-
Value: fmt.Sprintf("postgresql://%s:$(POSTGRES_DB_PASSWORD)@%s:%d/%s", n.Spec.DatabaseConfig.Credentials.User, n.Spec.DatabaseConfig.Host, n.Spec.DatabaseConfig.Port, n.Spec.DatabaseConfig.DatabaseName),
187+
Value: fmt.Sprintf("%d", n.GetServicePort()),
176188
},
177189
{
178190
Name: "ARGO_HOST",
179-
Value: n.Spec.ArgoWorkFlows.Endpoint,
191+
Value: n.Spec.ArgoWorkflows.Endpoint,
180192
},
181193
{
182194
Name: "MILVUS_URL",
183-
Value: n.Spec.Milvus.Endpoint,
195+
Value: n.Spec.VectorDB.Endpoint,
184196
},
185197
{
186198
Name: "SERVICE_ACCOUNT",
187-
Value: n.Spec.ArgoWorkFlows.ServiceAccount,
199+
Value: n.Spec.ArgoWorkflows.ServiceAccount,
188200
},
189201
{
190202
Name: "DATA_STORE_HOST",
191-
Value: n.Spec.DataStore.Endpoint,
203+
Value: n.Spec.Datastore.Endpoint,
192204
},
193205
{
194206
Name: "EVAL_CONTAINER",
195207
Value: n.GetImage(),
196208
},
197209
{
198-
Name: "EVAL_ENABLE_VALIDATION",
199-
Value: "True",
200-
},
201-
{
202-
Name: "OTEL_TRACES_EXPORTER",
203-
Value: "none",
204-
},
205-
{
206-
Name: "OTEL_METRICS_EXPORTER",
207-
Value: "none",
208-
},
209-
{
210-
Name: "OTEL_LOGS_EXPORTER",
211-
Value: "none",
210+
Name: "LOG_HANDLERS",
211+
Value: n.Spec.LogHandlers,
212212
},
213213
{
214-
Name: "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED",
215-
Value: "false",
214+
Name: "CONSOLE_LOG_LEVEL",
215+
Value: n.Spec.ConsoleLogLevel,
216216
},
217217
{
218-
Name: "LOG_HANDLERS",
219-
Value: "console",
218+
Name: "EVAL_LOG_LEVEL",
219+
Value: n.Spec.EvalLogLevel,
220220
},
221+
}
222+
223+
if n.IsValidationEnabled() {
224+
envVars = append(envVars,
225+
corev1.EnvVar{Name: "EVAL_ENABLE_VALIDATION", Value: "True"})
226+
}
227+
228+
// Append the environment variables for Postgres
229+
envVars = append(envVars, n.GetPostgresEnv()...)
230+
231+
// Append the environment variables for OTel
232+
if n.IsOtelEnabled() {
233+
envVars = append(envVars, n.GetOtelEnv()...)
234+
}
235+
236+
return envVars
237+
}
238+
239+
// IsValidationEnabled reports whether the validation jobs are enabled; they are enabled by default when EnableValidation is unset.
240+
func (n *NemoEvaluator) IsValidationEnabled() bool {
241+
if n.Spec.EnableValidation == nil {
242+
// validation jobs are enabled by default
243+
return true
244+
}
245+
return *n.Spec.EnableValidation
246+
}
247+
248+
// IsOtelEnabled returns true if Open Telemetry Collector is enabled
249+
func (n *NemoEvaluator) IsOtelEnabled() bool {
250+
return n.Spec.OpenTelemetry.Enabled != nil && *n.Spec.OpenTelemetry.Enabled
251+
}
252+
253+
// GetOtelEnv generates OpenTelemetry-related environment variables.
254+
func (n *NemoEvaluator) GetOtelEnv() []corev1.EnvVar {
255+
var otelEnvVars []corev1.EnvVar
256+
257+
otelEnvVars = append(otelEnvVars,
258+
corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: n.Spec.OpenTelemetry.ExporterOtlpEndpoint},
259+
corev1.EnvVar{Name: "OTEL_TRACES_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.TracesExporter},
260+
corev1.EnvVar{Name: "OTEL_METRICS_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.MetricsExporter},
261+
corev1.EnvVar{Name: "OTEL_LOGS_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.LogsExporter},
262+
corev1.EnvVar{Name: "OTEL_LOG_LEVEL", Value: n.Spec.OpenTelemetry.LogLevel},
263+
)
264+
265+
if len(n.Spec.OpenTelemetry.ExcludedUrls) > 0 {
266+
otelEnvVars = append(otelEnvVars, corev1.EnvVar{
267+
Name: "OTEL_PYTHON_EXCLUDED_URLS",
268+
Value: strings.Join(n.Spec.OpenTelemetry.ExcludedUrls, ","),
269+
})
270+
}
271+
272+
var enableLog bool = true
273+
if n.Spec.OpenTelemetry.DisableLogging != nil {
274+
enableLog = !*n.Spec.OpenTelemetry.DisableLogging
275+
}
276+
otelEnvVars = append(otelEnvVars, corev1.EnvVar{
277+
Name: "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED",
278+
Value: strconv.FormatBool(enableLog),
279+
})
280+
281+
return otelEnvVars
282+
}
283+
284+
// GetPostgresEnv returns the PostgreSQL environment variables for a Kubernetes pod.
285+
func (n *NemoEvaluator) GetPostgresEnv() []corev1.EnvVar {
286+
envVars := []corev1.EnvVar{
221287
{
222-
Name: "CONSOLE_LOG_LEVEL",
223-
Value: "INFO",
288+
Name: "POSTGRES_DB_PASSWORD",
289+
ValueFrom: &corev1.EnvVarSource{
290+
SecretKeyRef: &corev1.SecretKeySelector{
291+
Key: n.Spec.DatabaseConfig.Credentials.PasswordKey,
292+
LocalObjectReference: corev1.LocalObjectReference{
293+
Name: n.Spec.DatabaseConfig.Credentials.SecretName,
294+
},
295+
},
296+
},
224297
},
225298
{
226-
Name: "EVAL_LOG_LEVEL",
227-
Value: "INFO",
299+
Name: "POSTGRES_URI",
300+
Value: n.GeneratePostgresConnString(),
228301
},
229302
}
230303

231304
return envVars
232305
}
233306

307+
// GeneratePostgresConnString generates a PostgreSQL connection string using the database config.
308+
func (n *NemoEvaluator) GeneratePostgresConnString() string {
309+
// Construct the connection string
310+
connString := fmt.Sprintf(
311+
"postgresql://%s:%s@%s:%d/%s",
312+
n.Spec.DatabaseConfig.Credentials.User,
313+
"$(POSTGRES_DB_PASSWORD)",
314+
n.Spec.DatabaseConfig.Host,
315+
n.Spec.DatabaseConfig.Port,
316+
n.Spec.DatabaseConfig.DatabaseName,
317+
)
318+
319+
return connString
320+
}
321+
234322
// GetStandardAnnotations returns default annotations to apply to the NemoEvaluator instance
235323
func (n *NemoEvaluator) GetStandardAnnotations() map[string]string {
236324
standardAnnotations := map[string]string{
@@ -521,7 +609,7 @@ func (n *NemoEvaluator) GetDeploymentParams() *rendertypes.DeploymentParams {
521609
// Set runtime class
522610
params.RuntimeClassName = n.GetRuntimeClass()
523611

524-
params.Ports = []corev1.ContainerPort{{Name: "http", Protocol: corev1.ProtocolTCP, ContainerPort: 7331}}
612+
params.Ports = []corev1.ContainerPort{{Name: "http", Protocol: corev1.ProtocolTCP, ContainerPort: n.GetServicePort()}}
525613
return params
526614
}
527615

@@ -588,8 +676,8 @@ func (n *NemoEvaluator) GetServiceParams() *rendertypes.ServiceParams {
588676
params.Type = "ClusterIP"
589677

590678
// Set service ports
591-
params.Port = 7331
592-
params.TargetPort = 7331
679+
params.Port = n.GetServicePort()
680+
params.TargetPort = n.GetServicePort()
593681
params.PortName = "http"
594682
return params
595683
}

0 commit comments

Comments
 (0)