@@ -20,6 +20,8 @@ import (
20
20
"fmt"
21
21
"maps"
22
22
"os"
23
+ "strconv"
24
+ "strings"
23
25
24
26
rendertypes "github.com/NVIDIA/k8s-nim-operator/internal/render/types"
25
27
utils "github.com/NVIDIA/k8s-nim-operator/internal/utils"
@@ -74,14 +76,39 @@ type NemoEvaluatorSpec struct {
74
76
GroupID * int64 `json:"groupID,omitempty"`
75
77
RuntimeClass string `json:"runtimeClass,omitempty"`
76
78
77
- // DatabaseConfig stores the database configuration for NEMO entitystore.
78
- // Required, must not be nil.
79
- //
80
- // +kubebuilder:validation:Required
81
- DatabaseConfig * DatabaseConfig `json:"databaseConfig,omitempty"`
82
- ArgoWorkFlows * ArgoWorkFlows `json:"argoWorkFlows,omitempty"`
83
- Milvus * Milvus `json:"milvus,omitempty"`
84
- DataStore * DataStore `json:"dataStore,omitempty"`
79
+ // DatabaseConfig stores the database configuration for NeMo entitystore.
80
+ DatabaseConfig * DatabaseConfig `json:"databaseConfig"`
81
+ // ArgoWorkflows stores the argo workflow service endpoint.
82
+ ArgoWorkflows ArgoWorkflows `json:"argoWorkflows"`
83
+ // VectorDB stores the vector db endpoint.
84
+ VectorDB VectorDB `json:"vectorDB"`
85
+ // Datastore stores the datastore endpoint.
86
+ Datastore Datastore `json:"datastore"`
87
+
88
+ // OpenTelemetry Settings
89
+ // +kubebuilder:validation:Optional
90
+ OpenTelemetry OTelSpec `json:"otel,omitempty"`
91
+
92
+ // EvalLogLevel defines the evaluator log level (e.g., INFO, DEBUG).
93
+ // +kubebuilder:validation:Optional
94
+ // +kubebuilder:validation:Enum=INFO;DEBUG
95
+ // +kubebuilder:default="INFO"
96
+ EvalLogLevel string `json:"evalLogLevel,omitempty"`
97
+
98
+ // LogHandlers defines the log sink handlers (e.g., console, file).
99
+ // +kubebuilder:validation:Optional
100
+ // +kubebuilder:validation:Enum=console;file
101
+ // +kubebuilder:default="console"
102
+ LogHandlers string `json:"logHandlers,omitempty"`
103
+
104
+ // ConsoleLogLevel defines the console log level (e.g., INFO, DEBUG).
105
+ // +kubebuilder:validation:Optional
106
+ // +kubebuilder:validation:Enum=INFO;DEBUG
107
+ // +kubebuilder:default="INFO"
108
+ ConsoleLogLevel string `json:"consoleLogLevel,omitempty"`
109
+
110
+ // EnableValidation indicates whether the validation jobs are enabled; when unset, they are enabled by default.
111
+ EnableValidation * bool `json:"enableValidation,omitempty"`
85
112
}
86
113
87
114
// NemoEvaluatorStatus defines the observed state of NemoEvaluator
@@ -157,80 +184,141 @@ func (n *NemoEvaluator) GetStandardEnv() []corev1.EnvVar {
157
184
},
158
185
{
159
186
Name : "EVALUATOR_PORT" ,
160
- Value : "7331" ,
161
- },
162
- {
163
- Name : "POSTGRES_DB_PASSWORD" ,
164
- ValueFrom : & corev1.EnvVarSource {
165
- SecretKeyRef : & corev1.SecretKeySelector {
166
- Key : n .Spec .DatabaseConfig .Credentials .PasswordKey ,
167
- LocalObjectReference : corev1.LocalObjectReference {
168
- Name : n .Spec .DatabaseConfig .Credentials .SecretName ,
169
- },
170
- },
171
- },
172
- },
173
- {
174
- Name : "POSTGRES_URI" ,
175
- Value : fmt .Sprintf ("postgresql://%s:$(POSTGRES_DB_PASSWORD)@%s:%d/%s" , n .Spec .DatabaseConfig .Credentials .User , n .Spec .DatabaseConfig .Host , n .Spec .DatabaseConfig .Port , n .Spec .DatabaseConfig .DatabaseName ),
187
+ Value : fmt .Sprintf ("%d" , n .GetServicePort ()),
176
188
},
177
189
{
178
190
Name : "ARGO_HOST" ,
179
- Value : n .Spec .ArgoWorkFlows .Endpoint ,
191
+ Value : n .Spec .ArgoWorkflows .Endpoint ,
180
192
},
181
193
{
182
194
Name : "MILVUS_URL" ,
183
- Value : n .Spec .Milvus .Endpoint ,
195
+ Value : n .Spec .VectorDB .Endpoint ,
184
196
},
185
197
{
186
198
Name : "SERVICE_ACCOUNT" ,
187
- Value : n .Spec .ArgoWorkFlows .ServiceAccount ,
199
+ Value : n .Spec .ArgoWorkflows .ServiceAccount ,
188
200
},
189
201
{
190
202
Name : "DATA_STORE_HOST" ,
191
- Value : n .Spec .DataStore .Endpoint ,
203
+ Value : n .Spec .Datastore .Endpoint ,
192
204
},
193
205
{
194
206
Name : "EVAL_CONTAINER" ,
195
207
Value : n .GetImage (),
196
208
},
197
209
{
198
- Name : "EVAL_ENABLE_VALIDATION" ,
199
- Value : "True" ,
200
- },
201
- {
202
- Name : "OTEL_TRACES_EXPORTER" ,
203
- Value : "none" ,
204
- },
205
- {
206
- Name : "OTEL_METRICS_EXPORTER" ,
207
- Value : "none" ,
208
- },
209
- {
210
- Name : "OTEL_LOGS_EXPORTER" ,
211
- Value : "none" ,
210
+ Name : "LOG_HANDLERS" ,
211
+ Value : n .Spec .LogHandlers ,
212
212
},
213
213
{
214
- Name : "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED " ,
215
- Value : "false" ,
214
+ Name : "CONSOLE_LOG_LEVEL " ,
215
+ Value : n . Spec . ConsoleLogLevel ,
216
216
},
217
217
{
218
- Name : "LOG_HANDLERS " ,
219
- Value : "console" ,
218
+ Name : "EVAL_LOG_LEVEL " ,
219
+ Value : n . Spec . EvalLogLevel ,
220
220
},
221
+ }
222
+
223
+ if n .IsValidationEnabled () {
224
+ envVars = append (envVars ,
225
+ corev1.EnvVar {Name : "EVAL_ENABLE_VALIDATION" , Value : "True" })
226
+ }
227
+
228
+ // Append the environment variables for Postgres
229
+ envVars = append (envVars , n .GetPostgresEnv ()... )
230
+
231
+ // Append the environment variables for OTel
232
+ if n .IsOtelEnabled () {
233
+ envVars = append (envVars , n .GetOtelEnv ()... )
234
+ }
235
+
236
+ return envVars
237
+ }
238
+
239
+ // IsValidationEnabled returns if the validation jobs are enabled by default
240
+ func (n * NemoEvaluator ) IsValidationEnabled () bool {
241
+ if n .Spec .EnableValidation == nil {
242
+ // validation jobs are enabled by default
243
+ return true
244
+ }
245
+ return * n .Spec .EnableValidation
246
+ }
247
+
248
+ // IsOtelEnabled returns true if Open Telemetry Collector is enabled
249
+ func (n * NemoEvaluator ) IsOtelEnabled () bool {
250
+ return n .Spec .OpenTelemetry .Enabled != nil && * n .Spec .OpenTelemetry .Enabled
251
+ }
252
+
253
+ // GetOtelEnv generates OpenTelemetry-related environment variables.
254
+ func (n * NemoEvaluator ) GetOtelEnv () []corev1.EnvVar {
255
+ var otelEnvVars []corev1.EnvVar
256
+
257
+ otelEnvVars = append (otelEnvVars ,
258
+ corev1.EnvVar {Name : "OTEL_EXPORTER_OTLP_ENDPOINT" , Value : n .Spec .OpenTelemetry .ExporterOtlpEndpoint },
259
+ corev1.EnvVar {Name : "OTEL_TRACES_EXPORTER" , Value : n .Spec .OpenTelemetry .ExporterConfig .TracesExporter },
260
+ corev1.EnvVar {Name : "OTEL_METRICS_EXPORTER" , Value : n .Spec .OpenTelemetry .ExporterConfig .MetricsExporter },
261
+ corev1.EnvVar {Name : "OTEL_LOGS_EXPORTER" , Value : n .Spec .OpenTelemetry .ExporterConfig .LogsExporter },
262
+ corev1.EnvVar {Name : "OTEL_LOG_LEVEL" , Value : n .Spec .OpenTelemetry .LogLevel },
263
+ )
264
+
265
+ if len (n .Spec .OpenTelemetry .ExcludedUrls ) > 0 {
266
+ otelEnvVars = append (otelEnvVars , corev1.EnvVar {
267
+ Name : "OTEL_PYTHON_EXCLUDED_URLS" ,
268
+ Value : strings .Join (n .Spec .OpenTelemetry .ExcludedUrls , "," ),
269
+ })
270
+ }
271
+
272
+ var enableLog bool = true
273
+ if n .Spec .OpenTelemetry .DisableLogging != nil {
274
+ enableLog = ! * n .Spec .OpenTelemetry .DisableLogging
275
+ }
276
+ otelEnvVars = append (otelEnvVars , corev1.EnvVar {
277
+ Name : "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED" ,
278
+ Value : strconv .FormatBool (enableLog ),
279
+ })
280
+
281
+ return otelEnvVars
282
+ }
283
+
284
+ // GetPostgresEnv returns the PostgreSQL environment variables for a Kubernetes pod.
285
+ func (n * NemoEvaluator ) GetPostgresEnv () []corev1.EnvVar {
286
+ envVars := []corev1.EnvVar {
221
287
{
222
- Name : "CONSOLE_LOG_LEVEL" ,
223
- Value : "INFO" ,
288
+ Name : "POSTGRES_DB_PASSWORD" ,
289
+ ValueFrom : & corev1.EnvVarSource {
290
+ SecretKeyRef : & corev1.SecretKeySelector {
291
+ Key : n .Spec .DatabaseConfig .Credentials .PasswordKey ,
292
+ LocalObjectReference : corev1.LocalObjectReference {
293
+ Name : n .Spec .DatabaseConfig .Credentials .SecretName ,
294
+ },
295
+ },
296
+ },
224
297
},
225
298
{
226
- Name : "EVAL_LOG_LEVEL " ,
227
- Value : "INFO" ,
299
+ Name : "POSTGRES_URI " ,
300
+ Value : n . GeneratePostgresConnString () ,
228
301
},
229
302
}
230
303
231
304
return envVars
232
305
}
233
306
307
+ // GeneratePostgresConnString generates a PostgreSQL connection string using the database config.
308
+ func (n * NemoEvaluator ) GeneratePostgresConnString () string {
309
+ // Construct the connection string
310
+ connString := fmt .Sprintf (
311
+ "postgresql://%s:%s@%s:%d/%s" ,
312
+ n .Spec .DatabaseConfig .Credentials .User ,
313
+ "$(POSTGRES_DB_PASSWORD)" ,
314
+ n .Spec .DatabaseConfig .Host ,
315
+ n .Spec .DatabaseConfig .Port ,
316
+ n .Spec .DatabaseConfig .DatabaseName ,
317
+ )
318
+
319
+ return connString
320
+ }
321
+
234
322
// GetStandardAnnotations returns default annotations to apply to the NemoEvaluator instance
235
323
func (n * NemoEvaluator ) GetStandardAnnotations () map [string ]string {
236
324
standardAnnotations := map [string ]string {
@@ -521,7 +609,7 @@ func (n *NemoEvaluator) GetDeploymentParams() *rendertypes.DeploymentParams {
521
609
// Set runtime class
522
610
params .RuntimeClassName = n .GetRuntimeClass ()
523
611
524
- params .Ports = []corev1.ContainerPort {{Name : "http" , Protocol : corev1 .ProtocolTCP , ContainerPort : 7331 }}
612
+ params .Ports = []corev1.ContainerPort {{Name : "http" , Protocol : corev1 .ProtocolTCP , ContainerPort : n . GetServicePort () }}
525
613
return params
526
614
}
527
615
@@ -588,8 +676,8 @@ func (n *NemoEvaluator) GetServiceParams() *rendertypes.ServiceParams {
588
676
params .Type = "ClusterIP"
589
677
590
678
// Set service ports
591
- params .Port = 7331
592
- params .TargetPort = 7331
679
+ params .Port = n . GetServicePort ()
680
+ params .TargetPort = n . GetServicePort ()
593
681
params .PortName = "http"
594
682
return params
595
683
}
0 commit comments