Skip to content

Update Evaluator spec and add unit tests #310

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions api/apps/v1alpha1/nemo_common_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,22 @@ limitations under the License.

package v1alpha1

type ArgoWorkFlows struct {
// ArgoWorkflows defines configuration to connect to Argo Workflows service
type ArgoWorkflows struct {
// +kubebuilder:validation:MinLength=1
Endpoint string `json:"endpoint"`
ServiceAccount string `json:"serviceAccount"`
}

type Milvus struct {
// VectorDB defines configuration for connecting to external VectorDB
type VectorDB struct {
// +kubebuilder:validation:MinLength=1
Endpoint string `json:"endpoint"`
}

type DataStore struct {
// Datastore defines configuration for connecting to NeMo Datastore service
type Datastore struct {
// +kubebuilder:validation:MinLength=1
Endpoint string `json:"endpoint"`
}

Expand Down
194 changes: 141 additions & 53 deletions api/apps/v1alpha1/nemo_evaluator_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"fmt"
"maps"
"os"
"strconv"
"strings"

rendertypes "github.com/NVIDIA/k8s-nim-operator/internal/render/types"
utils "github.com/NVIDIA/k8s-nim-operator/internal/utils"
Expand Down Expand Up @@ -74,14 +76,39 @@ type NemoEvaluatorSpec struct {
GroupID *int64 `json:"groupID,omitempty"`
RuntimeClass string `json:"runtimeClass,omitempty"`

// DatabaseConfig stores the database configuration for NEMO entitystore.
// Required, must not be nil.
//
// +kubebuilder:validation:Required
DatabaseConfig *DatabaseConfig `json:"databaseConfig,omitempty"`
ArgoWorkFlows *ArgoWorkFlows `json:"argoWorkFlows,omitempty"`
Milvus *Milvus `json:"milvus,omitempty"`
DataStore *DataStore `json:"dataStore,omitempty"`
// DatabaseConfig stores the database configuration for NeMo entitystore.
DatabaseConfig *DatabaseConfig `json:"databaseConfig"`
// ArgoWorkflows stores the argo workflow service endpoint.
ArgoWorkflows ArgoWorkflows `json:"argoWorkflows"`
// VectorDB stores the vector db endpoint.
VectorDB VectorDB `json:"vectorDB"`
// Datastore stores the datastore endpoint.
Datastore Datastore `json:"datastore"`

// OpenTelemetry Settings
// +kubebuilder:validation:Optional
OpenTelemetry OTelSpec `json:"otel,omitempty"`

// EvalLogLevel defines the evaluator log level (e.g., INFO, DEBUG).
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Enum=INFO;DEBUG
// +kubebuilder:default="INFO"
EvalLogLevel string `json:"evalLogLevel,omitempty"`

// LogHandlers defines the log sink handlers (e.g., INFO, DEBUG).
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Enum=console;file
// +kubebuilder:default="console"
LogHandlers string `json:"logHandlers,omitempty"`

// ConsoleLogLevel defines the console log level (e.g., INFO, DEBUG).
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Enum=INFO;DEBUG
// +kubebuilder:default="INFO"
ConsoleLogLevel string `json:"consoleLogLevel,omitempty"`

// EnableValidation indicates that the validation jobs to be enabled
EnableValidation *bool `json:"enableValidation,omitempty"`
}

// NemoEvaluatorStatus defines the observed state of NemoEvaluator
Expand Down Expand Up @@ -157,80 +184,141 @@ func (n *NemoEvaluator) GetStandardEnv() []corev1.EnvVar {
},
{
Name: "EVALUATOR_PORT",
Value: "7331",
},
{
Name: "POSTGRES_DB_PASSWORD",
ValueFrom: &corev1.EnvVarSource{
SecretKeyRef: &corev1.SecretKeySelector{
Key: n.Spec.DatabaseConfig.Credentials.PasswordKey,
LocalObjectReference: corev1.LocalObjectReference{
Name: n.Spec.DatabaseConfig.Credentials.SecretName,
},
},
},
},
{
Name: "POSTGRES_URI",
Value: fmt.Sprintf("postgresql://%s:$(POSTGRES_DB_PASSWORD)@%s:%d/%s", n.Spec.DatabaseConfig.Credentials.User, n.Spec.DatabaseConfig.Host, n.Spec.DatabaseConfig.Port, n.Spec.DatabaseConfig.DatabaseName),
Value: fmt.Sprintf("%d", n.GetServicePort()),
},
{
Name: "ARGO_HOST",
Value: n.Spec.ArgoWorkFlows.Endpoint,
Value: n.Spec.ArgoWorkflows.Endpoint,
},
{
Name: "MILVUS_URL",
Value: n.Spec.Milvus.Endpoint,
Value: n.Spec.VectorDB.Endpoint,
},
{
Name: "SERVICE_ACCOUNT",
Value: n.Spec.ArgoWorkFlows.ServiceAccount,
Value: n.Spec.ArgoWorkflows.ServiceAccount,
},
{
Name: "DATA_STORE_HOST",
Value: n.Spec.DataStore.Endpoint,
Value: n.Spec.Datastore.Endpoint,
},
{
Name: "EVAL_CONTAINER",
Value: n.GetImage(),
},
{
Name: "EVAL_ENABLE_VALIDATION",
Value: "True",
},
{
Name: "OTEL_TRACES_EXPORTER",
Value: "none",
},
{
Name: "OTEL_METRICS_EXPORTER",
Value: "none",
},
{
Name: "OTEL_LOGS_EXPORTER",
Value: "none",
Name: "LOG_HANDLERS",
Value: n.Spec.LogHandlers,
},
{
Name: "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED",
Value: "false",
Name: "CONSOLE_LOG_LEVEL",
Value: n.Spec.ConsoleLogLevel,
},
{
Name: "LOG_HANDLERS",
Value: "console",
Name: "EVAL_LOG_LEVEL",
Value: n.Spec.EvalLogLevel,
},
}

if n.IsValidationEnabled() {
envVars = append(envVars,
corev1.EnvVar{Name: "EVAL_ENABLE_VALIDATION", Value: "True"})
}

// Append the environment variables for Postgres
envVars = append(envVars, n.GetPostgresEnv()...)

// Append the environment variables for OTel
if n.IsOtelEnabled() {
envVars = append(envVars, n.GetOtelEnv()...)
}

return envVars
}

// IsValidationEnabled returns if the validation jobs are enabled by default
func (n *NemoEvaluator) IsValidationEnabled() bool {
if n.Spec.EnableValidation == nil {
// validation jobs are enabled by default
return true
}
return *n.Spec.EnableValidation
}

// IsOtelEnabled returns true if Open Telemetry Collector is enabled
func (n *NemoEvaluator) IsOtelEnabled() bool {
return n.Spec.OpenTelemetry.Enabled != nil && *n.Spec.OpenTelemetry.Enabled
}

// GetOtelEnv generates OpenTelemetry-related environment variables.
func (n *NemoEvaluator) GetOtelEnv() []corev1.EnvVar {
var otelEnvVars []corev1.EnvVar

otelEnvVars = append(otelEnvVars,
corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: n.Spec.OpenTelemetry.ExporterOtlpEndpoint},
corev1.EnvVar{Name: "OTEL_TRACES_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.TracesExporter},
corev1.EnvVar{Name: "OTEL_METRICS_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.MetricsExporter},
corev1.EnvVar{Name: "OTEL_LOGS_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.LogsExporter},
corev1.EnvVar{Name: "OTEL_LOG_LEVEL", Value: n.Spec.OpenTelemetry.LogLevel},
)

if len(n.Spec.OpenTelemetry.ExcludedUrls) > 0 {
otelEnvVars = append(otelEnvVars, corev1.EnvVar{
Name: "OTEL_PYTHON_EXCLUDED_URLS",
Value: strings.Join(n.Spec.OpenTelemetry.ExcludedUrls, ","),
})
}

var enableLog bool = true
if n.Spec.OpenTelemetry.DisableLogging != nil {
enableLog = !*n.Spec.OpenTelemetry.DisableLogging
}
otelEnvVars = append(otelEnvVars, corev1.EnvVar{
Name: "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED",
Value: strconv.FormatBool(enableLog),
})

return otelEnvVars
}

// GetPostgresEnv returns the PostgreSQL environment variables for a Kubernetes pod.
func (n *NemoEvaluator) GetPostgresEnv() []corev1.EnvVar {
envVars := []corev1.EnvVar{
{
Name: "CONSOLE_LOG_LEVEL",
Value: "INFO",
Name: "POSTGRES_DB_PASSWORD",
ValueFrom: &corev1.EnvVarSource{
SecretKeyRef: &corev1.SecretKeySelector{
Key: n.Spec.DatabaseConfig.Credentials.PasswordKey,
LocalObjectReference: corev1.LocalObjectReference{
Name: n.Spec.DatabaseConfig.Credentials.SecretName,
},
},
},
},
{
Name: "EVAL_LOG_LEVEL",
Value: "INFO",
Name: "POSTGRES_URI",
Value: n.GeneratePostgresConnString(),
},
}

return envVars
}

// GeneratePostgresConnString generates a PostgreSQL connection string using the database config.
func (n *NemoEvaluator) GeneratePostgresConnString() string {
// Construct the connection string
connString := fmt.Sprintf(
"postgresql://%s:%s@%s:%d/%s",
n.Spec.DatabaseConfig.Credentials.User,
"$(POSTGRES_DB_PASSWORD)",
n.Spec.DatabaseConfig.Host,
n.Spec.DatabaseConfig.Port,
n.Spec.DatabaseConfig.DatabaseName,
)

return connString
}

// GetStandardAnnotations returns default annotations to apply to the NemoEvaluator instance
func (n *NemoEvaluator) GetStandardAnnotations() map[string]string {
standardAnnotations := map[string]string{
Expand Down Expand Up @@ -521,7 +609,7 @@ func (n *NemoEvaluator) GetDeploymentParams() *rendertypes.DeploymentParams {
// Set runtime class
params.RuntimeClassName = n.GetRuntimeClass()

params.Ports = []corev1.ContainerPort{{Name: "http", Protocol: corev1.ProtocolTCP, ContainerPort: 7331}}
params.Ports = []corev1.ContainerPort{{Name: "http", Protocol: corev1.ProtocolTCP, ContainerPort: n.GetServicePort()}}
return params
}

Expand Down Expand Up @@ -588,8 +676,8 @@ func (n *NemoEvaluator) GetServiceParams() *rendertypes.ServiceParams {
params.Type = "ClusterIP"

// Set service ports
params.Port = 7331
params.TargetPort = 7331
params.Port = n.GetServicePort()
params.TargetPort = n.GetServicePort()
params.PortName = "http"
return params
}
Expand Down
Loading