diff --git a/api/apps/v1alpha1/nemo_datastore_types.go b/api/apps/v1alpha1/nemo_datastore_types.go index 884cb677..c5b011ff 100644 --- a/api/apps/v1alpha1/nemo_datastore_types.go +++ b/api/apps/v1alpha1/nemo_datastore_types.go @@ -20,6 +20,7 @@ import ( "fmt" "maps" "os" + "strconv" rendertypes "github.com/NVIDIA/k8s-nim-operator/internal/render/types" utils "github.com/NVIDIA/k8s-nim-operator/internal/utils" @@ -30,6 +31,7 @@ import ( rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" ) // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! @@ -78,7 +80,22 @@ type NemoDatastoreSpec struct { GroupID *int64 `json:"groupID,omitempty"` RuntimeClass string `json:"runtimeClass,omitempty"` - DataStoreParams NemoDataStoreParams `json:"dataStoreParams"` + // ObjectStore specifies the location and credentials for accessing the external Object Storage + ObjectStoreConfig ObjectStoreConfig `json:"objectStoreConfig"` // e.g. minio + // ExternalDatabase contains external PostgreSQL configuration + DatabaseConfig DatabaseConfig `json:"databaseConfig"` // e.g. 
postgres + // secrets contains the pre-requisite secrets that must be created before deploying the datastore CR + Secrets Secrets `json:"secrets"` + // PVC defines the PersistentVolumeClaim for the datastore + PVC *PersistentVolumeClaim `json:"pvc,omitempty"` +} + +type Secrets struct { + GiteaAdminSecret string `json:"giteaAdminSecret"` + LfsJwtSecret string `json:"lfsJwtSecret"` + DataStoreInitSecret string `json:"datastoreInitSecret"` + DataStoreConfigSecret string `json:"datastoreConfigSecret"` // config_environment.sh + DataStoreInlineConfigSecret string `json:"datastoreInlineConfigSecret"` } // NemoDatastoreStatus defines the observed state of NemoDatastore @@ -88,20 +105,32 @@ type NemoDatastoreStatus struct { State string `json:"state,omitempty"` } -type NemoDataStoreParams struct { - AppVersion string `json:"appVersion"` - GiteaEndpoint string `json:"giteaEndpoint"` - GiteaSecret string `json:"giteaSecret"` - DatabaseURL string `json:"databaseURL"` - DatabaseHost string `json:"databaseHost"` - DatabasePort string `json:"databasePort"` - DBSecret string `json:"dbSecret"` +type ObjectStoreConfig struct { // e.g. Minio, s3 + // ObjectStoreCredentials stores the configuration to retrieve the object store credentials + Credentials ObjectStoreCredentials `json:"credentials"` + + // +kubebuilder:default:=true + ServeDirect bool `json:"serveDirect,omitempty"` + + // Endpoint is the fully qualified object store endpoint + Endpoint string `json:"endpoint"` + // BucketName is the bucket where LFS files will be stored + BucketName string `json:"bucketName"` + // Region is the region where bucket is hosted + Region string `json:"region"` + // SSL enables SSL for object store transport + SSL bool `json:"ssl"` +} + +type ObjectStoreCredentials struct { + // User is the non-root username for a NEMO Service in the object store. 
+ User string `json:"user"` - EnvConfigMap string `json:"envConfigmap"` - EnvSecret string `json:"envSecret"` + // SecretName is the name of the secret which has the object credentials for a NEMO service user. + SecretName string `json:"secretName"` - InitContainerImage string `json:"initContainerImage,omitempty"` - InitContainerCommand []string `json:"initContainerCommand,omitempty"` + // PasswordKey is the name of the key in the `CredentialsSecret` secret for the object store credentials. + PasswordKey string `json:"passwordKey"` } // +genclient @@ -130,10 +159,10 @@ type NemoDatastoreList struct { // GetPVCName returns the name to be used for the PVC based on the custom spec // Prefers pvc.Name if explicitly set by the user in the NemoDatastore instance -func (n *NemoDatastore) GetPVCName(pvc PersistentVolumeClaim) string { +func (n *NemoDatastore) GetPVCName() string { pvcName := fmt.Sprintf("%s-pvc", n.GetName()) - if pvc.Name != "" { - pvcName = pvc.Name + if n.Spec.PVC != nil && n.Spec.PVC.Name != "" { + pvcName = n.Spec.PVC.Name } return pvcName } @@ -157,99 +186,301 @@ func (n *NemoDatastore) GetStandardLabels() map[string]string { } } -// GetStandardEnv returns the standard set of env variables for the NemoDatastore container +// GetStandardEnv returns the standard set of env variables for the NemoDatastore main container func (n *NemoDatastore) GetStandardEnv() []corev1.EnvVar { // add standard env required for NIM service envVars := []corev1.EnvVar{ { - Name: "APP_VERSION", - Value: n.Spec.DataStoreParams.AppVersion, + Name: "SSH_LISTEN_PORT", + Value: "2222", + }, + { + Name: "SSH_PORT", + Value: "22", }, { - Name: "GITEA_ENDPOINT", - Value: n.Spec.DataStoreParams.GiteaEndpoint, + Name: "GITEA_APP_INI", + Value: "/data/gitea/conf/app.ini", }, { - Name: "GITEA_ORG_NAME", + Name: "GITEA_CUSTOM", + Value: "/data/gitea", + }, + { + Name: "GITEA_WORK_DIR", + Value: "/data", + }, + { + Name: "TMPDIR", + Value: "/tmp/gitea", + }, + { + Name: 
"GITEA_TEMP", + Value: "/tmp/gitea", + }, + { + Name: "HOME", + Value: "/data/gitea/git", + }, + { + Name: "GITEA__LFS__MINIO_ACCESS_KEY_ID", + Value: n.Spec.ObjectStoreConfig.Credentials.User, + }, + { + Name: "GITEA__LFS__MINIO_SECRET_ACCESS_KEY", ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ - Key: "username", + Key: n.Spec.ObjectStoreConfig.Credentials.PasswordKey, LocalObjectReference: corev1.LocalObjectReference{ - Name: n.Spec.DataStoreParams.GiteaSecret, + Name: n.Spec.ObjectStoreConfig.Credentials.SecretName, }, }, }, }, { - Name: "GITEA_PASSWORD", + Name: "GITEA__SERVER__LFS_JWT_SECRET", ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ - Key: "password", + Key: "jwtSecret", LocalObjectReference: corev1.LocalObjectReference{ - Name: n.Spec.DataStoreParams.GiteaSecret, + Name: n.Spec.Secrets.LfsJwtSecret, }, }, }, }, { - Name: "DB_PASSWORD", + Name: "GITEA__DATABASE__PASSWD", ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ - Key: "password", + Key: n.Spec.DatabaseConfig.Credentials.PasswordKey, LocalObjectReference: corev1.LocalObjectReference{ - Name: n.Spec.DataStoreParams.DBSecret, + Name: n.Spec.DatabaseConfig.Credentials.SecretName, }, }, }, }, - { - Name: "DATABASE_URL", - Value: n.Spec.DataStoreParams.DatabaseURL, - }, } return envVars } -// GetStandardAnnotations returns default annotations to apply to the NemoDatastore instance -func (n *NemoDatastore) GetEnvFrom() []corev1.EnvFromSource { - return []corev1.EnvFromSource{ +func (n *NemoDatastore) GetInitContainerEnv() []corev1.EnvVar { + objStoreSetting := n.Spec.ObjectStoreConfig + dbSetting := n.Spec.DatabaseConfig + + envVars := []corev1.EnvVar{ + { + Name: "GITEA_APP_INI", + Value: "/data/gitea/conf/app.ini", + }, + { + Name: "GITEA_CUSTOM", + Value: "/data/gitea", + }, + { + Name: "GITEA_WORK_DIR", + Value: "/data", + }, + { + Name: "TMPDIR", + Value: "/tmp/gitea", + }, + { + Name: "GITEA_TEMP", + Value: 
"/tmp/gitea", + }, + { + Name: "HOME", + Value: "/data/gitea/git", + }, + { + Name: "GITEA__LFS__MINIO_ACCESS_KEY_ID", + Value: objStoreSetting.Credentials.User, + }, + { + Name: "GITEA__LFS__MINIO_SECRET_ACCESS_KEY", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: objStoreSetting.Credentials.PasswordKey, + LocalObjectReference: corev1.LocalObjectReference{ + Name: objStoreSetting.Credentials.SecretName, + }, + }, + }, + }, { - ConfigMapRef: &corev1.ConfigMapEnvSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: n.Spec.DataStoreParams.EnvConfigMap, + Name: "GITEA__SERVER__LFS_JWT_SECRET", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: "jwtSecret", + LocalObjectReference: corev1.LocalObjectReference{ + Name: n.Spec.Secrets.LfsJwtSecret, + }, }, }, }, { - SecretRef: &corev1.SecretEnvSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: n.Spec.DataStoreParams.EnvSecret, + Name: "GITEA__DATABASE__PASSWD", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: dbSetting.Credentials.PasswordKey, + LocalObjectReference: corev1.LocalObjectReference{ + Name: dbSetting.Credentials.SecretName, + }, + }, + }, + }, + { + Name: "GITEA_ADMIN_USERNAME", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: "GITEA_ADMIN_USERNAME", + LocalObjectReference: corev1.LocalObjectReference{ + Name: n.Spec.Secrets.GiteaAdminSecret, + }, }, }, }, + { + Name: "GITEA_ADMIN_PASSWORD", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: "GITEA_ADMIN_PASSWORD", + LocalObjectReference: corev1.LocalObjectReference{ + Name: n.Spec.Secrets.GiteaAdminSecret, + }, + }, + }, + }, + { + Name: "GITEA__LFS__SERVE_DIRECT", + Value: strconv.FormatBool(objStoreSetting.ServeDirect), + }, + { + Name: "GITEA__LFS__STORAGE_TYPE", + Value: "minio", + }, + { + Name: "GITEA__LFS__MINIO_ENDPOINT", + Value: 
objStoreSetting.Endpoint, + }, + { + Name: "GITEA__LFS__MINIO_BUCKET", + Value: objStoreSetting.BucketName, + }, + { + Name: "GITEA__LFS__MINIO_LOCATION", + Value: objStoreSetting.Region, + }, + { + Name: "GITEA__LFS__MINIO_LOCATION", + Value: objStoreSetting.Region, + }, + { + Name: "GITEA__LFS__MINIO_USE_SSL", + Value: strconv.FormatBool(objStoreSetting.SSL), + }, + { + Name: "GITEA__DATABASE__SSL_MODE", + Value: "disable", + }, + { + Name: "GITEA__DATABASE__NAME", + Value: dbSetting.DatabaseName, + }, + { + Name: "GITEA__DATABASE__HOST", + Value: fmt.Sprintf("%s:%d", dbSetting.Host, dbSetting.Port), + }, + { + Name: "GITEA__DATABASE__USER", + Value: dbSetting.Credentials.User, + }, } + return envVars } -func (n *NemoDatastore) GetInitContainers() []corev1.Container { - image := n.Spec.DataStoreParams.InitContainerImage - if image == "" { - image = "busybox" - } - cmd := n.Spec.DataStoreParams.InitContainerCommand - if len(cmd) == 0 { - cmd = []string{ - "sh", - "-c", - fmt.Sprintf("until nc -z %s %s; do echo \"PostgreSQL is unavailable. 
Sleeping for 5 seconds\"; sleep 5; done;", n.Spec.DataStoreParams.DatabaseHost, n.Spec.DataStoreParams.DatabasePort), - } - } - return []corev1.Container{ +// GetVolumes returns volumes for the NemoDatastore container +func (n *NemoDatastore) GetVolumes() []corev1.Volume { + /*volumes: + - name: init + secret: + defaultMode: 110 + secretName: datastore-nemo-datastore-init + - name: config + secret: + defaultMode: 110 + secretName: datastore-nemo-datastore + - name: inline-config-sources + secret: + defaultMode: 420 + secretName: datastore-nemo-datastore-inline-config + - emptyDir: {} + name: temp + - name: data + persistentVolumeClaim: + claimName: datastore-shared-storage + */ + var initMode = int32(110) + var configMode = int32(420) + + volumes := []corev1.Volume{ + { + Name: "init", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: n.Spec.Secrets.DataStoreInitSecret, + DefaultMode: &initMode, + }, + }, + }, { - Name: "wait-postgres-ready", - Image: image, - Command: cmd, + Name: "config", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: n.Spec.Secrets.DataStoreConfigSecret, + DefaultMode: &initMode, + }, + }, }, + { + Name: "inline-config-sources", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: n.Spec.Secrets.DataStoreInlineConfigSecret, + DefaultMode: &configMode, + }, + }, + }, + { + Name: "temp", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, + } + + if n.Spec.PVC != nil { + volumes = append(volumes, corev1.Volume{ + Name: "data", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: n.GetPVCName(), + }, + }, + }) + } else { + volumes = append(volumes, corev1.Volume{ + Name: "data", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }) } + return volumes +} + +func (n *NemoDatastore) 
ShouldCreatePersistentStorage() bool { + return n.Spec.PVC != nil && n.Spec.PVC.Create != nil && *n.Spec.PVC.Create } // GetStandardAnnotations returns default annotations to apply to the NemoDatastore instance @@ -429,15 +660,97 @@ func (n *NemoDatastore) GetDefaultStartupProbe() *corev1.Probe { return &probe } -// GetVolumes returns volumes for the NemoDatastore container -func (n *NemoDatastore) GetVolumes() []corev1.Volume { - volumes := []corev1.Volume{} - return volumes -} - // GetVolumeMounts returns volumes for the NemoDatastore container func (n *NemoDatastore) GetVolumeMounts() []corev1.VolumeMount { - return []corev1.VolumeMount{} + mounts := []corev1.VolumeMount{ + { + MountPath: "/tmp", + Name: "temp", + }, + } + + dataMount := corev1.VolumeMount{ + MountPath: "/data", + Name: "data", + } + + if n.Spec.PVC != nil { + dataMount.SubPath = n.Spec.PVC.SubPath + } + mounts = append(mounts, dataMount) + return mounts +} + +func (n *NemoDatastore) GetVolumeMountsInitContainer() []corev1.VolumeMount { + mounts := []corev1.VolumeMount{ + { + MountPath: "/usr/sbin", + Name: "config", + }, + { + MountPath: "/tmp", + Name: "temp", + }, + { + MountPath: "/env-to-ini-mounts/inlines/", + Name: "inline-config-sources", + }, + { + MountPath: "/usr/sbin/init", + Name: "init", + }, + } + dataMount := corev1.VolumeMount{ + MountPath: "/data", + Name: "data", + } + + if n.Spec.PVC != nil { + dataMount.SubPath = n.Spec.PVC.SubPath + } + mounts = append(mounts, dataMount) + return mounts +} + +func (n *NemoDatastore) GetInitContainers() []corev1.Container { + return []corev1.Container{ + { + Name: "init-directories", + Image: n.GetImage(), + ImagePullPolicy: corev1.PullPolicy(n.GetImagePullPolicy()), + Command: []string{ + "/usr/sbin/init/init_directory_structure.sh", + }, + VolumeMounts: n.GetVolumeMountsInitContainer(), + Env: n.GetInitContainerEnv(), + }, + { + Name: "init-app-ini", + Image: n.GetImage(), + ImagePullPolicy: corev1.PullPolicy(n.GetImagePullPolicy()), + 
Command: []string{ + "/usr/sbin/config_environment.sh", + }, + VolumeMounts: n.GetVolumeMountsInitContainer(), + Env: n.GetInitContainerEnv(), + }, + { + Name: "configure-datastore", + Image: n.GetImage(), + ImagePullPolicy: corev1.PullPolicy(n.GetImagePullPolicy()), + Command: []string{ + "/bin/sh", "-c", + }, + Args: []string{ + "/usr/sbin/init/configure_gitea.sh", + }, + VolumeMounts: n.GetVolumeMountsInitContainer(), + Env: n.GetInitContainerEnv(), + SecurityContext: &corev1.SecurityContext{ + RunAsUser: n.GetUserID(), + }, + }, + } } // GetServiceAccountName returns service account name for the NemoDatastore deployment @@ -505,14 +818,18 @@ func (n *NemoDatastore) GetServiceType() string { // GetUserID returns the user ID for the NemoDatastore deployment func (n *NemoDatastore) GetUserID() *int64 { - return n.Spec.UserID - + if n.Spec.UserID != nil { + return n.Spec.UserID + } + return ptr.To[int64](1000) } // GetGroupID returns the group ID for the NemoDatastore deployment func (n *NemoDatastore) GetGroupID() *int64 { - return n.Spec.GroupID - + if n.Spec.GroupID != nil { + return n.Spec.GroupID + } + return ptr.To[int64](2000) } // GetServiceAccountParams return params to render ServiceAccount from templates diff --git a/api/apps/v1alpha1/zz_generated.deepcopy.go b/api/apps/v1alpha1/zz_generated.deepcopy.go index 0c3ce6b1..440b9039 100644 --- a/api/apps/v1alpha1/zz_generated.deepcopy.go +++ b/api/apps/v1alpha1/zz_generated.deepcopy.go @@ -983,26 +983,6 @@ func (in *NIMSource) DeepCopy() *NIMSource { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *NemoDataStoreParams) DeepCopyInto(out *NemoDataStoreParams) { - *out = *in - if in.InitContainerCommand != nil { - in, out := &in.InitContainerCommand, &out.InitContainerCommand - *out = make([]string, len(*in)) - copy(*out, *in) - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NemoDataStoreParams. -func (in *NemoDataStoreParams) DeepCopy() *NemoDataStoreParams { - if in == nil { - return nil - } - out := new(NemoDataStoreParams) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NemoDatastore) DeepCopyInto(out *NemoDatastore) { *out = *in @@ -1137,7 +1117,14 @@ func (in *NemoDatastoreSpec) DeepCopyInto(out *NemoDatastoreSpec) { *out = new(int64) **out = **in } - in.DataStoreParams.DeepCopyInto(&out.DataStoreParams) + out.ObjectStoreConfig = in.ObjectStoreConfig + in.DatabaseConfig.DeepCopyInto(&out.DatabaseConfig) + out.Secrets = in.Secrets + if in.PVC != nil { + in, out := &in.PVC, &out.PVC + *out = new(PersistentVolumeClaim) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NemoDatastoreSpec. @@ -1702,6 +1689,37 @@ func (in *NemoGuardrailStatus) DeepCopy() *NemoGuardrailStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ObjectStoreConfig) DeepCopyInto(out *ObjectStoreConfig) { + *out = *in + out.Credentials = in.Credentials +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ObjectStoreConfig. +func (in *ObjectStoreConfig) DeepCopy() *ObjectStoreConfig { + if in == nil { + return nil + } + out := new(ObjectStoreConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ObjectStoreCredentials) DeepCopyInto(out *ObjectStoreCredentials) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ObjectStoreCredentials. +func (in *ObjectStoreCredentials) DeepCopy() *ObjectStoreCredentials { + if in == nil { + return nil + } + out := new(ObjectStoreCredentials) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PersistentVolumeClaim) DeepCopyInto(out *PersistentVolumeClaim) { *out = *in @@ -1764,6 +1782,21 @@ func (in *Resources) DeepCopy() *Resources { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Secrets) DeepCopyInto(out *Secrets) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Secrets. +func (in *Secrets) DeepCopy() *Secrets { + if in == nil { + return nil + } + out := new(Secrets) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Service) DeepCopyInto(out *Service) { *out = *in diff --git a/bundle/manifests/apps.nvidia.com_nemodatastores.yaml b/bundle/manifests/apps.nvidia.com_nemodatastores.yaml index ffdc4923..4668139e 100644 --- a/bundle/manifests/apps.nvidia.com_nemodatastores.yaml +++ b/bundle/manifests/apps.nvidia.com_nemodatastores.yaml @@ -63,43 +63,6 @@ spec: items: type: string type: array - dataStoreParams: - properties: - appVersion: - type: string - databaseHost: - type: string - databasePort: - type: string - databaseURL: - type: string - dbSecret: - type: string - envConfigmap: - type: string - envSecret: - type: string - giteaEndpoint: - type: string - giteaSecret: - type: string - initContainerCommand: - items: - type: string - type: array - initContainerImage: - type: string - required: - - appVersion - - databaseHost - - databasePort - - databaseURL - - dbSecret - - envConfigmap - - envSecret - - giteaEndpoint - - giteaSecret - type: object env: items: description: EnvVar represents an environment variable present in @@ -525,6 +488,31 @@ spec: - port type: object type: object + externalDatabase: + properties: + database: + type: string + databaseSecret: + type: string + databaseSecretKey: + type: string + host: + type: string + port: + type: integer + sslMode: + type: string + user: + type: string + required: + - database + - databaseSecret + - databaseSecretKey + - host + - port + - sslMode + - user + type: object groupID: format: int64 type: integer @@ -744,6 +732,34 @@ spec: additionalProperties: type: string type: object + objectStore: + properties: + bucketName: + type: string + endpoint: + type: string + objectStoreSecret: + type: string + objectStoreSecretAccessKey: + type: string + objectStoreSecretAccessSecret: + type: string + region: + type: string + serveDirect: + type: boolean + ssl: + type: boolean + required: + - bucketName + - endpoint + - objectStoreSecret + - objectStoreSecretAccessKey + - objectStoreSecretAccessSecret + - region + - 
serveDirect + - ssl + type: object podAffinity: description: Pod affinity is a group of inter pod affinity scheduling rules. @@ -1100,6 +1116,30 @@ spec: type: array x-kubernetes-list-type: atomic type: object + pvc: + description: PersistentVolumeClaim defines the attributes of PVC used + as a source for caching NIM model + properties: + create: + description: Create indicates to create a new PVC + type: boolean + name: + description: Name is the name of the PVC + type: string + size: + description: Size of the NIM cache in Gi, used during PVC creation + type: string + storageClass: + description: StorageClass to be used for PVC creation. Leave it + as empty if the PVC is already created. + type: string + subPath: + type: string + volumeAccessMode: + description: VolumeAccessMode is the volume access mode of the + PVC + type: string + type: object readinessProbe: description: Probe defines attributes for startup/liveness/readiness probes @@ -1932,6 +1972,25 @@ spec: - maxReplicas type: object type: object + secrets: + properties: + datastoreConfigSecret: + type: string + datastoreInitSecret: + type: string + datastoreInlineConfigSecret: + type: string + giteaAdminSecret: + type: string + lfsJwtSecret: + type: string + required: + - datastoreConfigSecret + - datastoreInitSecret + - datastoreInlineConfigSecret + - giteaAdminSecret + - lfsJwtSecret + type: object startupProbe: description: Probe defines attributes for startup/liveness/readiness probes @@ -2136,7 +2195,9 @@ spec: type: integer required: - authSecret - - dataStoreParams + - externalDatabase + - objectStore + - secrets type: object status: description: NemoDatastoreStatus defines the observed state of NemoDatastore diff --git a/cmd/main.go b/cmd/main.go index bd84de2a..8b626558 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -188,36 +188,36 @@ func main() { os.Exit(1) } - if err = controller.NewNemoDatastoreReconciler( + if err = controller.NewNemoEvaluatorReconciler( mgr.GetClient(), mgr.GetScheme(), 
updater, render.NewRenderer("/manifests"), - ctrl.Log.WithName("controllers").WithName("NemoDatastore"), + ctrl.Log.WithName("controllers").WithName("NemoEvaluator"), ).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "NemoDatastore") + setupLog.Error(err, "unable to create controller", "controller", "NemoEvaluator") os.Exit(1) } - if err = controller.NewNemoEvaluatorReconciler( + if err = controller.NewNemoEntitystoreReconciler( mgr.GetClient(), mgr.GetScheme(), updater, render.NewRenderer("/manifests"), - ctrl.Log.WithName("controllers").WithName("NemoEvaluator"), + ctrl.Log.WithName("controllers").WithName("NemoEntitystore"), ).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "NemoEvaluator") + setupLog.Error(err, "unable to create controller", "controller", "NemoEntitystore") os.Exit(1) } - if err = controller.NewNemoEntitystoreReconciler( + if err = controller.NewNemoDatastoreReconciler( mgr.GetClient(), mgr.GetScheme(), updater, render.NewRenderer("/manifests"), - ctrl.Log.WithName("controllers").WithName("NemoEntitystore"), + ctrl.Log.WithName("controllers").WithName("NemoDatastore"), ).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "NemoEntitystore") + setupLog.Error(err, "unable to create controller", "controller", "NemoDatastore") os.Exit(1) } diff --git a/config/crd/bases/apps.nvidia.com_nemodatastores.yaml b/config/crd/bases/apps.nvidia.com_nemodatastores.yaml index ffdc4923..d28fc3a0 100644 --- a/config/crd/bases/apps.nvidia.com_nemodatastores.yaml +++ b/config/crd/bases/apps.nvidia.com_nemodatastores.yaml @@ -63,42 +63,62 @@ spec: items: type: string type: array - dataStoreParams: + databaseConfig: + description: ExternalDatabase contains external PostgreSQL configuration properties: - appVersion: - type: string - databaseHost: - type: string - databasePort: - type: string - databaseURL: - 
type: string - dbSecret: - type: string - envConfigmap: - type: string - envSecret: - type: string - giteaEndpoint: - type: string - giteaSecret: + credentials: + description: |- + DatabaseCredentials stores the configuration to retrieve the database credentials. + Required, must not be nil. + properties: + passwordKey: + default: password + description: |- + PasswordKey is the name of the key in the `CredentialsSecret` secret for the database credentials. + Defaults to "password". + type: string + secretName: + description: |- + SecretName is the name of the secret which has the database credentials for a NEMO service user. + Required, must not be empty. + minLength: 1 + type: string + user: + description: |- + User is the non-root username for a NEMO Service in the database. + Required, must not be empty. + minLength: 1 + type: string + required: + - secretName + - user + type: object + databaseName: + description: |- + DatabaseName is the database name for a NEMO Service. + Required, must not be empty. + minLength: 1 type: string - initContainerCommand: - items: - type: string - type: array - initContainerImage: + host: + description: |- + Host is the hostname of the database. + Required, must not be empty. + minLength: 1 type: string + port: + default: 5432 + description: |- + Port is the port where the database is reachable at. + If specified, this must be a valid port number, 0 < databasePort < 65536. + Defaults to 5432. 
+ format: int32 + maximum: 65535 + minimum: 1 + type: integer required: - - appVersion - - databaseHost - - databasePort - - databaseURL - - dbSecret - - envConfigmap - - envSecret - - giteaEndpoint - - giteaSecret + - credentials + - databaseName + - host type: object env: items: @@ -744,6 +764,54 @@ spec: additionalProperties: type: string type: object + objectStoreConfig: + description: ObjectStore specifies the location and credentials for + accessing the external Object Storage + properties: + bucketName: + description: BucketName is the bucket where LFS files will be + stored + type: string + credentials: + description: ObjectStoreCredentials stores the configuration to + retrieve the object store credentials + properties: + passwordKey: + description: PasswordKey is the name of the key in the `CredentialsSecret` + secret for the object store credentials. + type: string + secretName: + description: SecretName is the name of the secret which has + the object credentials for a NEMO service user. + type: string + user: + description: User is the non-root username for a NEMO Service + in the object store. + type: string + required: + - passwordKey + - secretName + - user + type: object + endpoint: + description: endpoint is the fully qualidfied object store endpoint + type: string + region: + description: Region is the region where bucket is hosted + type: string + serveDirect: + default: true + type: boolean + ssl: + description: SSL enable ssl for object store transport + type: boolean + required: + - bucketName + - credentials + - endpoint + - region + - ssl + type: object podAffinity: description: Pod affinity is a group of inter pod affinity scheduling rules. 
@@ -1100,6 +1168,29 @@ spec: type: array x-kubernetes-list-type: atomic type: object + pvc: + description: PVC defines the PersistentVolumeClaim for the datastore + properties: + create: + description: Create indicates to create a new PVC + type: boolean + name: + description: Name is the name of the PVC + type: string + size: + description: Size of the NIM cache in Gi, used during PVC creation + type: string + storageClass: + description: StorageClass to be used for PVC creation. Leave it + as empty if the PVC is already created. + type: string + subPath: + type: string + volumeAccessMode: + description: VolumeAccessMode is the volume access mode of the + PVC + type: string + type: object readinessProbe: description: Probe defines attributes for startup/liveness/readiness probes @@ -1932,6 +2023,27 @@ spec: - maxReplicas type: object type: object + secrets: + description: secrets contains the pre-requisite secrets that must + be created before deploying the datastore CR + properties: + datastoreConfigSecret: + type: string + datastoreInitSecret: + type: string + datastoreInlineConfigSecret: + type: string + giteaAdminSecret: + type: string + lfsJwtSecret: + type: string + required: + - datastoreConfigSecret + - datastoreInitSecret + - datastoreInlineConfigSecret + - giteaAdminSecret + - lfsJwtSecret + type: object startupProbe: description: Probe defines attributes for startup/liveness/readiness probes @@ -2136,7 +2248,9 @@ spec: type: integer required: - authSecret - - dataStoreParams + - databaseConfig + - objectStoreConfig + - secrets type: object status: description: NemoDatastoreStatus defines the observed state of NemoDatastore diff --git a/config/samples/apps_v1alpha1_nimdatastore.yaml b/config/samples/apps_v1alpha1_nimdatastore.yaml new file mode 100644 index 00000000..01e2f34c --- /dev/null +++ b/config/samples/apps_v1alpha1_nimdatastore.yaml @@ -0,0 +1,77 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NemoDatastore +metadata: + name: nemodatastore 
+spec: + authSecret: ngc-image-pull-secret + secrets: + datastoreConfigSecret: "nemo-ms-nemo-datastore" + datastoreInitSecret: "nemo-ms-nemo-datastore-init" + datastoreInlineConfigSecret: "nemo-ms-nemo-datastore-inline-config" + giteaAdminSecret: "gitea-admin-credentials" + lfsJwtSecret: "nemo-ms-nemo-datastore--lfs-jwt" + objectStoreConfig: + credentials: + user: minioUser + secretName: nds-minio-existing-secret + passwordKey: objectStoreSecret + serveDirect: true + endpoint: minio.k8s-nim-operator-system.svc.cluster.local:9000 + bucketName: datastore-dev + region: object-store-region + ssl: false + databaseConfig: + credentials: + user: ndsuser + secretName: nds-pg-existing-secret + passwordKey: postgresPassword + host: nds-pg-postgresql + port: 5432 + databaseName: ndsdb + pvc: + name: "pvc-shared-data" + create: true + storageClass: "local-path" + volumeAccessMode: ReadWriteOnce + size: "10Gi" + expose: + service: + port: 3000 + type: ClusterIP + image: + repository: "nvcr.io/nvidian/nemo-llm/datastore" + tag: "25.01-rc8" + pullPolicy: IfNotPresent + replicas: 1 + resources: + requests: + memory: "256Mi" + cpu: "500m" + limits: + memory: "512Mi" + cpu: "1" + livenessProbe: + enabled: true + probe: + httpGet: + path: /v1/health + port: 3000 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 5 + readinessProbe: + enabled: true + probe: + httpGet: + path: /v1/health + port: 3000 + scheme: HTTP + initialDelaySeconds: 5 + timeoutSeconds: 3 + # metrics: + # enabled: true + # serviceMonitor: + # additionalLabels: + # app: sample-nemodatastore + # interval: "30s" + # scrapeTimeout: "10s" diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemodatastores.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemodatastores.yaml index ffdc4923..4668139e 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemodatastores.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemodatastores.yaml @@ -63,43 +63,6 @@ 
spec: items: type: string type: array - dataStoreParams: - properties: - appVersion: - type: string - databaseHost: - type: string - databasePort: - type: string - databaseURL: - type: string - dbSecret: - type: string - envConfigmap: - type: string - envSecret: - type: string - giteaEndpoint: - type: string - giteaSecret: - type: string - initContainerCommand: - items: - type: string - type: array - initContainerImage: - type: string - required: - - appVersion - - databaseHost - - databasePort - - databaseURL - - dbSecret - - envConfigmap - - envSecret - - giteaEndpoint - - giteaSecret - type: object env: items: description: EnvVar represents an environment variable present in @@ -525,6 +488,31 @@ spec: - port type: object type: object + externalDatabase: + properties: + database: + type: string + databaseSecret: + type: string + databaseSecretKey: + type: string + host: + type: string + port: + type: integer + sslMode: + type: string + user: + type: string + required: + - database + - databaseSecret + - databaseSecretKey + - host + - port + - sslMode + - user + type: object groupID: format: int64 type: integer @@ -744,6 +732,34 @@ spec: additionalProperties: type: string type: object + objectStore: + properties: + bucketName: + type: string + endpoint: + type: string + objectStoreSecret: + type: string + objectStoreSecretAccessKey: + type: string + objectStoreSecretAccessSecret: + type: string + region: + type: string + serveDirect: + type: boolean + ssl: + type: boolean + required: + - bucketName + - endpoint + - objectStoreSecret + - objectStoreSecretAccessKey + - objectStoreSecretAccessSecret + - region + - serveDirect + - ssl + type: object podAffinity: description: Pod affinity is a group of inter pod affinity scheduling rules. 
@@ -1100,6 +1116,30 @@ spec: type: array x-kubernetes-list-type: atomic type: object + pvc: + description: PersistentVolumeClaim defines the attributes of PVC used + as a source for caching NIM model + properties: + create: + description: Create indicates to create a new PVC + type: boolean + name: + description: Name is the name of the PVC + type: string + size: + description: Size of the NIM cache in Gi, used during PVC creation + type: string + storageClass: + description: StorageClass to be used for PVC creation. Leave it + as empty if the PVC is already created. + type: string + subPath: + type: string + volumeAccessMode: + description: VolumeAccessMode is the volume access mode of the + PVC + type: string + type: object readinessProbe: description: Probe defines attributes for startup/liveness/readiness probes @@ -1932,6 +1972,25 @@ spec: - maxReplicas type: object type: object + secrets: + properties: + datastoreConfigSecret: + type: string + datastoreInitSecret: + type: string + datastoreInlineConfigSecret: + type: string + giteaAdminSecret: + type: string + lfsJwtSecret: + type: string + required: + - datastoreConfigSecret + - datastoreInitSecret + - datastoreInlineConfigSecret + - giteaAdminSecret + - lfsJwtSecret + type: object startupProbe: description: Probe defines attributes for startup/liveness/readiness probes @@ -2136,7 +2195,9 @@ spec: type: integer required: - authSecret - - dataStoreParams + - externalDatabase + - objectStore + - secrets type: object status: description: NemoDatastoreStatus defines the observed state of NemoDatastore diff --git a/internal/controller/nemo_datastore_controller.go b/internal/controller/nemo_datastore_controller.go index 74567ea1..11f79c2b 100644 --- a/internal/controller/nemo_datastore_controller.go +++ b/internal/controller/nemo_datastore_controller.go @@ -41,6 +41,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/rest" "k8s.io/client-go/tools/record" + "k8s.io/utils/ptr" ctrl 
"sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -274,6 +275,13 @@ func (r *NemoDatastoreReconciler) reconcileNemoDatastore(ctx context.Context, ne "NemoDatastore %s failed, msg: %s", nemoDatastore.Name, err.Error()) } }() + + err = r.reconcilePVC(ctx, nemoDatastore) + if err != nil { + logger.Error(err, "reconciliation of pvc failed", "pvc", nemoDatastore.GetPVCName()) + return ctrl.Result{}, err + } + // Generate annotation for the current operator-version and apply to all resources // Get generic name for all resources namespacedName := types.NamespacedName{Name: nemoDatastore.GetName(), Namespace: nemoDatastore.GetNamespace()} @@ -371,9 +379,12 @@ func (r *NemoDatastoreReconciler) reconcileNemoDatastore(ctx context.Context, ne if len(initContainers) > 0 { result.Spec.Template.Spec.InitContainers = initContainers } - envFrom := nemoDatastore.GetEnvFrom() - if len(envFrom) > 0 { - result.Spec.Template.Spec.Containers[0].EnvFrom = envFrom + fsGroup := ptr.To[int64](1000) + if nemoDatastore.Spec.GroupID != nil { + fsGroup = nemoDatastore.Spec.GroupID + } + result.Spec.Template.Spec.SecurityContext = &corev1.PodSecurityContext{ + FSGroup: fsGroup, } return result, nil }, "deployment", conditions.ReasonDeploymentFailed) @@ -407,6 +418,41 @@ func (r *NemoDatastoreReconciler) reconcileNemoDatastore(ctx context.Context, ne return ctrl.Result{}, nil } +func (r *NemoDatastoreReconciler) reconcilePVC(ctx context.Context, nemoDatastore *appsv1alpha1.NemoDatastore) error { + logger := r.GetLogger() + pvcName := nemoDatastore.GetPVCName() + pvcNamespacedName := types.NamespacedName{Name: pvcName, Namespace: nemoDatastore.GetNamespace()} + pvc := &corev1.PersistentVolumeClaim{} + err := r.Get(ctx, pvcNamespacedName, pvc) + if err != nil && client.IgnoreNotFound(err) != nil { + return err + } + + // If PVC does not exist, create a new one if creation flag is enabled + if err != nil { + 
if nemoDatastore.ShouldCreatePersistentStorage() { + pvc, err = shared.ConstructPVC(*nemoDatastore.Spec.PVC, metav1.ObjectMeta{Name: pvcName, Namespace: nemoDatastore.GetNamespace()}) + if err != nil { + logger.Error(err, "Failed to construct pvc", "name", pvcName) + return err + } + if err := controllerutil.SetControllerReference(nemoDatastore, pvc, r.GetScheme()); err != nil { + return err + } + err = r.Create(ctx, pvc) + if err != nil { + logger.Error(err, "Failed to create pvc", "name", pvcName) + return err + } + logger.Info("Created PVC for NeMo Datastore", "pvc", pvc.Name) + } else { + logger.Error(err, "PVC doesn't exist and auto-creation is not enabled", "name", pvcNamespacedName) + return err + } + } + return nil +} + func (r *NemoDatastoreReconciler) renderAndSyncResource(ctx context.Context, NemoDatastore client.Object, renderer *render.Renderer, obj client.Object, renderFunc func() (client.Object, error), conditionType string, reason string) error { logger := log.FromContext(ctx) diff --git a/manifests/deployment.yaml b/manifests/deployment.yaml index cd22d174..da2c28d5 100644 --- a/manifests/deployment.yaml +++ b/manifests/deployment.yaml @@ -78,6 +78,11 @@ spec: configMap: name: {{ .ConfigMap.Name }} {{- end }} + {{- if .Secret }} + secret: + secretName: {{ .Secret.SecretName }} + defaultMode: {{ .Secret.DefaultMode }} + {{- end }} {{- if .EmptyDir }} emptyDir: medium: {{ .EmptyDir.Medium }}