Skip to content

Commit

Permalink
Error out when no profile selection is made and no GPUs are assigned for auto-selection (#63)
Browse files Browse the repository at this point in the history

Signed-off-by: Shiva Krishna, Merla <[email protected]>
  • Loading branch information
shivamerla authored Aug 16, 2024
1 parent cf948f5 commit 5e3a9b1
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
10 changes: 7 additions & 3 deletions internal/controller/nimcache_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -608,15 +608,15 @@ func (r *NIMCacheReconciler) reconcileJob(ctx context.Context, nimCache *appsv1a
if err != nil && nimCache.Status.State != appsv1alpha1.NimCacheStatusReady {
job, err := constructJob(nimCache)
if err != nil {
logger.Error(err, "Failed to construct job", "name", getPvcName(nimCache, nimCache.Spec.Storage.PVC))
logger.Error(err, "Failed to construct job")
return err
}
if err := controllerutil.SetControllerReference(nimCache, job, r.GetScheme()); err != nil {
return err
}
err = r.Create(ctx, job)
if err != nil {
logger.Error(err, "Failed to create job", "name", getPvcName(nimCache, nimCache.Spec.Storage.PVC))
logger.Error(err, "Failed to create job")
return err
}
logger.Info("Created Job for NIM Cache", "job", jobName)
Expand Down Expand Up @@ -888,7 +888,7 @@ func constructJob(nimCache *appsv1alpha1.NIMCache) (*batchv1.Job, error) {
}

annotations := map[string]string{
"openshift.io/scc": "anyuid",
"openshift.io/scc": "nonroot",
"sidecar.istio.io/inject": "false",
}

Expand Down Expand Up @@ -1038,6 +1038,10 @@ func constructJob(nimCache *appsv1alpha1.NIMCache) (*batchv1.Job, error) {
return nil, err
}

if len(selectedProfiles) == 0 && nimCache.Spec.Resources.GPUs == 0 {
return nil, fmt.Errorf("No profiles are selected for caching and no GPUs are assigned to the pod for auto-selection")
}

if len(selectedProfiles) > 0 {
if utils.ContainsElement(selectedProfiles, AllProfiles) {
job.Spec.Template.Spec.Containers[0].Args = []string{"--all"}
Expand Down
18 changes: 11 additions & 7 deletions internal/controller/nimcache_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,9 @@ var _ = Describe("NIMCache Controller", func() {
Namespace: "default",
},
Spec: appsv1alpha1.NIMCacheSpec{
Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "test-container", PullSecret: "my-secret"}},
Storage: appsv1alpha1.Storage{PVC: appsv1alpha1.PersistentVolumeClaim{Create: ptr.To[bool](true), StorageClass: "standard", Size: "1Gi"}},
Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "test-container", PullSecret: "my-secret"}},
Storage: appsv1alpha1.Storage{PVC: appsv1alpha1.PersistentVolumeClaim{Create: ptr.To[bool](true), StorageClass: "standard", Size: "1Gi"}},
Resources: appsv1alpha1.Resources{GPUs: 1},
},
Status: appsv1alpha1.NIMCacheStatus{
State: appsv1alpha1.NimCacheStatusNotReady,
Expand Down Expand Up @@ -145,8 +146,9 @@ var _ = Describe("NIMCache Controller", func() {
Namespace: "default",
},
Spec: appsv1alpha1.NIMCacheSpec{
Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "test-container", PullSecret: "my-secret"}},
Storage: appsv1alpha1.Storage{PVC: appsv1alpha1.PersistentVolumeClaim{Create: ptr.To[bool](true), StorageClass: "standard", Size: "1Gi"}},
Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "test-container", PullSecret: "my-secret"}},
Storage: appsv1alpha1.Storage{PVC: appsv1alpha1.PersistentVolumeClaim{Create: ptr.To[bool](true), StorageClass: "standard", Size: "1Gi"}},
Resources: appsv1alpha1.Resources{GPUs: 1},
},
Status: appsv1alpha1.NIMCacheStatus{
State: "Initializing",
Expand Down Expand Up @@ -194,8 +196,9 @@ var _ = Describe("NIMCache Controller", func() {
Finalizers: []string{NIMCacheFinalizer},
},
Spec: appsv1alpha1.NIMCacheSpec{
Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "test-container", PullSecret: "my-secret"}},
Storage: appsv1alpha1.Storage{PVC: appsv1alpha1.PersistentVolumeClaim{Create: ptr.To[bool](true), StorageClass: "standard", Size: "1Gi"}},
Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "test-container", PullSecret: "my-secret"}},
Storage: appsv1alpha1.Storage{PVC: appsv1alpha1.PersistentVolumeClaim{Create: ptr.To[bool](true), StorageClass: "standard", Size: "1Gi"}},
Resources: appsv1alpha1.Resources{GPUs: 1},
},
Status: appsv1alpha1.NIMCacheStatus{
State: "Initializing",
Expand Down Expand Up @@ -433,7 +436,8 @@ var _ = Describe("NIMCache Controller", func() {
Namespace: "default",
},
Spec: appsv1alpha1.NIMCacheSpec{
Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "nvcr.io/nim:test", PullSecret: "my-secret"}},
Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "nvcr.io/nim:test", PullSecret: "my-secret"}},
Resources: appsv1alpha1.Resources{GPUs: 1},
},
}

Expand Down

0 comments on commit 5e3a9b1

Please sign in to comment.