Skip to content

Commit 3bd0f2f

Browse files
committed
Fix customizer RBAC for run.ai resources
Signed-off-by: Shiva Krishna, Merla <[email protected]>
1 parent 3019702 commit 3bd0f2f

File tree

5 files changed

+67
-3
lines changed

5 files changed

+67
-3
lines changed

api/apps/v1alpha1/nemo_customizer_types.go

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -866,6 +866,11 @@ func (n *NemoCustomizer) GetRoleParams() *rendertypes.RoleParams {
866866
Resources: []string{"pods", "persistentvolumeclaims", "services", "configmaps"},
867867
Verbs: []string{"create", "get", "list", "watch", "delete"},
868868
},
869+
{
870+
APIGroups: []string{""},
871+
Resources: []string{"events"},
872+
Verbs: []string{"create", "get", "list", "watch"},
873+
},
869874
{
870875
APIGroups: []string{"nvidia.com"},
871876
Resources: []string{"nemotrainingjobs", "nemotrainingjobs/status", "nemoentityhandlers"},
@@ -896,8 +901,18 @@ func (n *NemoCustomizer) GetRoleParams() *rendertypes.RoleParams {
896901
},
897902
}
898903

904+
runAIRules := []rbacv1.PolicyRule{
905+
{
906+
APIGroups: []string{"run.ai"},
907+
Resources: []string{"trainingworkloads", "runaijobs"},
908+
Verbs: []string{"create", "get", "list", "watch", "update", "delete", "patch"},
909+
},
910+
}
911+
899912
if n.Spec.Scheduler.Type == SchedulerTypeVolcano {
900913
params.Rules = append(params.Rules, volcanoRules...)
914+
} else if n.Spec.Scheduler.Type == SchedulerTypeRunAI {
915+
params.Rules = append(params.Rules, runAIRules...)
901916
}
902917

903918
return params

bundle/manifests/k8s-nim-operator.clusterserviceversion.yaml

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -744,9 +744,12 @@ spec:
744744
resources:
745745
- events
746746
verbs:
747+
- get
747748
- create
748749
- patch
749750
- update
751+
- list
752+
- watch
750753
clusterPermissions:
751754
- serviceAccountName: k8s-nim-operator
752755
rules:
@@ -763,9 +766,12 @@ spec:
763766
resources:
764767
- events
765768
verbs:
769+
- get
766770
- create
767771
- patch
768772
- update
773+
- list
774+
- watch
769775
- apiGroups:
770776
- ''
771777
resources:
@@ -1206,6 +1212,18 @@ spec:
12061212
- get
12071213
- list
12081214
- watch
1215+
- apiGroups:
1216+
- run.ai
1217+
resources:
1218+
- trainingworkloads
1219+
- runaijobs
1220+
verbs:
1221+
- get
1222+
- list
1223+
- watch
1224+
- delete
1225+
- patch
1226+
- update
12091227
- apiGroups:
12101228
- nvidia.com
12111229
resources:

config/rbac/role.yaml

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,11 @@ rules:
3030
- events
3131
verbs:
3232
- create
33+
- get
34+
- list
3335
- patch
3436
- update
37+
- watch
3538
- apiGroups:
3639
- ""
3740
resources:
@@ -221,6 +224,19 @@ rules:
221224
- patch
222225
- update
223226
- watch
227+
- apiGroups:
228+
- run.ai
229+
resources:
230+
- runaijobs
231+
- trainingworkloads
232+
verbs:
233+
- create
234+
- delete
235+
- get
236+
- list
237+
- patch
238+
- update
239+
- watch
224240
- apiGroups:
225241
- scheduling.incubator.k8s.io
226242
- scheduling.volcano.sh

deployments/helm/k8s-nim-operator/templates/manager-rbac.yaml

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,8 @@ rules:
3232
resources:
3333
- events
3434
verbs:
35+
- get
36+
- list
3537
- create
3638
- update
3739
- patch
@@ -486,7 +488,18 @@ rules:
486488
- get
487489
- list
488490
- watch
489-
491+
- apiGroups:
492+
- run.ai
493+
resources:
494+
- trainingworkloads
495+
- runaijobs
496+
verbs:
497+
- get
498+
- list
499+
- watch
500+
- delete
501+
- patch
502+
- update
490503

491504
---
492505
apiVersion: rbac.authorization.k8s.io/v1

internal/controller/nemocustomizer_controller.go

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,8 @@ func NewNemoCustomizerReconciler(client client.Client, scheme *runtime.Scheme, u
9090
// +kubebuilder:rbac:groups=apps.nvidia.com,resources=nemocustomizers,verbs=get;list;watch;create;update;patch;delete
9191
// +kubebuilder:rbac:groups=apps.nvidia.com,resources=nemocustomizers/status,verbs=get;update;patch
9292
// +kubebuilder:rbac:groups=apps.nvidia.com,resources=nemocustomizers/finalizers,verbs=update
93+
// +kubebuilder:rbac:groups=run.ai,resources=trainingworkloads;runaijobs,verbs=get;list;watch;create;update;patch;delete
94+
// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create
9395
// +kubebuilder:rbac:groups=nvidia.com,resources=nemotrainingjobs;nemotrainingjobs/status;nemoentityhandlers,verbs=create;get;list;watch;update;delete;patch
9496
// +kubebuilder:rbac:groups=batch.volcano.sh,resources=jobs;jobs/status,verbs=get;list;watch
9597
// +kubebuilder:rbac:groups=nodeinfo.volcano.sh,resources=numatopologies,verbs=get;list;watch
@@ -644,10 +646,10 @@ func (r *NemoCustomizerReconciler) addTrainingConfig(ctx context.Context, cfg ma
644646
trainingCfg["env"] = n.Spec.Training.Env
645647
trainingCfg["training_networking"] = n.Spec.Training.NetworkConfig
646648
trainingCfg["workspace_dir"] = n.Spec.Training.WorkspacePVC.MountPath
647-
trainingCfg["use_run_ai_executor"] = "false"
649+
trainingCfg["use_run_ai_executor"] = false
648650

649651
if n.Spec.Scheduler.Type == appsv1alpha1.SchedulerTypeRunAI {
650-
trainingCfg["use_run_ai_executor"] = "true"
652+
trainingCfg["use_run_ai_executor"] = true
651653
}
652654
if n.Spec.Training.TTLSecondsAfterFinished != nil {
653655
trainingCfg["ttl_seconds_after_finished"] = *n.Spec.Training.TTLSecondsAfterFinished

0 commit comments

Comments
 (0)