@@ -106,10 +106,9 @@ func (r *InstaSliceDaemonsetReconciler) Reconcile(ctx context.Context, req ctrl.
106
106
}
107
107
108
108
if pod .Labels ["processedbydeamonset" ] == "true" && ! pod .DeletionTimestamp .IsZero () {
109
- fmt .Printf ("Deleted pod %v" , pod .Name )
110
109
111
- // Path to the file containing the node name
112
- // Iterate over the allocations and delete the specific one
110
+ logger . Info ( "Performing cleanup " , "pod" , pod . Name )
111
+
113
112
r .cleanUp (ctx , pod , logger )
114
113
}
115
114
@@ -134,7 +133,7 @@ func (r *InstaSliceDaemonsetReconciler) Reconcile(ctx context.Context, req ctrl.
134
133
}
135
134
136
135
if boolDecisionToCreateSlice && boolcontrollerProcessingDone && pod .Status .Phase != v1 .PodSucceeded {
137
- //Assume pod only has one container with one GPU requests
136
+ //Assume pod only has one container with one GPU request
138
137
var profileName string
139
138
var Giprofileid int
140
139
var Ciprofileid int
@@ -143,6 +142,8 @@ func (r *InstaSliceDaemonsetReconciler) Reconcile(ctx context.Context, req ctrl.
143
142
var migUUID string
144
143
var deviceForMig string
145
144
var instasliceList inferencev1.InstasliceList
145
+ var giId uint32
146
+ var ciId uint32
146
147
ret := nvml .Init ()
147
148
if ret != nvml .SUCCESS {
148
149
fmt .Printf ("Unable to initialize NVML: %v \n " , nvml .ErrorString (ret ))
@@ -155,8 +156,6 @@ func (r *InstaSliceDaemonsetReconciler) Reconcile(ctx context.Context, req ctrl.
155
156
156
157
deviceForMig , profileName , Giprofileid , Ciprofileid , CiEngProfileid = r .getAllocation (ctx , instasliceList , deviceForMig , profileName , Giprofileid , Ciprofileid , CiEngProfileid )
157
158
placement := nvml.GpuInstancePlacement {}
158
- var giId uint32
159
- var ciId uint32
160
159
for i := 0 ; i < availableGpus ; i ++ {
161
160
device , ret := nvml .DeviceGetHandleByIndex (i )
162
161
if ret != nvml .SUCCESS {
@@ -185,14 +184,12 @@ func (r *InstaSliceDaemonsetReconciler) Reconcile(ctx context.Context, req ctrl.
185
184
fmt .Printf ("error getting GPU device handle: %v \n " , ret )
186
185
}
187
186
188
- //TODO: Remove hardcoding of profile name
189
- fmt .Printf ("obtained Giprofile is %v\n " , Giprofileid )
190
187
giProfileInfo , retCodeForGi := device .GetGpuInstanceProfileInfo (Giprofileid )
191
188
if retCodeForGi != nvml .SUCCESS {
192
- fmt . Printf ( "error getting GPU instance profile info for '%v': %v \n " , giProfileInfo , retCodeForGi )
189
+ logger . Error ( err , "error getting GPU instance profile info" , "giProfileInfo" , giProfileInfo , "retCodeForGi" , retCodeForGi )
193
190
}
194
191
195
- fmt . Printf ("The profile id is %v with memory size %v \n " , giProfileInfo .Id , giProfileInfo .MemorySizeMB )
192
+ logger . Info ("The profile id is" , "giProfileInfo" , giProfileInfo .Id , "Memory" , giProfileInfo .MemorySizeMB )
196
193
197
194
// Path to the file containing the node name
198
195
updatedPlacement := r .getAllocationsToprepare (ctx , placement )
@@ -241,8 +238,7 @@ func (r *InstaSliceDaemonsetReconciler) Reconcile(ctx context.Context, req ctrl.
241
238
existingAllocations , updatedAllocation := r .updateAllocationProcessing (instaslice , deviceUUID , profileName )
242
239
r .createPreparedEntry (profileName , placement , deviceUUID , pod , giId , ciId , instaslice , migUUID , updatedAllocation )
243
240
244
- fmt .Printf ("creating config map" )
245
- createConfigMap (context .TODO (), r .Client , migUUID , existingAllocations .Namespace , existingAllocations .PodName )
241
+ createConfigMap (context .TODO (), r .Client , migUUID , existingAllocations .Namespace , existingAllocations .PodName , logger )
246
242
247
243
podUpdate := r .labelsForDaemonset (pod )
248
244
// Retry update operation with backoff
@@ -667,13 +663,11 @@ func (*InstaSliceDaemonsetReconciler) discoverDanglingSlices(instaslice *inferen
667
663
668
664
for _ , mig := range migs {
669
665
migUUID , _ := mig .GetUUID ()
670
- fmt .Printf ("The mig UUID obtained is %v" , migUUID )
671
-
672
666
profile , errForProfile := mig .GetProfile ()
673
667
if errForProfile != nil {
674
668
fmt .Printf ("error getting profile in mig loop: %v" , errForProfile )
675
669
}
676
- fmt . Printf ( "The profile is %v \n " , profile . GetInfo ())
670
+
677
671
giID , ret := mig .GetGpuInstanceId ()
678
672
if ret != nvml .SUCCESS {
679
673
fmt .Printf ("error getting GPU instance ID for MIG device: %v" , ret )
@@ -686,8 +680,7 @@ func (*InstaSliceDaemonsetReconciler) discoverDanglingSlices(instaslice *inferen
686
680
if err2 != nvml .SUCCESS {
687
681
fmt .Printf ("err2 %v\n " , err2 )
688
682
}
689
- fmt .Printf ("The instance info size %v and start %v\n " , gpuInstanceInfo .Placement .Size , gpuInstanceInfo .Placement .Start )
690
- fmt .Printf ("The GPU inst info id is %v\n " , gpuInstanceInfo .Id )
683
+
691
684
ciID , ret := mig .GetComputeInstanceId ()
692
685
if ret != nvml .SUCCESS {
693
686
fmt .Printf ("error getting Compute instance ID for MIG device: %v" , ret )
@@ -700,7 +693,6 @@ func (*InstaSliceDaemonsetReconciler) discoverDanglingSlices(instaslice *inferen
700
693
if ret != nvml .SUCCESS {
701
694
fmt .Printf ("error getting Compute instance info for '%v': %v" , ciID , ret )
702
695
}
703
- fmt .Printf ("The compute instance id is %v\n " , ciInfo .Id )
704
696
prepared := inferencev1.PreparedDetails {
705
697
Profile : profile .GetInfo ().String (),
706
698
Start : gpuInstanceInfo .Placement .Start ,
@@ -763,7 +755,7 @@ func (m MigProfile) Attributes() []string {
763
755
}
764
756
765
757
// createConfigMap creates the ConfigMap that is used by Pods to consume a MIG device.
766
- func createConfigMap (ctx context.Context , k8sClient client.Client , migGPUUUID string , namespace string , podName string ) error {
758
+ func createConfigMap (ctx context.Context , k8sClient client.Client , migGPUUUID string , namespace string , podName string , logger logr. Logger ) error {
767
759
configMap := & v1.ConfigMap {
768
760
ObjectMeta : metav1.ObjectMeta {
769
761
Name : podName ,
@@ -780,8 +772,7 @@ func createConfigMap(ctx context.Context, k8sClient client.Client, migGPUUUID st
780
772
log .FromContext (ctx ).Error (err , "Failed to create ConfigMap" )
781
773
return err
782
774
}
783
- fmt .Printf ("ConfigMap created successfully %v" , configMap .Name )
784
- //log.FromContext(ctx).Info("ConfigMap created successfully", "ConfigMap.Name", configMap.Name)
775
+ logger .Info ("ConfigMap created successfully" , "ConfigMap.Name" , configMap .Name )
785
776
return nil
786
777
}
787
778
0 commit comments