@@ -18,6 +18,7 @@ package controller
18
18
19
19
import (
20
20
"context"
21
+ "fmt"
21
22
"regexp"
22
23
"strings"
23
24
@@ -43,7 +44,7 @@ type InstasliceReconciler struct {
43
44
44
45
// AllocationPolicy interface with a single method
45
46
type AllocationPolicy interface {
46
- SetAllocationDetails (profileName string , newStart , size uint32 , podUUID string , nodename string , processed string , discoveredGiprofile int , Ciprofileid int , Ciengprofileid int , namespace string , podName string ) * inferencev1alpha1.AllocationDetails
47
+ SetAllocationDetails (profileName string , newStart , size uint32 , podUUID string , nodename string , processed string , discoveredGiprofile int , Ciprofileid int , Ciengprofileid int , namespace string , podName string , gpuUuid string ) * inferencev1alpha1.AllocationDetails
47
48
}
48
49
49
50
// RightToLeftPolicy is a stateless AllocationPolicy implementation; it has no
// fields, so its zero value is ready to use.
type RightToLeftPolicy struct{}
@@ -133,31 +134,29 @@ func (r *InstasliceReconciler) Reconcile(ctx context.Context, req ctrl.Request)
133
134
134
135
func (r * InstasliceReconciler ) findDeviceForASlice (ctx context.Context , instaslice inferencev1alpha1.Instaslice , gpuUUID string , profileName string , policy AllocationPolicy , pod * v1.Pod , logger logr.Logger ) (string , bool , reconcile.Result , error ) {
135
136
//TODO: discover this value, this may work for A100 and H100 for now.
136
- largestIndex := uint32 (7 )
137
137
for gpuuuid , _ := range instaslice .Spec .MigGPUUUID {
138
138
gpuUUID = gpuuuid
139
139
if instaslice .Spec .Allocations == nil {
140
140
instaslice .Spec .Allocations = make (map [string ]inferencev1alpha1.AllocationDetails )
141
141
}
142
- maxStart := r .extractMaxStart (instaslice , gpuUUID )
143
- size , discoveredGiprofile , Ciprofileid , Ciengprofileid := r .extractGpuProfile (instaslice , profileName )
144
- if maxStart + uint32 (size ) <= largestIndex {
145
- logger .Info ("Device where the slice will be placed" , "DeviceUUID" , gpuUUID )
142
+ newStart := r .getStartIndexFromPreparedState (instaslice , gpuUUID , profileName )
143
+ notValidIndex := uint32 (8 )
144
+ if newStart == notValidIndex {
145
+ //Move to next GPU
146
+ continue
146
147
}
147
-
148
- newStart := maxStart - uint32 (size )
149
- logger .Info ("The placement is " , "index" , newStart )
148
+ size , discoveredGiprofile , Ciprofileid , Ciengprofileid := r .extractGpuProfile (instaslice , profileName )
150
149
allocDetails := policy .SetAllocationDetails (profileName , uint32 (newStart ), uint32 (size ),
151
150
string (pod .UID ), instaslice .Name , "no" , discoveredGiprofile ,
152
- Ciprofileid , Ciengprofileid , pod .Namespace , pod .Name )
153
- instaslice .Spec .Allocations [gpuUUID ] = * allocDetails
151
+ Ciprofileid , Ciengprofileid , pod .Namespace , pod .Name , gpuUUID )
152
+ instaslice .Spec .Allocations [string ( pod . UID ) ] = * allocDetails
154
153
if err := r .Update (ctx , & instaslice ); err != nil {
155
154
logger .Error (err , "Error updating instaslice allocations" )
156
155
return "" , true , ctrl.Result {}, err
157
156
}
158
157
return gpuUUID , false , reconcile.Result {}, nil
159
158
}
160
- return gpuUUID , false , reconcile.Result {}, nil
159
+ return "" , false , reconcile.Result {}, fmt . Errorf ( "No valid GPU found that can fit slice" )
161
160
}
162
161
163
162
// Extract profile name from the container limits spec
@@ -198,23 +197,76 @@ func (*InstasliceReconciler) extractGpuProfile(instaslice inferencev1alpha1.Inst
198
197
return size , discoveredGiprofile , Ciprofileid , Ciengprofileid
199
198
}
200
199
201
- // Walk through all the allocated devices and get the max position where the slice could be allocated.
202
- // the implementation is specific to first fit and this is needed until we get new strategy implemented
203
- // in GPU operator.
204
- func (* InstasliceReconciler ) extractMaxStart (instaslice inferencev1alpha1.Instaslice , gpuUUID string ) uint32 {
205
- var maxSize uint32 = 0
206
- var maxStart uint32 = 0
200
+ func (* InstasliceReconciler ) getStartIndexFromPreparedState (instaslice inferencev1alpha1.Instaslice , gpuUUID string , profileName string ) uint32 {
201
+ //TODO: generalize, A100 and H100 have 7 profiles so it hardwired for now
202
+ var gpuAllocatedIndex [7 ]uint32
203
+ // Explicitly set the array to all zeros
204
+ for i := range gpuAllocatedIndex {
205
+ gpuAllocatedIndex [i ] = 0
206
+ }
207
207
for _ , item := range instaslice .Spec .Prepared {
208
208
if item .Parent == gpuUUID {
209
- if maxSize < item .Size {
210
- maxSize = item . Size
209
+ for i := 0 ; i < int ( item .Size ); i ++ {
210
+ gpuAllocatedIndex [ int ( item . Start ) + i ] = 1
211
211
}
212
- if maxStart < item .Start {
213
- maxStart = item .Start
212
+
213
+ }
214
+ }
215
+
216
+ var neededContinousSlot int
217
+ var possiblePlacements []int
218
+ for _ , placement := range instaslice .Spec .Migplacement {
219
+ if placement .Profile == profileName {
220
+ neededContinousSlot = placement .Placements [0 ].Size
221
+ for _ , placement := range placement .Placements {
222
+ possiblePlacements = append (possiblePlacements , placement .Start )
214
223
}
224
+ break
215
225
}
216
226
}
217
- return maxStart
227
+ //TODO: generalize, no slices can be placed on 8th index
228
+ //if we return 8 then assume no valid index is found.
229
+ var newStart = uint32 (8 )
230
+ for _ , value := range possiblePlacements {
231
+ if gpuAllocatedIndex [value ] == 0 {
232
+ if neededContinousSlot == 1 {
233
+ newStart = uint32 (value )
234
+ break
235
+ }
236
+ if neededContinousSlot == 2 {
237
+ if value + neededContinousSlot < len (gpuAllocatedIndex ) {
238
+ if gpuAllocatedIndex [value ] == 0 && gpuAllocatedIndex [value + 1 ] == 0 {
239
+ newStart = uint32 (value )
240
+ break
241
+ }
242
+ }
243
+
244
+ }
245
+ if neededContinousSlot == 4 {
246
+ if value + neededContinousSlot < len (gpuAllocatedIndex ) {
247
+ if gpuAllocatedIndex [value ] == 0 && gpuAllocatedIndex [value + 1 ] == 0 && gpuAllocatedIndex [value + 2 ] == 0 && gpuAllocatedIndex [value + 3 ] == 0 {
248
+ newStart = uint32 (value )
249
+ break
250
+ }
251
+ }
252
+ }
253
+
254
+ if neededContinousSlot == 8 {
255
+ //special case
256
+ if value + neededContinousSlot - 1 < len (gpuAllocatedIndex ) {
257
+ if gpuAllocatedIndex [value ] == 0 && gpuAllocatedIndex [value + 1 ] == 0 &&
258
+ gpuAllocatedIndex [value + 2 ] == 0 && gpuAllocatedIndex [value + 3 ] == 0 &&
259
+ gpuAllocatedIndex [value + 4 ] == 0 && gpuAllocatedIndex [value + 5 ] == 0 &&
260
+ gpuAllocatedIndex [value + 6 ] == 0 && gpuAllocatedIndex [value + 7 ] == 0 {
261
+ newStart = uint32 (value )
262
+ }
263
+ }
264
+ }
265
+ }
266
+
267
+ }
268
+
269
+ return newStart
218
270
}
219
271
220
272
// Since we dont have user facing CRD, we make our way with attaching labels to the pods to indicate processing status.
@@ -274,7 +326,7 @@ func (r *InstasliceReconciler) unGatePod(ctx context.Context, podName string, re
274
326
// Policy based allocation - FirstFit
275
327
func (r * FirstFitPolicy ) SetAllocationDetails (profileName string , newStart , size uint32 , podUUID , nodename string ,
276
328
processed string , discoveredGiprofile int , Ciprofileid int , Ciengprofileid int ,
277
- namespace string , podName string ) * inferencev1alpha1.AllocationDetails {
329
+ namespace string , podName string , gpuUuid string ) * inferencev1alpha1.AllocationDetails {
278
330
return & inferencev1alpha1.AllocationDetails {
279
331
Profile : profileName ,
280
332
Start : uint32 (newStart ),
@@ -287,21 +339,22 @@ func (r *FirstFitPolicy) SetAllocationDetails(profileName string, newStart, size
287
339
CIEngProfileID : Ciengprofileid ,
288
340
Namespace : namespace ,
289
341
PodName : podName ,
342
+ GPUUUID : gpuUuid ,
290
343
}
291
344
}
292
345
293
346
// Policy based allocation - LeftToRIght
294
347
func (l * LeftToRightPolicy ) SetAllocationDetails (profileName string , newStart , size uint32 , podUUID , nodename string ,
295
348
processed string , discoveredGiprofile int , Ciprofileid int , Ciengprofileid int ,
296
- namespace string , podName string ) * inferencev1alpha1.AllocationDetails {
349
+ namespace string , podName string , gpuUuid string ) * inferencev1alpha1.AllocationDetails {
297
350
// Implement the left-to-right policy here
298
351
return & inferencev1alpha1.AllocationDetails {}
299
352
}
300
353
301
354
// Policy based allocation - RigghToLeft
302
355
func (l * RightToLeftPolicy ) SetAllocationDetails (profileName string , newStart , size uint32 , podUUID , nodename string ,
303
356
processed string , discoveredGiprofile int , Ciprofileid int , Ciengprofileid int ,
304
- namespace string , podName string ) * inferencev1alpha1.AllocationDetails {
357
+ namespace string , podName string , gpuUuid string ) * inferencev1alpha1.AllocationDetails {
305
358
// Implement the left-to-right policy here
306
359
return & inferencev1alpha1.AllocationDetails {}
307
360
}
0 commit comments