Skip to content

Commit 5c086c8

Browse files
authored
Merge pull request #2 from project-codeflare/patch_firstfit
fix firstfit allocation
2 parents 313d89e + 6bced1b commit 5c086c8

File tree

4 files changed

+97
-38
lines changed

4 files changed

+97
-38
lines changed

api/v1alpha1/instaslice_types.go

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ type AllocationDetails struct {
3939
Start uint32 `json:"start"`
4040
Size uint32 `json:"size"`
4141
PodUUID string `json:"podUUID"`
42+
GPUUUID string `json:"gpuUUID"`
4243
Nodename string `json:"nodename"`
4344
Processed string `json:"processed"`
4445
Giprofileid int `json:"giprofileid"`

config/crd/bases/inference.codeflare.dev_instaslices.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ spec:
5353
type: integer
5454
giprofileid:
5555
type: integer
56+
gpuUUID:
57+
type: string
5658
namespace:
5759
type: string
5860
nodename:
@@ -75,6 +77,7 @@ spec:
7577
- ciProfileid
7678
- ciengprofileid
7779
- giprofileid
80+
- gpuUUID
7881
- namespace
7982
- nodename
8083
- podName

internal/controller/instaslice_controller.go

+79-26
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package controller
1818

1919
import (
2020
"context"
21+
"fmt"
2122
"regexp"
2223
"strings"
2324

@@ -43,7 +44,7 @@ type InstasliceReconciler struct {
4344

4445
// AllocationPolicy interface with a single method
4546
type AllocationPolicy interface {
46-
SetAllocationDetails(profileName string, newStart, size uint32, podUUID string, nodename string, processed string, discoveredGiprofile int, Ciprofileid int, Ciengprofileid int, namespace string, podName string) *inferencev1alpha1.AllocationDetails
47+
SetAllocationDetails(profileName string, newStart, size uint32, podUUID string, nodename string, processed string, discoveredGiprofile int, Ciprofileid int, Ciengprofileid int, namespace string, podName string, gpuUuid string) *inferencev1alpha1.AllocationDetails
4748
}
4849

4950
type RightToLeftPolicy struct{}
@@ -133,31 +134,29 @@ func (r *InstasliceReconciler) Reconcile(ctx context.Context, req ctrl.Request)
133134

134135
func (r *InstasliceReconciler) findDeviceForASlice(ctx context.Context, instaslice inferencev1alpha1.Instaslice, gpuUUID string, profileName string, policy AllocationPolicy, pod *v1.Pod, logger logr.Logger) (string, bool, reconcile.Result, error) {
135136
//TODO: discover this value, this may work for A100 and H100 for now.
136-
largestIndex := uint32(7)
137137
for gpuuuid, _ := range instaslice.Spec.MigGPUUUID {
138138
gpuUUID = gpuuuid
139139
if instaslice.Spec.Allocations == nil {
140140
instaslice.Spec.Allocations = make(map[string]inferencev1alpha1.AllocationDetails)
141141
}
142-
maxStart := r.extractMaxStart(instaslice, gpuUUID)
143-
size, discoveredGiprofile, Ciprofileid, Ciengprofileid := r.extractGpuProfile(instaslice, profileName)
144-
if maxStart+uint32(size) <= largestIndex {
145-
logger.Info("Device where the slice will be placed", "DeviceUUID", gpuUUID)
142+
newStart := r.getStartIndexFromPreparedState(instaslice, gpuUUID, profileName)
143+
notValidIndex := uint32(8)
144+
if newStart == notValidIndex {
145+
//Move to next GPU
146+
continue
146147
}
147-
148-
newStart := maxStart - uint32(size)
149-
logger.Info("The placement is ", "index", newStart)
148+
size, discoveredGiprofile, Ciprofileid, Ciengprofileid := r.extractGpuProfile(instaslice, profileName)
150149
allocDetails := policy.SetAllocationDetails(profileName, uint32(newStart), uint32(size),
151150
string(pod.UID), instaslice.Name, "no", discoveredGiprofile,
152-
Ciprofileid, Ciengprofileid, pod.Namespace, pod.Name)
153-
instaslice.Spec.Allocations[gpuUUID] = *allocDetails
151+
Ciprofileid, Ciengprofileid, pod.Namespace, pod.Name, gpuUUID)
152+
instaslice.Spec.Allocations[string(pod.UID)] = *allocDetails
154153
if err := r.Update(ctx, &instaslice); err != nil {
155154
logger.Error(err, "Error updating instaslice allocations")
156155
return "", true, ctrl.Result{}, err
157156
}
158157
return gpuUUID, false, reconcile.Result{}, nil
159158
}
160-
return gpuUUID, false, reconcile.Result{}, nil
159+
return "", false, reconcile.Result{}, fmt.Errorf("No valid GPU found that can fit slice")
161160
}
162161

163162
// Extract profile name from the container limits spec
@@ -198,23 +197,76 @@ func (*InstasliceReconciler) extractGpuProfile(instaslice inferencev1alpha1.Inst
198197
return size, discoveredGiprofile, Ciprofileid, Ciengprofileid
199198
}
200199

201-
// Walk through all the allocated devices and get the max position where the slice could be allocated.
202-
// the implementation is specific to first fit and this is needed until we get new strategy implemented
203-
// in GPU operator.
204-
func (*InstasliceReconciler) extractMaxStart(instaslice inferencev1alpha1.Instaslice, gpuUUID string) uint32 {
205-
var maxSize uint32 = 0
206-
var maxStart uint32 = 0
200+
func (*InstasliceReconciler) getStartIndexFromPreparedState(instaslice inferencev1alpha1.Instaslice, gpuUUID string, profileName string) uint32 {
201+
//TODO: generalize, A100 and H100 have 7 profiles so it is hardwired for now
202+
var gpuAllocatedIndex [7]uint32
203+
// Explicitly set the array to all zeros
204+
for i := range gpuAllocatedIndex {
205+
gpuAllocatedIndex[i] = 0
206+
}
207207
for _, item := range instaslice.Spec.Prepared {
208208
if item.Parent == gpuUUID {
209-
if maxSize < item.Size {
210-
maxSize = item.Size
209+
for i := 0; i < int(item.Size); i++ {
210+
gpuAllocatedIndex[int(item.Start)+i] = 1
211211
}
212-
if maxStart < item.Start {
213-
maxStart = item.Start
212+
213+
}
214+
}
215+
216+
var neededContinousSlot int
217+
var possiblePlacements []int
218+
for _, placement := range instaslice.Spec.Migplacement {
219+
if placement.Profile == profileName {
220+
neededContinousSlot = placement.Placements[0].Size
221+
for _, placement := range placement.Placements {
222+
possiblePlacements = append(possiblePlacements, placement.Start)
214223
}
224+
break
215225
}
216226
}
217-
return maxStart
227+
//TODO: generalize, no slices can be placed on 8th index
228+
//if we return 8 then assume no valid index is found.
229+
var newStart = uint32(8)
230+
for _, value := range possiblePlacements {
231+
if gpuAllocatedIndex[value] == 0 {
232+
if neededContinousSlot == 1 {
233+
newStart = uint32(value)
234+
break
235+
}
236+
if neededContinousSlot == 2 {
237+
if value+neededContinousSlot < len(gpuAllocatedIndex) {
238+
if gpuAllocatedIndex[value] == 0 && gpuAllocatedIndex[value+1] == 0 {
239+
newStart = uint32(value)
240+
break
241+
}
242+
}
243+
244+
}
245+
if neededContinousSlot == 4 {
246+
if value+neededContinousSlot < len(gpuAllocatedIndex) {
247+
if gpuAllocatedIndex[value] == 0 && gpuAllocatedIndex[value+1] == 0 && gpuAllocatedIndex[value+2] == 0 && gpuAllocatedIndex[value+3] == 0 {
248+
newStart = uint32(value)
249+
break
250+
}
251+
}
252+
}
253+
254+
if neededContinousSlot == 8 {
255+
//special case
256+
if value+neededContinousSlot-1 < len(gpuAllocatedIndex) {
257+
if gpuAllocatedIndex[value] == 0 && gpuAllocatedIndex[value+1] == 0 &&
258+
gpuAllocatedIndex[value+2] == 0 && gpuAllocatedIndex[value+3] == 0 &&
259+
gpuAllocatedIndex[value+4] == 0 && gpuAllocatedIndex[value+5] == 0 &&
260+
gpuAllocatedIndex[value+6] == 0 && gpuAllocatedIndex[value+7] == 0 {
261+
newStart = uint32(value)
262+
}
263+
}
264+
}
265+
}
266+
267+
}
268+
269+
return newStart
218270
}
219271

220272
// Since we don't have a user-facing CRD, we attach labels to the pods to indicate processing status.
@@ -274,7 +326,7 @@ func (r *InstasliceReconciler) unGatePod(ctx context.Context, podName string, re
274326
// Policy based allocation - FirstFit
275327
func (r *FirstFitPolicy) SetAllocationDetails(profileName string, newStart, size uint32, podUUID, nodename string,
276328
processed string, discoveredGiprofile int, Ciprofileid int, Ciengprofileid int,
277-
namespace string, podName string) *inferencev1alpha1.AllocationDetails {
329+
namespace string, podName string, gpuUuid string) *inferencev1alpha1.AllocationDetails {
278330
return &inferencev1alpha1.AllocationDetails{
279331
Profile: profileName,
280332
Start: uint32(newStart),
@@ -287,21 +339,22 @@ func (r *FirstFitPolicy) SetAllocationDetails(profileName string, newStart, size
287339
CIEngProfileID: Ciengprofileid,
288340
Namespace: namespace,
289341
PodName: podName,
342+
GPUUUID: gpuUuid,
290343
}
291344
}
292345

293346
// Policy based allocation - LeftToRight
294347
func (l *LeftToRightPolicy) SetAllocationDetails(profileName string, newStart, size uint32, podUUID, nodename string,
295348
processed string, discoveredGiprofile int, Ciprofileid int, Ciengprofileid int,
296-
namespace string, podName string) *inferencev1alpha1.AllocationDetails {
349+
namespace string, podName string, gpuUuid string) *inferencev1alpha1.AllocationDetails {
297350
// Implement the left-to-right policy here
298351
return &inferencev1alpha1.AllocationDetails{}
299352
}
300353

301354
// Policy based allocation - RightToLeft
302355
func (l *RightToLeftPolicy) SetAllocationDetails(profileName string, newStart, size uint32, podUUID, nodename string,
303356
processed string, discoveredGiprofile int, Ciprofileid int, Ciengprofileid int,
304-
namespace string, podName string) *inferencev1alpha1.AllocationDetails {
357+
namespace string, podName string, gpuUuid string) *inferencev1alpha1.AllocationDetails {
305358
// Implement the right-to-left policy here
306359
return &inferencev1alpha1.AllocationDetails{}
307360
}

internal/controller/instaslice_daemonset.go

+14-12
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ const (
9090

9191
func (r *InstaSliceDaemonsetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
9292
_ = log.FromContext(ctx)
93-
logger := log.Log.WithName("InstaSlice-controller")
93+
logger := log.Log.WithName("InstaSlice-daemonset")
9494

9595
pod := &v1.Pod{}
9696
//var podName string
@@ -144,6 +144,7 @@ func (r *InstaSliceDaemonsetReconciler) Reconcile(ctx context.Context, req ctrl.
144144
var instasliceList inferencev1alpha1.InstasliceList
145145
var giId uint32
146146
var ciId uint32
147+
var podUUID = string(pod.UID)
147148
ret := nvml.Init()
148149
if ret != nvml.SUCCESS {
149150
fmt.Printf("Unable to initialize NVML: %v \n", nvml.ErrorString(ret))
@@ -235,10 +236,10 @@ func (r *InstaSliceDaemonsetReconciler) Reconcile(ctx context.Context, req ctrl.
235236
if errGettingobj != nil {
236237
fmt.Printf("Error getting instaslice obj %v", errGettingobj)
237238
}
238-
existingAllocations, updatedAllocation := r.updateAllocationProcessing(instaslice, deviceUUID, profileName)
239-
r.createPreparedEntry(profileName, placement, deviceUUID, pod, giId, ciId, instaslice, migUUID, updatedAllocation)
239+
_, updatedAllocation := r.updateAllocationProcessing(instaslice, deviceUUID, profileName)
240+
r.createPreparedEntry(profileName, placement, podUUID, deviceUUID, pod, giId, ciId, instaslice, migUUID, updatedAllocation)
240241

241-
createConfigMap(context.TODO(), r.Client, migUUID, existingAllocations.Namespace, existingAllocations.PodName, logger)
242+
createConfigMap(context.TODO(), r.Client, migUUID, updatedAllocation.Namespace, updatedAllocation.PodName, logger)
242243

243244
podUpdate := r.labelsForDaemonset(pod)
244245
// Retry update operation with backoff
@@ -339,9 +340,9 @@ func (r *InstaSliceDaemonsetReconciler) getAllocation(ctx context.Context, insta
339340
for _, instaslice := range instasliceList.Items {
340341
nodeName := os.Getenv("NODE_NAME")
341342
if instaslice.Name == nodeName {
342-
for k, v := range instaslice.Spec.Allocations {
343+
for _, v := range instaslice.Spec.Allocations {
343344
if v.Processed == "no" {
344-
deviceForMig = k
345+
deviceForMig = v.GPUUUID
345346
profileName = v.Profile
346347
Giprofileid = v.Giprofileid
347348
Ciprofileid = v.CIProfileID
@@ -412,11 +413,11 @@ func (r *InstaSliceDaemonsetReconciler) cleanUp(ctx context.Context, pod *v1.Pod
412413
}
413414
}
414415

415-
func (r *InstaSliceDaemonsetReconciler) createPreparedEntry(profileName string, placement nvml.GpuInstancePlacement, deviceUUID string, pod *v1.Pod, giId uint32, ciId uint32, instaslice *inferencev1alpha1.Instaslice, migUUID string, updatedAllocation inferencev1alpha1.AllocationDetails) {
416+
func (r *InstaSliceDaemonsetReconciler) createPreparedEntry(profileName string, placement nvml.GpuInstancePlacement, podUUID string, deviceUUID string, pod *v1.Pod, giId uint32, ciId uint32, instaslice *inferencev1alpha1.Instaslice, migUUID string, updatedAllocation inferencev1alpha1.AllocationDetails) {
416417
instaslicePrepared := inferencev1alpha1.PreparedDetails{
417418
Profile: profileName,
418-
Start: placement.Start,
419-
Size: placement.Size,
419+
Start: updatedAllocation.Start,
420+
Size: updatedAllocation.Size,
420421
Parent: deviceUUID,
421422
PodUUID: string(pod.UID),
422423
Giinfoid: giId,
@@ -426,7 +427,7 @@ func (r *InstaSliceDaemonsetReconciler) createPreparedEntry(profileName string,
426427
instaslice.Spec.Prepared = make(map[string]inferencev1alpha1.PreparedDetails)
427428
}
428429
instaslice.Spec.Prepared[migUUID] = instaslicePrepared
429-
instaslice.Spec.Allocations[deviceUUID] = updatedAllocation
430+
instaslice.Spec.Allocations[podUUID] = updatedAllocation
430431

431432
errForUpdate := r.Update(context.TODO(), instaslice)
432433

@@ -435,8 +436,8 @@ func (r *InstaSliceDaemonsetReconciler) createPreparedEntry(profileName string,
435436
}
436437
}
437438

438-
func (*InstaSliceDaemonsetReconciler) updateAllocationProcessing(instaslice *inferencev1alpha1.Instaslice, deviceUUID string, profileName string) (inferencev1alpha1.AllocationDetails, inferencev1alpha1.AllocationDetails) {
439-
existingAllocations := instaslice.Spec.Allocations[deviceUUID]
439+
func (*InstaSliceDaemonsetReconciler) updateAllocationProcessing(instaslice *inferencev1alpha1.Instaslice, podUUID string, profileName string) (inferencev1alpha1.AllocationDetails, inferencev1alpha1.AllocationDetails) {
440+
existingAllocations := instaslice.Spec.Allocations[podUUID]
440441
updatedAllocation := inferencev1alpha1.AllocationDetails{
441442
Profile: profileName,
442443
Start: existingAllocations.Start,
@@ -447,6 +448,7 @@ func (*InstaSliceDaemonsetReconciler) updateAllocationProcessing(instaslice *inf
447448
Processed: "yes",
448449
Namespace: existingAllocations.Namespace,
449450
PodName: existingAllocations.PodName,
451+
GPUUUID: existingAllocations.GPUUUID,
450452
}
451453
return existingAllocations, updatedAllocation
452454
}

0 commit comments

Comments
 (0)