Skip to content

Commit 6e0ed57

Browse files
Merge pull request #554 from asm582/doc_acc
add docs accounting and resolve merge issue
2 parents 55fb5ee + b10160f commit 6e0ed57

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

pkg/controller/queuejob/queuejob_controller_ex.go

+8-3
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,7 @@ func (qjm *XController) addTotalSnapshotResourcesConsumedByAw(totalgpu int32, to
718718

719719
func (qjm *XController) getAggregatedAvailableResourcesPriority(unallocatedClusterResources *clusterstateapi.
720720
Resource, targetpr float64, requestingJob *arbv1.AppWrapper, agentId string) (*clusterstateapi.Resource, []*arbv1.AppWrapper) {
721+
// Get the available free resources in the cluster.
721722
r := unallocatedClusterResources.Clone()
722723
// Track preemption resources
723724
preemptable := clusterstateapi.EmptyResource()
@@ -732,7 +733,10 @@ func (qjm *XController) getAggregatedAvailableResourcesPriority(unallocatedClust
732733
klog.Errorf("[getAggAvaiResPri] Unable to obtain the list of queueJobs %+v", err)
733734
return r, nil
734735
}
735-
736+
// For every AW whose canRun status is true:
737+
// In non-preemption mode, we reserve resources for AWs.
738+
// Reserving is done by subtracting the resources of the running or completed pods owned by the AW from the AW's total resources.
739+
// An AW can be running while items it owns have already completed, or while a new set of pods is yet to be spawned.
736740
for _, value := range queueJobs {
737741
klog.V(10).Infof("[getAggAvaiResPri] %s: Evaluating job: %s to calculate aggregated resources.", time.Now().String(), value.Name)
738742
if value.Name == requestingJob.Name {
@@ -797,10 +801,11 @@ func (qjm *XController) getAggregatedAvailableResourcesPriority(unallocatedClust
797801

798802
totalResource := qjm.addTotalSnapshotResourcesConsumedByAw(value.Status.TotalGPU, value.Status.TotalCPU, value.Status.TotalMemory)
799803
klog.V(6).Infof("[getAggAvaiResPri] total resources consumed by Appwrapper %v when CanRun are %v", value.Name, totalResource)
800-
pending, err = qjv.NonNegSub(totalResource)
804+
delta, err := qjv.NonNegSub(totalResource)
805+
pending = pending.Add(delta)
801806
if err != nil {
802807
klog.Warningf("[getAggAvaiResPri] Subtraction of resources failed, adding entire appwrapper resoources %v, %v", qjv, err)
803-
pending = qjv
808+
pending = pending.Add(qjv)
804809
}
805810
klog.V(6).Infof("[getAggAvaiResPri] The value of pending is %v", pending)
806811
continue

0 commit comments

Comments
 (0)