@@ -718,6 +718,7 @@ func (qjm *XController) addTotalSnapshotResourcesConsumedByAw(totalgpu int32, to
718
718
719
719
func (qjm * XController ) getAggregatedAvailableResourcesPriority (unallocatedClusterResources * clusterstateapi.
720
720
Resource , targetpr float64 , requestingJob * arbv1.AppWrapper , agentId string ) (* clusterstateapi.Resource , []* arbv1.AppWrapper ) {
721
+ // Start from a copy of the free (unallocated) resources in the cluster.
721
722
r := unallocatedClusterResources .Clone ()
722
723
// Track preemption resources
723
724
preemptable := clusterstateapi .EmptyResource ()
@@ -732,7 +733,10 @@ func (qjm *XController) getAggregatedAvailableResourcesPriority(unallocatedClust
732
733
klog .Errorf ("[getAggAvaiResPri] Unable to obtain the list of queueJobs %+v" , err )
733
734
return r , nil
734
735
}
735
-
736
+ // For every AW whose canRun status is true:
737
+ // In non-preemption mode, we reserve resources for such AWs.
738
+ // Reserving is done by subtracting, from the AW's total resources, the resources of pods owned by the AW that are running or completed.
739
+ // An AW can be running while items owned by it have already completed, or while a new set of pods is yet to be spawned.
736
740
for _ , value := range queueJobs {
737
741
klog .V (10 ).Infof ("[getAggAvaiResPri] %s: Evaluating job: %s to calculate aggregated resources." , time .Now ().String (), value .Name )
738
742
if value .Name == requestingJob .Name {
@@ -797,10 +801,11 @@ func (qjm *XController) getAggregatedAvailableResourcesPriority(unallocatedClust
797
801
798
802
totalResource := qjm .addTotalSnapshotResourcesConsumedByAw (value .Status .TotalGPU , value .Status .TotalCPU , value .Status .TotalMemory )
799
803
klog .V (6 ).Infof ("[getAggAvaiResPri] total resources consumed by Appwrapper %v when CanRun are %v" , value .Name , totalResource )
800
- pending , err = qjv .NonNegSub (totalResource )
804
+ delta , err := qjv .NonNegSub (totalResource )
805
+ pending = pending .Add (delta )
801
806
if err != nil {
802
807
klog .Warningf ("[getAggAvaiResPri] Subtraction of resources failed, adding entire appwrapper resoources %v, %v" , qjv , err )
803
- pending = qjv
808
+ pending = pending . Add ( qjv )
804
809
}
805
810
klog .V (6 ).Infof ("[getAggAvaiResPri] The value of pending is %v" , pending )
806
811
continue
0 commit comments