in pkg/scheduler/objects/preemption.go [431:517]
// calculateAdditionalVictims looks for extra allocations, beyond the ones already
// picked for the node (nodeVictims), whose removal changes the ask queue's remaining
// guaranteed resource.
//
// All bookkeeping happens on a duplicate of the queue snapshots obtained from
// duplicateQueueSnapshots, so the trial removals and additions below are applied to
// that duplicate only.
//
// The returned boolean is true only when, after all candidates were evaluated, the
// ask queue snapshot reports a non-nil remaining guaranteed resource that is
// StrictlyGreaterThanOrEquals zero; in that case the selected victims (possibly an
// empty, non-nil slice) are returned. In every other case, including the ask queue
// not being present in the snapshots, the result is (nil, false).
func (p *Preemptor) calculateAdditionalVictims(nodeVictims []*Allocation) ([]*Allocation, bool) {
	// work on a private copy of the queue snapshots
	snapshots := p.duplicateQueueSnapshots()

	// locate the snapshot of the queue the ask belongs to
	askQueue, found := snapshots[p.queuePath]
	if !found {
		log.Log(log.SchedPreemption).Warn("BUG: Queue not found by name", zap.String("queuePath", p.queuePath))
		return nil, false
	}

	// account for the victims already chosen on the node: take their resources out of
	// the owning queue snapshot and remember their keys so they are not considered again
	handled := make(map[string]struct{})
	for _, nodeVictim := range nodeVictims {
		key := nodeVictim.GetAllocationKey()
		owner, known := p.queueByAlloc[key]
		if !known {
			continue
		}
		ownerSnap, present := snapshots[owner.QueuePath]
		if !present {
			continue
		}
		ownerSnap.RemoveAllocation(nodeVictim.GetAllocatedResource())
		handled[key] = struct{}{}
	}

	// collect every remaining potential victim across all queues, then order them
	// using compareAllocationLess
	var candidates []*Allocation
	for _, queueAllocs := range p.allocationsByQueue {
		for _, candidate := range queueAllocs.PotentialVictims {
			if _, done := handled[candidate.GetAllocationKey()]; done {
				// already accounted for as a node victim
				continue
			}
			candidates = append(candidates, candidate)
		}
	}
	sort.SliceStable(candidates, func(a, b int) bool {
		return compareAllocationLess(candidates[a], candidates[b])
	})

	// try each candidate in order; the loop ends early as soon as moving a candidate's
	// resources into the ask queue would push the ask queue below zero remaining guarantee
	selected := make([]*Allocation, 0)
	for _, candidate := range candidates {
		owner, known := p.queueByAlloc[candidate.GetAllocationKey()]
		if !known {
			continue
		}
		victimQueue, present := snapshots[owner.QueuePath]
		if !present {
			continue
		}

		// tentatively remove the candidate from its own queue
		remainingBefore := victimQueue.GetRemainingGuaranteedResource()
		victimQueue.RemoveAllocation(candidate.GetAllocatedResource())

		// The candidate is only eligible when the victim queue still has a non-negative
		// preemptable resource after the removal AND the queue's remaining guaranteed
		// resource before the removal was either nil or negative. The second condition
		// covers over-allocation caused by resource types that are in use but have no
		// guarantee defined.
		preemptable := victimQueue.GetPreemptableResource()
		if !resources.StrictlyGreaterThanOrEquals(preemptable, resources.Zero) ||
			(remainingBefore != nil && !resources.StrictlyGreaterThan(resources.Zero, remainingBefore)) {
			// not eligible: undo the tentative removal
			victimQueue.AddAllocation(candidate.GetAllocatedResource())
			continue
		}

		// tentatively hand the candidate's resources to the ask queue
		askRemainingBefore := askQueue.GetRemainingGuaranteedResource()
		askQueue.AddAllocation(candidate.GetAllocatedResource())
		askRemainingAfter := askQueue.GetRemainingGuaranteedResource()

		// the ask queue would become over-utilized: undo both changes and stop searching
		if askRemainingAfter != nil && resources.StrictlyGreaterThan(resources.Zero, askRemainingAfter) {
			askQueue.RemoveAllocation(candidate.GetAllocatedResource())
			victimQueue.AddAllocation(candidate.GetAllocatedResource())
			break
		}

		// the ask queue's remaining guarantee is unchanged, so preempting this candidate
		// would not help: undo both changes and move on
		if resources.EqualsOrEmpty(askRemainingBefore, askRemainingAfter) {
			askQueue.RemoveAllocation(candidate.GetAllocatedResource())
			victimQueue.AddAllocation(candidate.GetAllocatedResource())
			continue
		}

		// the remaining guarantee changed, keep this candidate as a victim
		selected = append(selected, candidate)
	}

	// final verdict: is the ask queue usage at or under its guaranteed quota?
	if remaining := askQueue.GetRemainingGuaranteedResource(); remaining != nil &&
		resources.StrictlyGreaterThanOrEquals(remaining, resources.Zero) {
		return selected, true
	}
	return nil, false
}