cluster-autoscaler: Try disabling anti affinity check #1246

Open
wants to merge 6 commits into main
Changes from 5 commits
130 changes: 130 additions & 0 deletions cluster-autoscaler/ca.patch
@@ -1,3 +1,133 @@
diff --git a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go
index 7fb533570..ddf3ea689 100644
--- a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go
+++ b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go
@@ -42,6 +42,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/klogx"
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
+ interpodaffinity "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
)

// ScaleUpOrchestrator implements scaleup.Orchestrator interface.
@@ -604,7 +605,17 @@ func (o *ScaleUpOrchestrator) SchedulablePodGroups(
var schedulablePodGroups []estimator.PodEquivalenceGroup
for _, eg := range podEquivalenceGroups {
samplePod := eg.Pods[0]
- if err := o.autoscalingContext.PredicateChecker.CheckPredicates(o.autoscalingContext.ClusterSnapshot, samplePod, nodeInfo.Node().Name); err == nil {
+ err := o.autoscalingContext.PredicateChecker.CheckPredicates(o.autoscalingContext.ClusterSnapshot, samplePod, nodeInfo.Node().Name)
Member:
Let's break this into multiple lines?
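For illustration only, the multi-line form the reviewer is asking for might look something like this (same identifiers as in the diff above; a sketch of a possible formatting, not the PR's actual code):

err := o.autoscalingContext.PredicateChecker.CheckPredicates(
    o.autoscalingContext.ClusterSnapshot,
    samplePod,
    nodeInfo.Node().Name,
)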

+ // Ignore inter-pod affinity since this is not usually a reason to fail the ASG
+ // (unless the anti-affinity conflict is with a DaemonSet pod, but there's no way to tell)
+
+ ignoreErr := err != nil && err.PredicateName() == "InterPodAffinity" && err.Message() == interpodaffinity.ErrReasonAntiAffinityRulesNotMatch
+
+ if ignoreErr {
+ klog.V(4).Infof("Ignoring error while checking predicates for pod %s.%s on node %s in ClusterSnapshot; %v", samplePod.Namespace, samplePod.Name, nodeInfo.Node().Name, err)
+ }
+
+ if err == nil || ignoreErr {
// Add pods to option.
schedulablePodGroups = append(schedulablePodGroups, estimator.PodEquivalenceGroup{
Pods: eg.Pods,
diff --git a/cluster-autoscaler/estimator/binpacking_estimator.go b/cluster-autoscaler/estimator/binpacking_estimator.go
index 229a2e264..552c2d8ab 100644
--- a/cluster-autoscaler/estimator/binpacking_estimator.go
+++ b/cluster-autoscaler/estimator/binpacking_estimator.go
@@ -26,6 +26,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
klog "k8s.io/klog/v2"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
+ interpodaffinity "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
)

// BinpackingNodeEstimator estimates the number of needed nodes to handle the given amount of pods.
@@ -108,13 +109,18 @@ func (e *BinpackingNodeEstimator) Estimate(
var err error
var remainingPods []*apiv1.Pod

+ klog.V(4).Infof("trying to schedule %d pods on existing nodes", len(podsEquivalenceGroup.Pods))
Member:
Let's remove the print debugging?

Contributor Author:
Honestly, I'd prefer to keep it in unless you strongly think we should take it out. If we run into an issue with this again, it'd be a lot easier to debug with these messages already in place than to add them back and go through all the back and forth that entails. That said, we default to v=4, I believe, so I'd be happy to give them a higher verbosity (5?).
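For illustration (assuming the deployment really does default to --v=4, as noted above), raising one of the new debug lines to a higher verbosity would just mean changing the level passed to klog, e.g.:

klog.V(5).Infof("trying to schedule %d pods on existing nodes", len(podsEquivalenceGroup.Pods))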

Member:
In general, I'd like to minimize the size of our patch, to avoid conflicts when updating.

So in this case, if you think the debugging is useful, let's keep it in but revisit in 1-3 months to see if we can get rid of it?

remainingPods, err = e.tryToScheduleOnExistingNodes(estimationState, podsEquivalenceGroup.Pods)
+
+ klog.V(4).Infof("After attempt, there are %d pods that couldn't be scheduled", len(remainingPods))
if err != nil {
klog.Errorf(err.Error())
return 0, nil
}

+ klog.V(4).Infof("trying to schedule %d pods on new nodes", len(remainingPods))
err = e.tryToScheduleOnNewNodes(estimationState, nodeTemplate, remainingPods)
+ klog.V(4).Infof("After attempt, there are %d pods that couldn't be scheduled", len(remainingPods))
if err != nil {
klog.Errorf(err.Error())
return 0, nil
@@ -137,6 +143,7 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnExistingNodes(

// Check schedulability on all nodes created during simulation
nodeName, err := e.predicateChecker.FitsAnyNodeMatching(e.clusterSnapshot, pod, func(nodeInfo *schedulerframework.NodeInfo) bool {
+ klog.V(4).Infof("checking if pod %s.%s with nodeSelector %v fits on node %s", pod.Namespace, pod.Name, pod.Spec.NodeSelector, nodeInfo.Node().Name)
return estimationState.newNodeNames[nodeInfo.Node().Name]
})
if err != nil {
@@ -144,6 +151,7 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnExistingNodes(
}

if err := e.tryToAddNode(estimationState, pod, nodeName); err != nil {
+ klog.V(4).Infof("Error while adding pod %s.%s to node %s in ClusterSnapshot; %v", pod.Namespace, pod.Name, nodeName, err)
return nil, err
}
}
@@ -183,6 +191,7 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
// each call that returns true, one node gets added. Therefore this
// must be the last check right before really adding a node.
if !e.limiter.PermissionToAddNode() {
+ klog.V(4).Infof("Tried to add node, but couldn't due to hitting the limit")
break
}

@@ -195,10 +204,19 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
// Note that this may still fail (ex. if topology spreading with zonal topologyKey is used);
// in this case we can't help the pending pod. We keep the node in clusterSnapshot to avoid
// adding and removing node to snapshot for each such pod.
- if err := e.predicateChecker.CheckPredicates(e.clusterSnapshot, pod, estimationState.lastNodeName); err != nil {
+ err := e.predicateChecker.CheckPredicates(e.clusterSnapshot, pod, estimationState.lastNodeName)
+
+ ignoreErr := err != nil && err.PredicateName() == "InterPodAffinity" && err.Message() == interpodaffinity.ErrReasonAntiAffinityRulesNotMatch
+
+ if ignoreErr {
+ klog.V(4).Infof("Ignoring error while checking predicates for pod %s.%s on theoretical node %s in ClusterSnapshot; %v", pod.Namespace, pod.Name, estimationState.lastNodeName, err)
+ }
+
+ if err != nil && !ignoreErr {
break
}
if err := e.tryToAddNode(estimationState, pod, estimationState.lastNodeName); err != nil {
+ klog.V(4).Infof("Error while adding pod %s.%s to node %s in ClusterSnapshot; %v", pod.Namespace, pod.Name, estimationState.lastNodeName, err)
return err
}
}
@@ -211,6 +229,9 @@ func (e *BinpackingNodeEstimator) addNewNodeToSnapshot(
template *schedulerframework.NodeInfo,
) error {
newNodeInfo := scheduler.DeepCopyTemplateNode(template, fmt.Sprintf("e-%d", estimationState.newNodeNameIndex))
+
+ klog.V(4).Infof("[Estimator] adding new node %s to ClusterSnapshot, with labels %v and taints %v", newNodeInfo.Node().Name, newNodeInfo.Node().Labels, newNodeInfo.Node().Spec.Taints)
+
var pods []*apiv1.Pod
for _, podInfo := range newNodeInfo.Pods {
pods = append(pods, podInfo.Pod)
diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go b/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go
index 8a13ee2cf..77589f408 100644
--- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go
+++ b/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go
@@ -129,6 +129,8 @@ func (p *SchedulerBasedPredicateChecker) FitsAnyNodeMatching(clusterSnapshot clu
if filterStatus.IsSuccess() {
p.lastIndex = (p.lastIndex + i + 1) % len(nodeInfosList)
return nodeInfo.Node().Name, nil
+ } else {
+ klog.V(4).Infof("Pod %s cannot be placed on node %s because: %s", pod.Name, nodeInfo.Node().Name, filterStatus.Message())
}
}
return "", fmt.Errorf("cannot put pod %s on any node", pod.Name)
diff --git a/cluster-autoscaler/utils/kubernetes/listers.go b/cluster-autoscaler/utils/kubernetes/listers.go
index b9be94b6e..5efb40df2 100644
--- a/cluster-autoscaler/utils/kubernetes/listers.go