cluster-autoscaler: Try disabling anti affinity check #1246
base: main
Commits: 7764508, 49b826b, acffe55, 47f5fe5, 96732b1, 275293f
@@ -1,3 +1,133 @@
diff --git a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go
index 7fb533570..ddf3ea689 100644
--- a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go
+++ b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go
@@ -42,6 +42,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/klogx"
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
+ interpodaffinity "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
)

// ScaleUpOrchestrator implements scaleup.Orchestrator interface.
@@ -604,7 +605,17 @@ func (o *ScaleUpOrchestrator) SchedulablePodGroups(
var schedulablePodGroups []estimator.PodEquivalenceGroup
for _, eg := range podEquivalenceGroups {
samplePod := eg.Pods[0]
- if err := o.autoscalingContext.PredicateChecker.CheckPredicates(o.autoscalingContext.ClusterSnapshot, samplePod, nodeInfo.Node().Name); err == nil {
+ err := o.autoscalingContext.PredicateChecker.CheckPredicates(o.autoscalingContext.ClusterSnapshot, samplePod, nodeInfo.Node().Name)
+ // Ignore inter-pod affinity since this is not usually a reason to fail the ASG
+ // (unless the anti-affinity conflict is with a DaemonSet pod, but there's no way to tell)
+
+ ignoreErr := err != nil && err.PredicateName() == "InterPodAffinity" && err.Message() == interpodaffinity.ErrReasonAntiAffinityRulesNotMatch
+
+ if ignoreErr {
+ klog.V(4).Infof("Ignoring error while checking predicates for pod %s.%s on node %s in ClusterSnapshot; %v", samplePod.Namespace, samplePod.Name, nodeInfo.Node().Name, err)
+ }
+
+ if err == nil || ignoreErr {
// Add pods to option.
schedulablePodGroups = append(schedulablePodGroups, estimator.PodEquivalenceGroup{
Pods: eg.Pods,
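For reference, the ignore condition introduced here reappears in the binpacking estimator below; it could be shared via a small helper. A minimal sketch, not part of this patch - the package placement is hypothetical, and the caller is assumed to have already checked that CheckPredicates returned a non-nil error:

package orchestrator // hypothetical placement, for illustration only

import (
	interpodaffinity "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
)

// isIgnorableAntiAffinityFailure reports whether a predicate failure is exactly the
// inter-pod anti-affinity mismatch this patch chooses to treat as schedulable.
// predicateName and message correspond to err.PredicateName() and err.Message()
// on the error returned by CheckPredicates.
func isIgnorableAntiAffinityFailure(predicateName, message string) bool {
	return predicateName == "InterPodAffinity" &&
		message == interpodaffinity.ErrReasonAntiAffinityRulesNotMatch
}

The call sites would then read: ignoreErr := err != nil && isIgnorableAntiAffinityFailure(err.PredicateName(), err.Message()).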
diff --git a/cluster-autoscaler/estimator/binpacking_estimator.go b/cluster-autoscaler/estimator/binpacking_estimator.go
index 229a2e264..552c2d8ab 100644
--- a/cluster-autoscaler/estimator/binpacking_estimator.go
+++ b/cluster-autoscaler/estimator/binpacking_estimator.go
@@ -26,6 +26,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
klog "k8s.io/klog/v2"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
+ interpodaffinity "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
)

// BinpackingNodeEstimator estimates the number of needed nodes to handle the given amount of pods.
@@ -108,13 +109,18 @@ func (e *BinpackingNodeEstimator) Estimate(
var err error
var remainingPods []*apiv1.Pod

+ klog.V(4).Infof("trying to schedule %d pods on existing nodes", len(podsEquivalenceGroup.Pods))
Review comment: Let's remove the print debugging?

Reply: Honestly, I'd prefer to keep it in unless you strongly think we should take it out. If we run into issues with this again, it would be much easier to debug with these messages already in place than to add them back and go through the back-and-forth that entails. Though we default to v=4, I believe, so I'd be happy to give them a higher verbosity (5?).

Reply: In general, I'd like to minimize the size of our patch to avoid conflicts when updating. So in this case, if you think the debugging is useful, let's keep it in, but revisit in 1-3 months to see if we can get rid of it.
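For reference, bumping these debug lines to a higher verbosity as discussed only changes the level passed to klog; a minimal sketch of the first line at v=5 (not part of this patch):

klog.V(5).Infof("trying to schedule %d pods on existing nodes", len(podsEquivalenceGroup.Pods))

The messages would then only be emitted when the autoscaler runs with the standard klog flag --v=5 (or higher).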

remainingPods, err = e.tryToScheduleOnExistingNodes(estimationState, podsEquivalenceGroup.Pods)
+
+ klog.V(4).Infof("After attempt, there are %d pods that couldn't be scheduled", len(remainingPods))
if err != nil {
klog.Errorf(err.Error())
return 0, nil
}

+ klog.V(4).Infof("trying to schedule %d pods on new nodes", len(remainingPods))
err = e.tryToScheduleOnNewNodes(estimationState, nodeTemplate, remainingPods)
+ klog.V(4).Infof("After attempt, there are %d pods that couldn't be scheduled", len(remainingPods))
if err != nil {
klog.Errorf(err.Error())
return 0, nil
@@ -137,6 +143,7 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnExistingNodes(

// Check schedulability on all nodes created during simulation
nodeName, err := e.predicateChecker.FitsAnyNodeMatching(e.clusterSnapshot, pod, func(nodeInfo *schedulerframework.NodeInfo) bool {
+ klog.V(4).Infof("checking if pod %s.%s with nodeSelector %v fits on node %s", pod.Namespace, pod.Name, pod.Spec.NodeSelector, nodeInfo.Node().Name)
return estimationState.newNodeNames[nodeInfo.Node().Name]
})
if err != nil {
@@ -144,6 +151,7 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnExistingNodes(
}

if err := e.tryToAddNode(estimationState, pod, nodeName); err != nil {
+ klog.V(4).Infof("Error while adding pod %s.%s to node %s in ClusterSnapshot; %v", pod.Namespace, pod.Name, nodeName, err)
return nil, err
}
}
@@ -183,6 +191,7 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
// each call that returns true, one node gets added. Therefore this
// must be the last check right before really adding a node.
if !e.limiter.PermissionToAddNode() {
+ klog.V(4).Infof("Tried to add node, but couldn't due to hitting the limit")
break
}

@@ -195,10 +204,19 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
// Note that this may still fail (ex. if topology spreading with zonal topologyKey is used);
// in this case we can't help the pending pod. We keep the node in clusterSnapshot to avoid
// adding and removing node to snapshot for each such pod.
- if err := e.predicateChecker.CheckPredicates(e.clusterSnapshot, pod, estimationState.lastNodeName); err != nil {
+ err := e.predicateChecker.CheckPredicates(e.clusterSnapshot, pod, estimationState.lastNodeName)
+
+ ignoreErr := err != nil && err.PredicateName() == "InterPodAffinity" && err.Message() == interpodaffinity.ErrReasonAntiAffinityRulesNotMatch
+
+ if ignoreErr {
+ klog.V(4).Infof("Ignoring error while checking predicates for pod %s.%s on theoretical node %s in ClusterSnapshot; %v", pod.Namespace, pod.Name, estimationState.lastNodeName, err)
+ }
+
+ if err != nil && !ignoreErr {
break
}
if err := e.tryToAddNode(estimationState, pod, estimationState.lastNodeName); err != nil {
+ klog.V(4).Infof("Error while adding pod %s.%s to node %s in ClusterSnapshot; %v", pod.Namespace, pod.Name, estimationState.lastNodeName, err)
return err
}
}
@@ -211,6 +229,9 @@ func (e *BinpackingNodeEstimator) addNewNodeToSnapshot(
template *schedulerframework.NodeInfo,
) error {
newNodeInfo := scheduler.DeepCopyTemplateNode(template, fmt.Sprintf("e-%d", estimationState.newNodeNameIndex))
+
+ klog.V(4).Infof("[Estimator] adding new node %s to ClusterSnapshot, with labels %v and taints %v", newNodeInfo.Node().Name, newNodeInfo.Node().Labels, newNodeInfo.Node().Spec.Taints)
+
var pods []*apiv1.Pod
for _, podInfo := range newNodeInfo.Pods {
pods = append(pods, podInfo.Pod)
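To illustrate the kind of spec this patch affects: a pod with required anti-affinity such as the hypothetical one below fails the InterPodAffinity filter with ErrReasonAntiAffinityRulesNotMatch when the simulated node already runs a pod matching the selector (for example a DaemonSet pod), and with this patch that failure no longer blocks the scale-up estimate. Sketch only; names and labels are made up:

package example // illustration only, not part of this patch

import (
	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// examplePod requires that no other pod labelled app=web runs on the same node.
var examplePod = &apiv1.Pod{
	ObjectMeta: metav1.ObjectMeta{Name: "web-0", Labels: map[string]string{"app": "web"}},
	Spec: apiv1.PodSpec{
		Affinity: &apiv1.Affinity{
			PodAntiAffinity: &apiv1.PodAntiAffinity{
				RequiredDuringSchedulingIgnoredDuringExecution: []apiv1.PodAffinityTerm{{
					LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "web"}},
					TopologyKey:   "kubernetes.io/hostname",
				}},
			},
		},
	},
}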
diff --git a/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go b/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go
index 8a13ee2cf..77589f408 100644
--- a/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go
+++ b/cluster-autoscaler/simulator/predicatechecker/schedulerbased.go
@@ -129,6 +129,8 @@ func (p *SchedulerBasedPredicateChecker) FitsAnyNodeMatching(clusterSnapshot clu
if filterStatus.IsSuccess() {
p.lastIndex = (p.lastIndex + i + 1) % len(nodeInfosList)
return nodeInfo.Node().Name, nil
+ } else {
+ klog.V(4).Infof("Pod %s cannot be placed on node %s because: %s", pod.Name, nodeInfo.Node().Name, filterStatus.Message())
}
}
return "", fmt.Errorf("cannot put pod %s on any node", pod.Name)
diff --git a/cluster-autoscaler/utils/kubernetes/listers.go b/cluster-autoscaler/utils/kubernetes/listers.go
index b9be94b6e..5efb40df2 100644
--- a/cluster-autoscaler/utils/kubernetes/listers.go
Review comment: Let's break this into multiple lines?