Skip to content

Commit 9488fbe

Browse files
authored
Merge pull request #84883 from Huang-Wei/tbe-flake-followup
Update test logic to simulate NodeReady/False and NodeReady/Unknown events correctly
2 parents 49a9b6c + b6b92b6 commit 9488fbe

File tree

1 file changed

+75
-44
lines changed

1 file changed

+75
-44
lines changed

test/integration/scheduler/taint_test.go

Lines changed: 75 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@ package scheduler
1919
// This file tests the Taint feature.
2020

2121
import (
22+
"errors"
2223
"fmt"
2324
"testing"
2425
"time"
2526

2627
v1 "k8s.io/api/core/v1"
28+
apierrors "k8s.io/apimachinery/pkg/api/errors"
2729
"k8s.io/apimachinery/pkg/api/resource"
2830
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2931
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -584,6 +586,7 @@ func TestTaintBasedEvictions(t *testing.T) {
584586
nodeCount := 3
585587
zero := int64(0)
586588
gracePeriod := int64(1)
589+
heartbeatInternal := time.Second * 2
587590
testPod := &v1.Pod{
588591
ObjectMeta: metav1.ObjectMeta{Name: "testpod1", DeletionGracePeriodSeconds: &zero},
589592
Spec: v1.PodSpec{
@@ -657,7 +660,6 @@ func TestTaintBasedEvictions(t *testing.T) {
657660
for i, test := range tests {
658661
t.Run(test.name, func(t *testing.T) {
659662
context := initTestMaster(t, "taint-based-evictions", admission)
660-
defer cleanupTest(t, context)
661663

662664
// Build clientset and informers for controllers.
663665
externalClientset := kubernetes.NewForConfigOrDie(&restclient.Config{
@@ -669,6 +671,7 @@ func TestTaintBasedEvictions(t *testing.T) {
669671
podTolerations.SetExternalKubeInformerFactory(externalInformers)
670672

671673
context = initTestScheduler(t, context, true, nil)
674+
defer cleanupTest(t, context)
672675
cs := context.clientSet
673676
informers := context.informerFactory
674677
_, err := cs.CoreV1().Namespaces().Create(context.ns)
@@ -726,8 +729,9 @@ func TestTaintBasedEvictions(t *testing.T) {
726729
Allocatable: nodeRes,
727730
Conditions: []v1.NodeCondition{
728731
{
729-
Type: v1.NodeReady,
730-
Status: v1.ConditionTrue,
732+
Type: v1.NodeReady,
733+
Status: v1.ConditionTrue,
734+
LastHeartbeatTime: metav1.Now(),
731735
},
732736
},
733737
},
@@ -737,33 +741,6 @@ func TestTaintBasedEvictions(t *testing.T) {
737741
}
738742
}
739743

740-
// Regularly send heartbeat event to APIServer so that the cluster doesn't enter fullyDisruption mode.
741-
// TODO(Huang-Wei): use "NodeDisruptionExclusion" feature to simply the below logic when it's beta.
742-
var heartbeatChans []chan struct{}
743-
for i := 0; i < nodeCount; i++ {
744-
heartbeatChans = append(heartbeatChans, make(chan struct{}))
745-
}
746-
for i := 0; i < nodeCount; i++ {
747-
// Spin up <nodeCount> goroutines to send heartbeat event to APIServer periodically.
748-
go func(i int) {
749-
for {
750-
select {
751-
case <-heartbeatChans[i]:
752-
return
753-
case <-time.Tick(2 * time.Second):
754-
nodes[i].Status.Conditions = []v1.NodeCondition{
755-
{
756-
Type: v1.NodeReady,
757-
Status: v1.ConditionTrue,
758-
LastHeartbeatTime: metav1.Now(),
759-
},
760-
}
761-
updateNodeStatus(cs, nodes[i])
762-
}
763-
}
764-
}(i)
765-
}
766-
767744
neededNode := nodes[1]
768745
if test.pod != nil {
769746
test.pod.Name = fmt.Sprintf("testpod-%d", i)
@@ -790,18 +767,53 @@ func TestTaintBasedEvictions(t *testing.T) {
790767
}
791768
}
792769

770+
// Regularly send heartbeat event to APIServer so that the cluster doesn't enter fullyDisruption mode.
771+
// TODO(Huang-Wei): use "NodeDisruptionExclusion" feature to simply the below logic when it's beta.
793772
for i := 0; i < nodeCount; i++ {
794-
// Stop the neededNode's heartbeat goroutine.
795-
if neededNode.Name == fmt.Sprintf("node-%d", i) {
796-
heartbeatChans[i] <- struct{}{}
797-
break
773+
var conditions []v1.NodeCondition
774+
// If current node is not <neededNode>
775+
if neededNode.Name != nodes[i].Name {
776+
conditions = []v1.NodeCondition{
777+
{
778+
Type: v1.NodeReady,
779+
Status: v1.ConditionTrue,
780+
},
781+
}
782+
} else {
783+
c, err := nodeReadyStatus(test.nodeConditions)
784+
if err != nil {
785+
t.Error(err)
786+
}
787+
// Need to distinguish NodeReady/False and NodeReady/Unknown.
788+
// If we try to update the node with condition NotReady/False, i.e. expect a NotReady:NoExecute taint
789+
// we need to keep sending the update event to keep it alive, rather than just sending once.
790+
if c == v1.ConditionFalse {
791+
conditions = test.nodeConditions
792+
} else if c == v1.ConditionUnknown {
793+
// If it's expected to update the node with condition NotReady/Unknown,
794+
// i.e. expect a Unreachable:NoExecute taint,
795+
// we need to only send the update event once to simulate the network unreachable scenario.
796+
nodeCopy := nodeCopyWithConditions(nodes[i], test.nodeConditions)
797+
if err := updateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
798+
t.Errorf("Cannot update node: %v", err)
799+
}
800+
continue
801+
}
798802
}
799-
}
800-
neededNode.Status.Conditions = test.nodeConditions
801-
// Update node condition.
802-
err = updateNodeStatus(cs, neededNode)
803-
if err != nil {
804-
t.Fatalf("Cannot update node: %v", err)
803+
// Keeping sending NodeReady/True or NodeReady/False events.
804+
go func(i int) {
805+
for {
806+
select {
807+
case <-context.ctx.Done():
808+
return
809+
case <-time.Tick(heartbeatInternal):
810+
nodeCopy := nodeCopyWithConditions(nodes[i], conditions)
811+
if err := updateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
812+
t.Errorf("Cannot update node: %v", err)
813+
}
814+
}
815+
}
816+
}(i)
805817
}
806818

807819
if err := waitForNodeTaints(cs, neededNode, test.nodeTaints); err != nil {
@@ -826,10 +838,6 @@ func TestTaintBasedEvictions(t *testing.T) {
826838
}
827839
cleanupPods(cs, t, []*v1.Pod{test.pod})
828840
}
829-
// Close all heartbeat channels.
830-
for i := 0; i < nodeCount; i++ {
831-
close(heartbeatChans[i])
832-
}
833841
cleanupNodes(cs, t)
834842
waitForSchedulerCacheCleanup(context.scheduler, t)
835843
})
@@ -844,3 +852,26 @@ func getTolerationSeconds(tolerations []v1.Toleration) (int64, error) {
844852
}
845853
return 0, fmt.Errorf("cannot find toleration")
846854
}
855+
856+
// nodeReadyStatus returns the status of first condition with type NodeReady.
857+
// If none of the condition is of type NodeReady, returns an error.
858+
func nodeReadyStatus(conditions []v1.NodeCondition) (v1.ConditionStatus, error) {
859+
for _, c := range conditions {
860+
if c.Type != v1.NodeReady {
861+
continue
862+
}
863+
// Just return the first condition with type NodeReady
864+
return c.Status, nil
865+
}
866+
return v1.ConditionFalse, errors.New("None of the conditions is of type NodeReady")
867+
}
868+
869+
func nodeCopyWithConditions(node *v1.Node, conditions []v1.NodeCondition) *v1.Node {
870+
copy := node.DeepCopy()
871+
copy.ResourceVersion = "0"
872+
copy.Status.Conditions = conditions
873+
for i := range copy.Status.Conditions {
874+
copy.Status.Conditions[i].LastHeartbeatTime = metav1.Now()
875+
}
876+
return copy
877+
}

0 commit comments

Comments
 (0)