Skip to content

Commit

Permalink
[Refactor] Do not use RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV as timeo…
Browse files Browse the repository at this point in the history
…ut of status check in tests

It is only an insurance to keep reconciliation continuously triggered to hopefully fix an unexpected state.
Relying on it to fix state transitions is not a good pratice. Therefore, We should not reference it in tests at all.
  • Loading branch information
rueian committed Dec 16, 2023
1 parent 1594e88 commit b5be747
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 6 deletions.
8 changes: 4 additions & 4 deletions ray-operator/controllers/ray/raycluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ var _ = Context("Inside the default namespace", func() {
// We need to figure out the behavior. See https://github.com/ray-project/kuberay/issues/1736 for more details.
Eventually(
getClusterState(ctx, "default", myRayCluster.Name),
time.Second*(utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS+5), time.Millisecond*500).Should(Equal(rayv1.Ready))
time.Second*15, time.Millisecond*500).Should(Equal(rayv1.Ready))
})

It("should re-create a deleted worker", func() {
Expand Down Expand Up @@ -311,7 +311,7 @@ var _ = Context("Inside the default namespace", func() {
It("cluster's .status.state should be updated to 'suspended' shortly after all Pods are terminated", func() {
Eventually(
getClusterState(ctx, "default", myRayCluster.Name),
time.Second*(utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS+5), time.Millisecond*500).Should(Equal(rayv1.Suspended))
time.Second*15, time.Millisecond*500).Should(Equal(rayv1.Suspended))
})

It("set suspend to false and then revert it to true before all Pods are running", func() {
Expand Down Expand Up @@ -363,7 +363,7 @@ var _ = Context("Inside the default namespace", func() {
// RayCluster should be in Suspended state.
Eventually(
getClusterState(ctx, "default", myRayCluster.Name),
time.Second*(utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS+5), time.Millisecond*500).Should(Equal(rayv1.Suspended))
time.Second*15, time.Millisecond*500).Should(Equal(rayv1.Suspended))
})

It("should run all head and worker pods if un-suspended", func() {
Expand Down Expand Up @@ -402,7 +402,7 @@ var _ = Context("Inside the default namespace", func() {
It("cluster's .status.state should be updated back to 'ready' after being un-suspended", func() {
Eventually(
getClusterState(ctx, "default", myRayCluster.Name),
time.Second*(utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS+5), time.Millisecond*500).Should(Equal(rayv1.Ready))
time.Second*15, time.Millisecond*500).Should(Equal(rayv1.Ready))
})
})
})
Expand Down
2 changes: 1 addition & 1 deletion ray-operator/controllers/ray/rayjob_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ var _ = Context("Inside the default namespace", func() {
// The RayCluster.Status.State should be Ready.
Eventually(
getClusterState(ctx, "default", myRayCluster.Name),
time.Second*(utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS+5), time.Millisecond*500).Should(Equal(rayv1.Ready))
time.Second*15, time.Millisecond*500).Should(Equal(rayv1.Ready))
})

It("Dashboard URL should be set", func() {
Expand Down
5 changes: 4 additions & 1 deletion ray-operator/controllers/ray/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ var _ = BeforeSuite(func(ctx SpecContext) {
Expect(err).ToNot(HaveOccurred())
Expect(k8sClient).ToNot(BeNil())

// Suggested way to run tests
// The RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV is an insurance to keep reconciliation continuously triggered to hopefully fix an unexpected state.
// In a production environment, the requeue period is set to five minutes by default, which is relatively infrequent.
// TODO: We probably should not shorten RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV here just to make tests pass.
// Instead, we should fix the reconciliation if any unexpected happened.
os.Setenv(utils.RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV, "10")
mgr, err := ctrl.NewManager(cfg, ctrl.Options{
Scheme: scheme.Scheme,
Expand Down

0 comments on commit b5be747

Please sign in to comment.