Skip to content

Commit 8a74296

Browse files
committed
feat: add RayCluster.status.readyWorkerReplicas
A worker Pod is ready if it has a PodCondition with type == Ready and status == True. See https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-conditions
1 parent 3c44ba0 commit 8a74296

File tree

13 files changed

+128
-16
lines changed

13 files changed

+128
-16
lines changed

helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ray-operator/apis/ray/v1/raycluster_types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ type RayClusterStatus struct {
121121
// Important: Run "make" to regenerate code after modifying this file
122122
// State reflects the current state of the cluster
123123
State ClusterState `json:"state,omitempty"`
124+
// ReadyWorkerReplicas indicates how many worker replicas are ready in the cluster
125+
ReadyWorkerReplicas int32 `json:"readyWorkerReplicas,omitempty"`
124126
// AvailableWorkerReplicas indicates how many replicas are available in the cluster
125127
AvailableWorkerReplicas int32 `json:"availableWorkerReplicas,omitempty"`
126128
// DesiredWorkerReplicas indicates overall desired replicas claimed by the user at the cluster level.

ray-operator/apis/ray/v1alpha1/raycluster_types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ type RayClusterStatus struct {
119119
// Important: Run "make" to regenerate code after modifying this file
120120
// State reflects the current state of the cluster
121121
State ClusterState `json:"state,omitempty"`
122+
// ReadyWorkerReplicas indicates how many worker replicas are ready in the cluster
123+
ReadyWorkerReplicas int32 `json:"readyWorkerReplicas,omitempty"`
122124
// AvailableWorkerReplicas indicates how many replicas are available in the cluster
123125
AvailableWorkerReplicas int32 `json:"availableWorkerReplicas,omitempty"`
124126
// DesiredWorkerReplicas indicates overall desired replicas claimed by the user at the cluster level.

ray-operator/config/crd/bases/ray.io_rayclusters.yaml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ray-operator/config/crd/bases/ray.io_rayjobs.yaml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ray-operator/config/crd/bases/ray.io_rayservices.yaml

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ray-operator/controllers/ray/raycluster_controller.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -400,13 +400,22 @@ func (r *RayClusterReconciler) inconsistentRayClusterStatus(ctx context.Context,
400400
oldStatus.State, newStatus.State, oldStatus.Reason, newStatus.Reason))
401401
return true
402402
}
403-
if oldStatus.AvailableWorkerReplicas != newStatus.AvailableWorkerReplicas || oldStatus.DesiredWorkerReplicas != newStatus.DesiredWorkerReplicas ||
404-
oldStatus.MinWorkerReplicas != newStatus.MinWorkerReplicas || oldStatus.MaxWorkerReplicas != newStatus.MaxWorkerReplicas {
403+
if oldStatus.ReadyWorkerReplicas != newStatus.ReadyWorkerReplicas ||
404+
oldStatus.AvailableWorkerReplicas != newStatus.AvailableWorkerReplicas ||
405+
oldStatus.DesiredWorkerReplicas != newStatus.DesiredWorkerReplicas ||
406+
oldStatus.MinWorkerReplicas != newStatus.MinWorkerReplicas ||
407+
oldStatus.MaxWorkerReplicas != newStatus.MaxWorkerReplicas {
405408
logger.Info("inconsistentRayClusterStatus", "detect inconsistency", fmt.Sprintf(
406-
"old AvailableWorkerReplicas: %d, new AvailableWorkerReplicas: %d, old DesiredWorkerReplicas: %d, new DesiredWorkerReplicas: %d, "+
407-
"old MinWorkerReplicas: %d, new MinWorkerReplicas: %d, old MaxWorkerReplicas: %d, new MaxWorkerReplicas: %d",
408-
oldStatus.AvailableWorkerReplicas, newStatus.AvailableWorkerReplicas, oldStatus.DesiredWorkerReplicas, newStatus.DesiredWorkerReplicas,
409-
oldStatus.MinWorkerReplicas, newStatus.MinWorkerReplicas, oldStatus.MaxWorkerReplicas, newStatus.MaxWorkerReplicas))
409+
"old ReadyWorkerReplicas: %d, new ReadyWorkerReplicas: %d, "+
410+
"old AvailableWorkerReplicas: %d, new AvailableWorkerReplicas: %d, "+
411+
"old DesiredWorkerReplicas: %d, new DesiredWorkerReplicas: %d, "+
412+
"old MinWorkerReplicas: %d, new MinWorkerReplicas: %d, "+
413+
"old MaxWorkerReplicas: %d, new MaxWorkerReplicas: %d",
414+
oldStatus.ReadyWorkerReplicas, newStatus.ReadyWorkerReplicas,
415+
oldStatus.AvailableWorkerReplicas, newStatus.AvailableWorkerReplicas,
416+
oldStatus.DesiredWorkerReplicas, newStatus.DesiredWorkerReplicas,
417+
oldStatus.MinWorkerReplicas, newStatus.MinWorkerReplicas,
418+
oldStatus.MaxWorkerReplicas, newStatus.MaxWorkerReplicas))
410419
return true
411420
}
412421
if !reflect.DeepEqual(oldStatus.Endpoints, newStatus.Endpoints) || !reflect.DeepEqual(oldStatus.Head, newStatus.Head) {
@@ -1222,6 +1231,7 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra
12221231
return nil, err
12231232
}
12241233

1234+
newInstance.Status.ReadyWorkerReplicas = utils.CalculateReadyReplicas(runtimePods)
12251235
newInstance.Status.AvailableWorkerReplicas = utils.CalculateAvailableReplicas(runtimePods)
12261236
newInstance.Status.DesiredWorkerReplicas = utils.CalculateDesiredReplicas(ctx, newInstance)
12271237
newInstance.Status.MinWorkerReplicas = utils.CalculateMinReplicas(newInstance)

ray-operator/controllers/ray/raycluster_controller_unit_test.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1530,6 +1530,7 @@ func TestInconsistentRayClusterStatus(t *testing.T) {
15301530
timeNow := metav1.Now()
15311531
oldStatus := rayv1.RayClusterStatus{
15321532
State: rayv1.Ready,
1533+
ReadyWorkerReplicas: 1,
15331534
AvailableWorkerReplicas: 1,
15341535
DesiredWorkerReplicas: 1,
15351536
MinWorkerReplicas: 1,
@@ -1564,42 +1565,47 @@ func TestInconsistentRayClusterStatus(t *testing.T) {
15641565
newStatus.Reason = "new reason"
15651566
assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
15661567

1567-
// Case 3: `AvailableWorkerReplicas` is different => return true
1568+
// Case 3: `ReadyWorkerReplicas` is different => return true
1569+
newStatus = oldStatus.DeepCopy()
1570+
newStatus.ReadyWorkerReplicas = oldStatus.ReadyWorkerReplicas + 1
1571+
assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
1572+
1573+
// Case 4: `AvailableWorkerReplicas` is different => return true
15681574
newStatus = oldStatus.DeepCopy()
15691575
newStatus.AvailableWorkerReplicas = oldStatus.AvailableWorkerReplicas + 1
15701576
assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
15711577

1572-
// Case 4: `DesiredWorkerReplicas` is different => return true
1578+
// Case 5: `DesiredWorkerReplicas` is different => return true
15731579
newStatus = oldStatus.DeepCopy()
15741580
newStatus.DesiredWorkerReplicas = oldStatus.DesiredWorkerReplicas + 1
15751581
assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
15761582

1577-
// Case 5: `MinWorkerReplicas` is different => return true
1583+
// Case 6: `MinWorkerReplicas` is different => return true
15781584
newStatus = oldStatus.DeepCopy()
15791585
newStatus.MinWorkerReplicas = oldStatus.MinWorkerReplicas + 1
15801586
assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
15811587

1582-
// Case 6: `MaxWorkerReplicas` is different => return true
1588+
// Case 7: `MaxWorkerReplicas` is different => return true
15831589
newStatus = oldStatus.DeepCopy()
15841590
newStatus.MaxWorkerReplicas = oldStatus.MaxWorkerReplicas + 1
15851591
assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
15861592

1587-
// Case 7: `Endpoints` is different => return true
1593+
// Case 8: `Endpoints` is different => return true
15881594
newStatus = oldStatus.DeepCopy()
15891595
newStatus.Endpoints["fakeEndpoint"] = "10009"
15901596
assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
15911597

1592-
// Case 8: `Head` is different => return true
1598+
// Case 9: `Head` is different => return true
15931599
newStatus = oldStatus.DeepCopy()
15941600
newStatus.Head.PodIP = "test head pod ip"
15951601
assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
15961602

1597-
// Case 9: `LastUpdateTime` is different => return false
1603+
// Case 10: `LastUpdateTime` is different => return false
15981604
newStatus = oldStatus.DeepCopy()
15991605
newStatus.LastUpdateTime = &metav1.Time{Time: timeNow.Add(time.Hour)}
16001606
assert.False(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
16011607

1602-
// Case 10: `ObservedGeneration` is different => return false
1608+
// Case 11: `ObservedGeneration` is different => return false
16031609
newStatus = oldStatus.DeepCopy()
16041610
newStatus.ObservedGeneration = oldStatus.ObservedGeneration + 1
16051611
assert.False(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))

0 commit comments

Comments
 (0)