Skip to content

Commit

Permalink
[RayJob][Status][7/n] Define JobDeploymentStatusNew explicitly (#1772)
Browse files Browse the repository at this point in the history
  • Loading branch information
kevin85421 authored Dec 27, 2023
1 parent d723f50 commit 59503c6
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
1 change: 1 addition & 0 deletions ray-operator/apis/ray/v1/rayjob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ func IsJobTerminal(status JobStatus) bool {
// JobDeploymentStatus is a string-based type. The const block that follows
// declares its values, including the empty string (JobDeploymentStatusNew),
// "Initializing", "Running" and "Complete".
type JobDeploymentStatus string

const (
JobDeploymentStatusNew JobDeploymentStatus = ""
JobDeploymentStatusInitializing JobDeploymentStatus = "Initializing"
JobDeploymentStatusRunning JobDeploymentStatus = "Running"
JobDeploymentStatusComplete JobDeploymentStatus = "Complete"
Expand Down
14 changes: 8 additions & 6 deletions ray-operator/controllers/ray/rayjob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,14 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)

r.Log.Info("RayJob", "name", rayJobInstance.Name, "namespace", rayJobInstance.Namespace, "JobStatus", rayJobInstance.Status.JobStatus, "JobDeploymentStatus", rayJobInstance.Status.JobDeploymentStatus)
switch rayJobInstance.Status.JobDeploymentStatus {
case rayv1.JobDeploymentStatusNew:
// Set `Status.JobDeploymentStatus` to `JobDeploymentStatusInitializing`, and initialize `Status.JobId`
// and `Status.RayClusterName` up front to avoid duplicate job submissions and cluster creations.
r.Log.Info("JobDeploymentStatusNew", "RayJob", rayJobInstance.Name)
if err = r.initRayJobStatusIfNeed(ctx, rayJobInstance); err != nil {
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
}
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, nil
case rayv1.JobDeploymentStatusComplete:
// If this RayJob uses an existing RayCluster (i.e., ClusterSelector is set), we should not delete the RayCluster.
r.Log.Info("JobDeploymentStatusComplete", "RayJob", rayJobInstance.Name, "ShutdownAfterJobFinishes", rayJobInstance.Spec.ShutdownAfterJobFinishes, "ClusterSelector", rayJobInstance.Spec.ClusterSelector)
Expand Down Expand Up @@ -164,12 +172,6 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, nil
}

// Set `Status.JobDeploymentStatus` to `JobDeploymentStatusInitializing`, and initialize `Status.JobId`
// and `Status.RayClusterName` up front to avoid duplicate job submissions and cluster creations.
if err = r.initRayJobStatusIfNeed(ctx, rayJobInstance); err != nil {
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
}

var rayClusterInstance *rayv1.RayCluster
if rayClusterInstance, err = r.getOrCreateRayClusterInstance(ctx, rayJobInstance); err != nil {
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
Expand Down

0 comments on commit 59503c6

Please sign in to comment.