Skip to content

Commit 542abfa

Browse files
[Chore][Log] Delete error loggings right before returned errors
Signed-off-by: Chi-Sheng Liu <[email protected]>
1 parent 4836d01 commit 542abfa

File tree

4 files changed, with 1 addition and 65 deletions.

ray-operator/controllers/ray/raycluster_controller.go

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -216,7 +216,6 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request
216216
"finalizer", utils.GCSFaultToleranceRedisCleanupFinalizer)
217217
controllerutil.AddFinalizer(instance, utils.GCSFaultToleranceRedisCleanupFinalizer)
218218
if err := r.Update(ctx, instance); err != nil {
219-
logger.Error(err, fmt.Sprintf("Failed to add the finalizer %s to the RayCluster.", utils.GCSFaultToleranceRedisCleanupFinalizer))
220219
return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err
221220
}
222221
// Only start the RayCluster reconciliation after the finalizer is added.
@@ -287,7 +286,6 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request
287286
logger.Info(fmt.Sprintf("Redis cleanup Job already exists. Requeue the RayCluster CR %s.", instance.Name))
288287
return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, nil
289288
}
290-
logger.Error(err, "Failed to create Redis cleanup Job")
291289
return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err
292290
}
293291
logger.Info("Successfully created Redis cleanup Job", "Job name", redisCleanupJob.Name)
@@ -439,7 +437,6 @@ func (r *RayClusterReconciler) reconcileRouteOpenShift(ctx context.Context, inst
439437
headRoutes := routev1.RouteList{}
440438
filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name}
441439
if err := r.List(ctx, &headRoutes, client.InNamespace(instance.Namespace), filterLabels); err != nil {
442-
logger.Error(err, "Route Listing error!", "Route.Error", err)
443440
return err
444441
}
445442

@@ -451,7 +448,6 @@ func (r *RayClusterReconciler) reconcileRouteOpenShift(ctx context.Context, inst
451448
if headRoutes.Items == nil || len(headRoutes.Items) == 0 {
452449
route, err := common.BuildRouteForHeadService(*instance)
453450
if err != nil {
454-
logger.Error(err, "Failed building route!", "Route.Error", err)
455451
return err
456452
}
457453

@@ -461,7 +457,6 @@ func (r *RayClusterReconciler) reconcileRouteOpenShift(ctx context.Context, inst
461457

462458
err = r.createHeadRoute(ctx, route, instance)
463459
if err != nil {
464-
logger.Error(err, "Failed creating route!", "Route.Error", err)
465460
return err
466461
}
467462
}
@@ -925,7 +920,6 @@ func (r *RayClusterReconciler) createHeadIngress(ctx context.Context, ingress *n
925920
logger.Info("Ingress already exists, no need to create")
926921
return nil
927922
}
928-
logger.Error(err, "Ingress create error!", "Ingress.Error", err)
929923
return err
930924
}
931925
logger.Info("Ingress created successfully", "ingress name", ingress.Name)
@@ -944,7 +938,6 @@ func (r *RayClusterReconciler) createHeadRoute(ctx context.Context, route *route
944938
logger.Info("Route already exists, no need to create")
945939
return nil
946940
}
947-
logger.Error(err, "Route create error!", "Route.Error", err)
948941
return err
949942
}
950943
logger.Info("Route created successfully", "route name", route.Name)
@@ -967,7 +960,6 @@ func (r *RayClusterReconciler) createService(ctx context.Context, raySvc *corev1
967960
logger.Info("Pod service already exist, no need to create")
968961
return nil
969962
}
970-
logger.Error(err, "Pod Service create error!", "Pod.Service.Error", err)
971963
return err
972964
}
973965
logger.Info("Pod Service created successfully", "service name", raySvc.Name)
@@ -999,7 +991,6 @@ func (r *RayClusterReconciler) createHeadPod(ctx context.Context, instance rayv1
999991
// the pod might be in terminating state, we need to check
1000992
if errPod := r.Get(ctx, podIdentifier, &fetchedPod); errPod == nil {
1001993
if fetchedPod.DeletionTimestamp != nil {
1002-
logger.Error(errPod, "create pod error!", "pod is in a terminating state, we will wait until it is cleaned up", podIdentifier)
1003994
return err
1004995
}
1005996
}
@@ -1036,13 +1027,11 @@ func (r *RayClusterReconciler) createWorkerPod(ctx context.Context, instance ray
10361027
// the pod might be in terminating state, we need to check
10371028
if errPod := r.Get(ctx, podIdentifier, &fetchedPod); errPod == nil {
10381029
if fetchedPod.DeletionTimestamp != nil {
1039-
logger.Error(errPod, "create pod error!", "pod is in a terminating state, we will wait until it is cleaned up", podIdentifier)
10401030
return err
10411031
}
10421032
}
10431033
logger.Info("Creating pod", "Pod already exists", pod.Name)
10441034
} else {
1045-
logger.Error(fmt.Errorf("createWorkerPod error"), "error creating pod", "pod", pod, "err = ", err)
10461035
return err
10471036
}
10481037
}
@@ -1262,7 +1251,6 @@ func (r *RayClusterReconciler) getHeadPodIP(ctx context.Context, instance *rayv1
12621251
runtimePods := corev1.PodList{}
12631252
filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name, utils.RayNodeTypeLabelKey: string(rayv1.HeadNode)}
12641253
if err := r.List(ctx, &runtimePods, client.InNamespace(instance.Namespace), filterLabels); err != nil {
1265-
logger.Error(err, "Failed to list pods while getting head pod ip.")
12661254
return "", err
12671255
}
12681256
if len(runtimePods.Items) != 1 {
@@ -1366,10 +1354,6 @@ func (r *RayClusterReconciler) reconcileAutoscalerServiceAccount(ctx context.Con
13661354
// zero-downtime rolling updates when RayService is performed. See https://github.com/ray-project/kuberay/issues/1123
13671355
// for more details.
13681356
if instance.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName == namespacedName.Name {
1369-
logger.Error(err, fmt.Sprintf(
1370-
"If users specify ServiceAccountName for the head Pod, they need to create a ServiceAccount themselves. "+
1371-
"However, ServiceAccount %s is not found. Please create one. "+
1372-
"See the PR description of https://github.com/ray-project/kuberay/pull/1128 for more details.", namespacedName.Name), "ServiceAccount", namespacedName)
13731357
return err
13741358
}
13751359

@@ -1392,7 +1376,6 @@ func (r *RayClusterReconciler) reconcileAutoscalerServiceAccount(ctx context.Con
13921376
logger.Info("Pod service account already exist, no need to create")
13931377
return nil
13941378
}
1395-
logger.Error(err, "Pod Service Account create error!", "Pod.ServiceAccount.Error", err)
13961379
return err
13971380
}
13981381
logger.Info("Pod ServiceAccount created successfully", "service account name", serviceAccount.Name)
@@ -1434,7 +1417,6 @@ func (r *RayClusterReconciler) reconcileAutoscalerRole(ctx context.Context, inst
14341417
logger.Info("role already exist, no need to create")
14351418
return nil
14361419
}
1437-
logger.Error(err, "Role create error!", "Role.Error", err)
14381420
return err
14391421
}
14401422
logger.Info("Role created successfully", "role name", role.Name)
@@ -1476,7 +1458,6 @@ func (r *RayClusterReconciler) reconcileAutoscalerRoleBinding(ctx context.Contex
14761458
logger.Info("role binding already exist, no need to create")
14771459
return nil
14781460
}
1479-
logger.Error(err, "Role binding create error!", "RoleBinding.Error", err)
14801461
return err
14811462
}
14821463
logger.Info("RoleBinding created successfully", "role binding name", roleBinding.Name)

ray-operator/controllers/ray/rayjob_controller.go

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,6 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
8484
logger.Info("RayJob resource not found. Ignoring since object must be deleted", "name", request.NamespacedName)
8585
return ctrl.Result{}, nil
8686
}
87-
// Error reading the object - requeue the request.
88-
logger.Error(err, "Failed to get RayJob")
8987
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
9088
}
9189

@@ -115,14 +113,12 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
115113
controllerutil.RemoveFinalizer(rayJobInstance, utils.RayJobStopJobFinalizer)
116114
err := r.Update(ctx, rayJobInstance)
117115
if err != nil {
118-
logger.Error(err, "Failed to remove finalizer for RayJob")
119116
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
120117
}
121118
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
122119
}
123120

124121
if err := validateRayJobSpec(rayJobInstance); err != nil {
125-
logger.Error(err, "The RayJob spec is invalid")
126122
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
127123
}
128124

@@ -136,7 +132,6 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
136132
logger.Info("Add a finalizer", "finalizer", utils.RayJobStopJobFinalizer)
137133
controllerutil.AddFinalizer(rayJobInstance, utils.RayJobStopJobFinalizer)
138134
if err := r.Update(ctx, rayJobInstance); err != nil {
139-
logger.Error(err, "Failed to update RayJob with finalizer")
140135
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
141136
}
142137
}
@@ -168,7 +163,6 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
168163
}
169164

170165
if clientURL, err = utils.FetchHeadServiceURL(ctx, r.Client, rayClusterInstance, utils.DashboardPortName); err != nil || clientURL == "" {
171-
logger.Error(err, "Failed to get the dashboard URL after the RayCluster is ready!", "RayCluster", rayClusterInstance.Name)
172166
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
173167
}
174168
rayJobInstance.Status.DashboardURL = clientURL
@@ -201,7 +195,6 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
201195
// mode is not stuck in the `Running` status indefinitely.
202196
namespacedName := common.RayJobK8sJobNamespacedName(rayJobInstance)
203197
if err := r.Client.Get(ctx, namespacedName, job); err != nil {
204-
logger.Error(err, "Failed to get the submitter Kubernetes Job", "NamespacedName", namespacedName)
205198
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
206199
}
207200
if shouldUpdate := r.checkK8sJobAndUpdateStatusIfNeeded(ctx, rayJobInstance, job); shouldUpdate {
@@ -228,12 +221,10 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
228221
if rayJobInstance.Spec.SubmissionMode == rayv1.HTTPMode && errors.IsBadRequest(err) {
229222
logger.Info("The Ray job was not found. Submit a Ray job via an HTTP request.", "JobId", rayJobInstance.Status.JobId)
230223
if _, err := rayDashboardClient.SubmitJob(ctx, rayJobInstance); err != nil {
231-
logger.Error(err, "Failed to submit the Ray job", "JobId", rayJobInstance.Status.JobId)
232224
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
233225
}
234226
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, nil
235227
}
236-
logger.Error(err, "Failed to get job info", "JobId", rayJobInstance.Status.JobId)
237228
return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
238229
}
239230
logger.Info("GetJobInfo", "Job Info", jobInfo)
@@ -357,14 +348,11 @@ func (r *RayJobReconciler) createK8sJobIfNeed(ctx context.Context, rayJobInstanc
357348
if errors.IsNotFound(err) {
358349
submitterTemplate, err := r.getSubmitterTemplate(ctx, rayJobInstance, rayClusterInstance)
359350
if err != nil {
360-
logger.Error(err, "failed to get submitter template")
361351
return err
362352
}
363353
return r.createNewK8sJob(ctx, rayJobInstance, submitterTemplate)
364354
}
365355

366-
// Some other error occurred while trying to get the Job
367-
logger.Error(err, "failed to get Kubernetes Job")
368356
return err
369357
}
370358

@@ -444,13 +432,11 @@ func (r *RayJobReconciler) createNewK8sJob(ctx context.Context, rayJobInstance *
444432

445433
// Set the ownership in order to do the garbage collection by k8s.
446434
if err := ctrl.SetControllerReference(rayJobInstance, job, r.Scheme); err != nil {
447-
logger.Error(err, "failed to set controller reference")
448435
return err
449436
}
450437

451438
// Create the Kubernetes Job
452439
if err := r.Client.Create(ctx, job); err != nil {
453-
logger.Error(err, "failed to create k8s Job")
454440
return err
455441
}
456442
logger.Info("Kubernetes Job created", "RayJob", rayJobInstance.Name, "Kubernetes Job", job.Name)
@@ -476,7 +462,6 @@ func (r *RayJobReconciler) deleteSubmitterJob(ctx context.Context, rayJobInstanc
476462
isJobDeleted = true
477463
logger.Info("The submitter Kubernetes Job has been already deleted", "RayJob", rayJobInstance.Name, "Kubernetes Job", job.Name)
478464
} else {
479-
logger.Error(err, "Failed to get Kubernetes Job")
480465
return false, err
481466
}
482467
} else {
@@ -629,16 +614,13 @@ func (r *RayJobReconciler) getOrCreateRayClusterInstance(ctx context.Context, ra
629614
logger.Info("RayCluster not found, creating RayCluster!", "RayCluster", rayClusterNamespacedName)
630615
rayClusterInstance, err = r.constructRayClusterForRayJob(rayJobInstance, rayClusterNamespacedName.Name)
631616
if err != nil {
632-
logger.Error(err, "unable to construct a new RayCluster")
633617
return nil, err
634618
}
635619
if err := r.Create(ctx, rayClusterInstance); err != nil {
636-
logger.Error(err, "unable to create RayCluster for RayJob", "RayCluster", rayClusterInstance)
637620
return nil, err
638621
}
639622
r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, "Created", "Created RayCluster %s", rayJobInstance.Status.RayClusterName)
640623
} else {
641-
logger.Error(err, "Fail to get RayCluster!")
642624
return nil, err
643625
}
644626
}

ray-operator/controllers/ray/rayservice_controller.go

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,6 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque
221221
if r.inconsistentRayServiceStatuses(ctx, originalRayServiceInstance.Status, rayServiceInstance.Status) {
222222
rayServiceInstance.Status.LastUpdateTime = &metav1.Time{Time: time.Now()}
223223
if errStatus := r.Status().Update(ctx, rayServiceInstance); errStatus != nil {
224-
logger.Error(errStatus, "Failed to update RayService status", "rayServiceInstance", rayServiceInstance)
225224
return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, errStatus
226225
}
227226
}
@@ -230,10 +229,8 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque
230229
}
231230

232231
func (r *RayServiceReconciler) calculateStatus(ctx context.Context, rayServiceInstance *rayv1.RayService) error {
233-
logger := ctrl.LoggerFrom(ctx)
234232
serveEndPoints := &corev1.Endpoints{}
235233
if err := r.Get(ctx, common.RayServiceServeServiceNamespacedName(rayServiceInstance), serveEndPoints); err != nil && !errors.IsNotFound(err) {
236-
logger.Error(err, "Fail to retrieve the Kubernetes Endpoints from the cluster!")
237234
return err
238235
}
239236

@@ -438,7 +435,6 @@ func (r *RayServiceReconciler) cleanUpRayClusterInstance(ctx context.Context, ra
438435

439436
var err error
440437
if err = r.List(ctx, &rayClusterList, common.RayServiceRayClustersAssociationOptions(rayServiceInstance).ToListOptions()...); err != nil {
441-
logger.Error(err, "Fail to list RayCluster for "+rayServiceInstance.Name)
442438
return err
443439
}
444440

@@ -463,7 +459,6 @@ func (r *RayServiceReconciler) cleanUpRayClusterInstance(ctx context.Context, ra
463459
if reasonForDeletion != "" {
464460
logger.Info("reconcileRayCluster", "delete Ray cluster", rayClusterInstance.Name, "reason", reasonForDeletion)
465461
if err := r.Delete(ctx, &rayClusterInstance, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil {
466-
logger.Error(err, "Fail to delete RayCluster "+rayClusterInstance.Name)
467462
return err
468463
}
469464
}
@@ -475,12 +470,10 @@ func (r *RayServiceReconciler) cleanUpRayClusterInstance(ctx context.Context, ra
475470
}
476471

477472
func (r *RayServiceReconciler) getRayClusterByNamespacedName(ctx context.Context, clusterKey client.ObjectKey) (*rayv1.RayCluster, error) {
478-
logger := ctrl.LoggerFrom(ctx)
479473
rayCluster := &rayv1.RayCluster{}
480474
if clusterKey.Name != "" {
481475
// Ignore not found since in that case we should return RayCluster as nil.
482476
if err := r.Get(ctx, clusterKey, rayCluster); client.IgnoreNotFound(err) != nil {
483-
logger.Error(err, "Fail to get RayCluster "+clusterKey.String())
484477
return nil, err
485478
}
486479
} else {
@@ -603,7 +596,6 @@ func (r *RayServiceReconciler) createRayClusterInstanceIfNeeded(ctx context.Cont
603596
} else {
604597
clusterAction, err = getClusterAction(pendingRayCluster.Spec, rayServiceInstance.Spec.RayClusterSpec)
605598
if err != nil {
606-
logger.Error(err, "Fail to generate hash for RayClusterSpec")
607599
return nil, err
608600
}
609601
}
@@ -640,7 +632,6 @@ func (r *RayServiceReconciler) updateRayClusterInstance(ctx context.Context, ray
640632
Name: rayClusterInstance.Name,
641633
})
642634
if err != nil {
643-
logger.Error(err, "Failed to get the current state of RayCluster", "Namespace", rayClusterInstance.Namespace, "Name", rayClusterInstance.Name)
644635
return err
645636
}
646637

@@ -658,7 +649,6 @@ func (r *RayServiceReconciler) updateRayClusterInstance(ctx context.Context, ray
658649

659650
// Update the RayCluster
660651
if err = r.Update(ctx, currentRayCluster); err != nil {
661-
logger.Error(err, "Fail to update RayCluster "+currentRayCluster.Name)
662652
return err
663653
}
664654

@@ -692,19 +682,16 @@ func (r *RayServiceReconciler) createRayClusterInstance(ctx context.Context, ray
692682
}
693683
// if error is `not found`, then continue.
694684
} else if !errors.IsNotFound(err) {
695-
logger.Error(err, "Get request rayCluster instance error!")
696685
return nil, err
697686
// if error is `not found`, then continue.
698687
}
699688

700689
logger.Info("No pending RayCluster, creating RayCluster.")
701690
rayClusterInstance, err = r.constructRayClusterForRayService(ctx, rayServiceInstance, rayClusterKey.Name)
702691
if err != nil {
703-
logger.Error(err, "unable to construct rayCluster from spec")
704692
return nil, err
705693
}
706694
if err = r.Create(ctx, rayClusterInstance); err != nil {
707-
logger.Error(err, "unable to create rayCluster for rayService", "rayCluster", rayClusterInstance)
708695
return nil, err
709696
}
710697
logger.Info("created rayCluster for rayService", "rayCluster", rayClusterInstance)
@@ -713,8 +700,6 @@ func (r *RayServiceReconciler) createRayClusterInstance(ctx context.Context, ray
713700
}
714701

715702
func (r *RayServiceReconciler) constructRayClusterForRayService(ctx context.Context, rayService *rayv1.RayService, rayClusterName string) (*rayv1.RayCluster, error) {
716-
logger := ctrl.LoggerFrom(ctx)
717-
718703
var err error
719704
rayClusterLabel := make(map[string]string)
720705
for k, v := range rayService.Labels {
@@ -727,12 +712,8 @@ func (r *RayServiceReconciler) constructRayClusterForRayService(ctx context.Cont
727712
for k, v := range rayService.Annotations {
728713
rayClusterAnnotations[k] = v
729714
}
730-
errContext := "Failed to serialize RayCluster config. " +
731-
"Manual config updates will NOT be tracked accurately. " +
732-
"Please tear down the cluster and apply a new config."
733715
rayClusterAnnotations[utils.HashWithoutReplicasAndWorkersToDeleteKey], err = generateHashWithoutReplicasAndWorkersToDelete(rayService.Spec.RayClusterSpec)
734716
if err != nil {
735-
logger.Error(err, errContext)
736717
return nil, err
737718
}
738719
rayClusterAnnotations[utils.NumWorkerGroupsKey] = strconv.Itoa(len(rayService.Spec.RayClusterSpec.WorkerGroupSpecs))
@@ -991,7 +972,6 @@ func (r *RayServiceReconciler) reconcileServices(ctx context.Context, rayService
991972
oldSvc.Spec = *newSvc.Spec.DeepCopy()
992973
logger.Info(fmt.Sprintf("Update Kubernetes Service serviceType %v", serviceType))
993974
if updateErr := r.Update(ctx, oldSvc); updateErr != nil {
994-
logger.Error(updateErr, fmt.Sprintf("Fail to update Kubernetes Service serviceType %v", serviceType), "Error", updateErr)
995975
return updateErr
996976
}
997977
} else if errors.IsNotFound(err) {
@@ -1004,11 +984,9 @@ func (r *RayServiceReconciler) reconcileServices(ctx context.Context, rayService
1004984
logger.Info("The Kubernetes Service already exists, no need to create.")
1005985
return nil
1006986
}
1007-
logger.Error(createErr, fmt.Sprintf("Fail to create Kubernetes Service serviceType %v", serviceType), "Error", createErr)
1008987
return createErr
1009988
}
1010989
} else {
1011-
logger.Error(err, "Fail to retrieve the Kubernetes Service from the cluster!")
1012990
return err
1013991
}
1014992

@@ -1120,7 +1098,6 @@ func (r *RayServiceReconciler) reconcileServe(ctx context.Context, rayServiceIns
11201098
}
11211099

11221100
func (r *RayServiceReconciler) labelHeadPodForServeStatus(ctx context.Context, rayClusterInstance *rayv1.RayCluster) error {
1123-
logger := ctrl.LoggerFrom(ctx)
11241101
headPod, err := r.getHeadPod(ctx, rayClusterInstance)
11251102
if err != nil {
11261103
return err
@@ -1151,7 +1128,6 @@ func (r *RayServiceReconciler) labelHeadPodForServeStatus(ctx context.Context, r
11511128

11521129
if !reflect.DeepEqual(originalLabels, headPod.Labels) {
11531130
if updateErr := r.Update(ctx, headPod); updateErr != nil {
1154-
logger.Error(updateErr, "Pod label Update error!", "Pod.Error", updateErr)
11551131
return updateErr
11561132
}
11571133
}

0 commit comments

Comments
 (0)