Skip to content

Commit 6cdc87d

Browse files
[Chore][Log] Delete error loggings right before returned errors
Signed-off-by: Chi-Sheng Liu <[email protected]>
1 parent 4836d01 commit 6cdc87d

File tree

4 files changed: 1 addition and 46 deletions.

ray-operator/controllers/ray/raycluster_controller.go

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -216,7 +216,6 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request
216216
"finalizer", utils.GCSFaultToleranceRedisCleanupFinalizer)
217217
controllerutil.AddFinalizer(instance, utils.GCSFaultToleranceRedisCleanupFinalizer)
218218
if err := r.Update(ctx, instance); err != nil {
219-
logger.Error(err, fmt.Sprintf("Failed to add the finalizer %s to the RayCluster.", utils.GCSFaultToleranceRedisCleanupFinalizer))
220219
return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err
221220
}
222221
// Only start the RayCluster reconciliation after the finalizer is added.
@@ -287,7 +286,6 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request
287286
logger.Info(fmt.Sprintf("Redis cleanup Job already exists. Requeue the RayCluster CR %s.", instance.Name))
288287
return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, nil
289288
}
290-
logger.Error(err, "Failed to create Redis cleanup Job")
291289
return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err
292290
}
293291
logger.Info("Successfully created Redis cleanup Job", "Job name", redisCleanupJob.Name)
@@ -439,7 +437,6 @@ func (r *RayClusterReconciler) reconcileRouteOpenShift(ctx context.Context, inst
439437
headRoutes := routev1.RouteList{}
440438
filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name}
441439
if err := r.List(ctx, &headRoutes, client.InNamespace(instance.Namespace), filterLabels); err != nil {
442-
logger.Error(err, "Route Listing error!", "Route.Error", err)
443440
return err
444441
}
445442

@@ -451,7 +448,6 @@ func (r *RayClusterReconciler) reconcileRouteOpenShift(ctx context.Context, inst
451448
if headRoutes.Items == nil || len(headRoutes.Items) == 0 {
452449
route, err := common.BuildRouteForHeadService(*instance)
453450
if err != nil {
454-
logger.Error(err, "Failed building route!", "Route.Error", err)
455451
return err
456452
}
457453

@@ -461,7 +457,6 @@ func (r *RayClusterReconciler) reconcileRouteOpenShift(ctx context.Context, inst
461457

462458
err = r.createHeadRoute(ctx, route, instance)
463459
if err != nil {
464-
logger.Error(err, "Failed creating route!", "Route.Error", err)
465460
return err
466461
}
467462
}
@@ -925,7 +920,6 @@ func (r *RayClusterReconciler) createHeadIngress(ctx context.Context, ingress *n
925920
logger.Info("Ingress already exists, no need to create")
926921
return nil
927922
}
928-
logger.Error(err, "Ingress create error!", "Ingress.Error", err)
929923
return err
930924
}
931925
logger.Info("Ingress created successfully", "ingress name", ingress.Name)
@@ -944,7 +938,6 @@ func (r *RayClusterReconciler) createHeadRoute(ctx context.Context, route *route
944938
logger.Info("Route already exists, no need to create")
945939
return nil
946940
}
947-
logger.Error(err, "Route create error!", "Route.Error", err)
948941
return err
949942
}
950943
logger.Info("Route created successfully", "route name", route.Name)
@@ -967,7 +960,6 @@ func (r *RayClusterReconciler) createService(ctx context.Context, raySvc *corev1
967960
logger.Info("Pod service already exist, no need to create")
968961
return nil
969962
}
970-
logger.Error(err, "Pod Service create error!", "Pod.Service.Error", err)
971963
return err
972964
}
973965
logger.Info("Pod Service created successfully", "service name", raySvc.Name)
@@ -999,7 +991,6 @@ func (r *RayClusterReconciler) createHeadPod(ctx context.Context, instance rayv1
999991
// the pod might be in terminating state, we need to check
1000992
if errPod := r.Get(ctx, podIdentifier, &fetchedPod); errPod == nil {
1001993
if fetchedPod.DeletionTimestamp != nil {
1002-
logger.Error(errPod, "create pod error!", "pod is in a terminating state, we will wait until it is cleaned up", podIdentifier)
1003994
return err
1004995
}
1005996
}
@@ -1036,13 +1027,11 @@ func (r *RayClusterReconciler) createWorkerPod(ctx context.Context, instance ray
10361027
// the pod might be in terminating state, we need to check
10371028
if errPod := r.Get(ctx, podIdentifier, &fetchedPod); errPod == nil {
10381029
if fetchedPod.DeletionTimestamp != nil {
1039-
logger.Error(errPod, "create pod error!", "pod is in a terminating state, we will wait until it is cleaned up", podIdentifier)
10401030
return err
10411031
}
10421032
}
10431033
logger.Info("Creating pod", "Pod already exists", pod.Name)
10441034
} else {
1045-
logger.Error(fmt.Errorf("createWorkerPod error"), "error creating pod", "pod", pod, "err = ", err)
10461035
return err
10471036
}
10481037
}
@@ -1262,7 +1251,6 @@ func (r *RayClusterReconciler) getHeadPodIP(ctx context.Context, instance *rayv1
12621251
runtimePods := corev1.PodList{}
12631252
filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name, utils.RayNodeTypeLabelKey: string(rayv1.HeadNode)}
12641253
if err := r.List(ctx, &runtimePods, client.InNamespace(instance.Namespace), filterLabels); err != nil {
1265-
logger.Error(err, "Failed to list pods while getting head pod ip.")
12661254
return "", err
12671255
}
12681256
if len(runtimePods.Items) != 1 {
@@ -1392,7 +1380,6 @@ func (r *RayClusterReconciler) reconcileAutoscalerServiceAccount(ctx context.Con
13921380
logger.Info("Pod service account already exist, no need to create")
13931381
return nil
13941382
}
1395-
logger.Error(err, "Pod Service Account create error!", "Pod.ServiceAccount.Error", err)
13961383
return err
13971384
}
13981385
logger.Info("Pod ServiceAccount created successfully", "service account name", serviceAccount.Name)
@@ -1434,7 +1421,6 @@ func (r *RayClusterReconciler) reconcileAutoscalerRole(ctx context.Context, inst
14341421
logger.Info("role already exist, no need to create")
14351422
return nil
14361423
}
1437-
logger.Error(err, "Role create error!", "Role.Error", err)
14381424
return err
14391425
}
14401426
logger.Info("Role created successfully", "role name", role.Name)
@@ -1476,7 +1462,6 @@ func (r *RayClusterReconciler) reconcileAutoscalerRoleBinding(ctx context.Contex
14761462
logger.Info("role binding already exist, no need to create")
14771463
return nil
14781464
}
1479-
logger.Error(err, "Role binding create error!", "RoleBinding.Error", err)
14801465
return err
14811466
}
14821467
logger.Info("RoleBinding created successfully", "role binding name", roleBinding.Name)

ray-operator/controllers/ray/rayjob_controller.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -357,14 +357,10 @@ func (r *RayJobReconciler) createK8sJobIfNeed(ctx context.Context, rayJobInstanc
357357
if errors.IsNotFound(err) {
358358
submitterTemplate, err := r.getSubmitterTemplate(ctx, rayJobInstance, rayClusterInstance)
359359
if err != nil {
360-
logger.Error(err, "failed to get submitter template")
361360
return err
362361
}
363362
return r.createNewK8sJob(ctx, rayJobInstance, submitterTemplate)
364363
}
365-
366-
// Some other error occurred while trying to get the Job
367-
logger.Error(err, "failed to get Kubernetes Job")
368364
return err
369365
}
370366

@@ -444,13 +440,11 @@ func (r *RayJobReconciler) createNewK8sJob(ctx context.Context, rayJobInstance *
444440

445441
// Set the ownership in order to do the garbage collection by k8s.
446442
if err := ctrl.SetControllerReference(rayJobInstance, job, r.Scheme); err != nil {
447-
logger.Error(err, "failed to set controller reference")
448443
return err
449444
}
450445

451446
// Create the Kubernetes Job
452447
if err := r.Client.Create(ctx, job); err != nil {
453-
logger.Error(err, "failed to create k8s Job")
454448
return err
455449
}
456450
logger.Info("Kubernetes Job created", "RayJob", rayJobInstance.Name, "Kubernetes Job", job.Name)
@@ -476,7 +470,6 @@ func (r *RayJobReconciler) deleteSubmitterJob(ctx context.Context, rayJobInstanc
476470
isJobDeleted = true
477471
logger.Info("The submitter Kubernetes Job has been already deleted", "RayJob", rayJobInstance.Name, "Kubernetes Job", job.Name)
478472
} else {
479-
logger.Error(err, "Failed to get Kubernetes Job")
480473
return false, err
481474
}
482475
} else {
@@ -629,16 +622,13 @@ func (r *RayJobReconciler) getOrCreateRayClusterInstance(ctx context.Context, ra
629622
logger.Info("RayCluster not found, creating RayCluster!", "RayCluster", rayClusterNamespacedName)
630623
rayClusterInstance, err = r.constructRayClusterForRayJob(rayJobInstance, rayClusterNamespacedName.Name)
631624
if err != nil {
632-
logger.Error(err, "unable to construct a new RayCluster")
633625
return nil, err
634626
}
635627
if err := r.Create(ctx, rayClusterInstance); err != nil {
636-
logger.Error(err, "unable to create RayCluster for RayJob", "RayCluster", rayClusterInstance)
637628
return nil, err
638629
}
639630
r.Recorder.Eventf(rayJobInstance, corev1.EventTypeNormal, "Created", "Created RayCluster %s", rayJobInstance.Status.RayClusterName)
640631
} else {
641-
logger.Error(err, "Fail to get RayCluster!")
642632
return nil, err
643633
}
644634
}

ray-operator/controllers/ray/rayservice_controller.go

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -230,10 +230,8 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque
230230
}
231231

232232
func (r *RayServiceReconciler) calculateStatus(ctx context.Context, rayServiceInstance *rayv1.RayService) error {
233-
logger := ctrl.LoggerFrom(ctx)
234233
serveEndPoints := &corev1.Endpoints{}
235234
if err := r.Get(ctx, common.RayServiceServeServiceNamespacedName(rayServiceInstance), serveEndPoints); err != nil && !errors.IsNotFound(err) {
236-
logger.Error(err, "Fail to retrieve the Kubernetes Endpoints from the cluster!")
237235
return err
238236
}
239237

@@ -438,7 +436,6 @@ func (r *RayServiceReconciler) cleanUpRayClusterInstance(ctx context.Context, ra
438436

439437
var err error
440438
if err = r.List(ctx, &rayClusterList, common.RayServiceRayClustersAssociationOptions(rayServiceInstance).ToListOptions()...); err != nil {
441-
logger.Error(err, "Fail to list RayCluster for "+rayServiceInstance.Name)
442439
return err
443440
}
444441

@@ -463,7 +460,6 @@ func (r *RayServiceReconciler) cleanUpRayClusterInstance(ctx context.Context, ra
463460
if reasonForDeletion != "" {
464461
logger.Info("reconcileRayCluster", "delete Ray cluster", rayClusterInstance.Name, "reason", reasonForDeletion)
465462
if err := r.Delete(ctx, &rayClusterInstance, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil {
466-
logger.Error(err, "Fail to delete RayCluster "+rayClusterInstance.Name)
467463
return err
468464
}
469465
}
@@ -475,12 +471,10 @@ func (r *RayServiceReconciler) cleanUpRayClusterInstance(ctx context.Context, ra
475471
}
476472

477473
func (r *RayServiceReconciler) getRayClusterByNamespacedName(ctx context.Context, clusterKey client.ObjectKey) (*rayv1.RayCluster, error) {
478-
logger := ctrl.LoggerFrom(ctx)
479474
rayCluster := &rayv1.RayCluster{}
480475
if clusterKey.Name != "" {
481476
// Ignore not found since in that case we should return RayCluster as nil.
482477
if err := r.Get(ctx, clusterKey, rayCluster); client.IgnoreNotFound(err) != nil {
483-
logger.Error(err, "Fail to get RayCluster "+clusterKey.String())
484478
return nil, err
485479
}
486480
} else {
@@ -603,7 +597,6 @@ func (r *RayServiceReconciler) createRayClusterInstanceIfNeeded(ctx context.Cont
603597
} else {
604598
clusterAction, err = getClusterAction(pendingRayCluster.Spec, rayServiceInstance.Spec.RayClusterSpec)
605599
if err != nil {
606-
logger.Error(err, "Fail to generate hash for RayClusterSpec")
607600
return nil, err
608601
}
609602
}
@@ -640,7 +633,6 @@ func (r *RayServiceReconciler) updateRayClusterInstance(ctx context.Context, ray
640633
Name: rayClusterInstance.Name,
641634
})
642635
if err != nil {
643-
logger.Error(err, "Failed to get the current state of RayCluster", "Namespace", rayClusterInstance.Namespace, "Name", rayClusterInstance.Name)
644636
return err
645637
}
646638

@@ -658,7 +650,6 @@ func (r *RayServiceReconciler) updateRayClusterInstance(ctx context.Context, ray
658650

659651
// Update the RayCluster
660652
if err = r.Update(ctx, currentRayCluster); err != nil {
661-
logger.Error(err, "Fail to update RayCluster "+currentRayCluster.Name)
662653
return err
663654
}
664655

@@ -692,19 +683,16 @@ func (r *RayServiceReconciler) createRayClusterInstance(ctx context.Context, ray
692683
}
693684
// if error is `not found`, then continue.
694685
} else if !errors.IsNotFound(err) {
695-
logger.Error(err, "Get request rayCluster instance error!")
696686
return nil, err
697687
// if error is `not found`, then continue.
698688
}
699689

700690
logger.Info("No pending RayCluster, creating RayCluster.")
701691
rayClusterInstance, err = r.constructRayClusterForRayService(ctx, rayServiceInstance, rayClusterKey.Name)
702692
if err != nil {
703-
logger.Error(err, "unable to construct rayCluster from spec")
704693
return nil, err
705694
}
706695
if err = r.Create(ctx, rayClusterInstance); err != nil {
707-
logger.Error(err, "unable to create rayCluster for rayService", "rayCluster", rayClusterInstance)
708696
return nil, err
709697
}
710698
logger.Info("created rayCluster for rayService", "rayCluster", rayClusterInstance)
@@ -991,7 +979,6 @@ func (r *RayServiceReconciler) reconcileServices(ctx context.Context, rayService
991979
oldSvc.Spec = *newSvc.Spec.DeepCopy()
992980
logger.Info(fmt.Sprintf("Update Kubernetes Service serviceType %v", serviceType))
993981
if updateErr := r.Update(ctx, oldSvc); updateErr != nil {
994-
logger.Error(updateErr, fmt.Sprintf("Fail to update Kubernetes Service serviceType %v", serviceType), "Error", updateErr)
995982
return updateErr
996983
}
997984
} else if errors.IsNotFound(err) {
@@ -1004,11 +991,9 @@ func (r *RayServiceReconciler) reconcileServices(ctx context.Context, rayService
1004991
logger.Info("The Kubernetes Service already exists, no need to create.")
1005992
return nil
1006993
}
1007-
logger.Error(createErr, fmt.Sprintf("Fail to create Kubernetes Service serviceType %v", serviceType), "Error", createErr)
1008994
return createErr
1009995
}
1010996
} else {
1011-
logger.Error(err, "Fail to retrieve the Kubernetes Service from the cluster!")
1012997
return err
1013998
}
1014999

@@ -1120,7 +1105,6 @@ func (r *RayServiceReconciler) reconcileServe(ctx context.Context, rayServiceIns
11201105
}
11211106

11221107
func (r *RayServiceReconciler) labelHeadPodForServeStatus(ctx context.Context, rayClusterInstance *rayv1.RayCluster) error {
1123-
logger := ctrl.LoggerFrom(ctx)
11241108
headPod, err := r.getHeadPod(ctx, rayClusterInstance)
11251109
if err != nil {
11261110
return err
@@ -1151,7 +1135,6 @@ func (r *RayServiceReconciler) labelHeadPodForServeStatus(ctx context.Context, r
11511135

11521136
if !reflect.DeepEqual(originalLabels, headPod.Labels) {
11531137
if updateErr := r.Update(ctx, headPod); updateErr != nil {
1154-
logger.Error(updateErr, "Pod label Update error!", "Pod.Error", updateErr)
11551138
return updateErr
11561139
}
11571140
}

ray-operator/controllers/ray/utils/httpproxy_httpclient.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,15 @@ func (r *RayHttpProxyClient) SetHostIp(hostIp, podNamespace, podName string, por
5050

5151
// CheckProxyActorHealth checks the health status of the Ray Serve proxy actor.
5252
func (r *RayHttpProxyClient) CheckProxyActorHealth(ctx context.Context) error {
53-
logger := ctrl.LoggerFrom(ctx)
5453
resp, err := r.client.Get(r.httpProxyURL + RayServeProxyHealthPath)
5554
if err != nil {
56-
logger.Error(err, "CheckProxyActorHealth fails.")
5755
return err
5856
}
5957
defer resp.Body.Close()
6058

6159
body, _ := io.ReadAll(resp.Body)
6260
if resp.StatusCode != 200 {
63-
err := fmt.Errorf("CheckProxyActorHealth fails: Status code is not 200")
64-
logger.Error(err, "CheckProxyActorHealth fails.", "status code", resp.StatusCode, "status", resp.Status, "body", string(body))
61+
err := fmt.Errorf("CheckProxyActorHealth fails. status code: %d, status: %s, body: %s", resp.StatusCode, resp.Status, string(body))
6562
return err
6663
}
6764

0 commit comments

Comments
 (0)