diff --git a/internal/conditions/conditions.go b/internal/conditions/conditions.go
index b717acc7..325578a5 100644
--- a/internal/conditions/conditions.go
+++ b/internal/conditions/conditions.go
@@ -62,7 +62,6 @@ const (
 )
 
 // Updater is the condition updater
-
 type Updater interface {
 	SetConditionsReady(ctx context.Context, cr client.Object, reason, message string) error
 	SetConditionsNotReady(ctx context.Context, cr client.Object, reason, message string) error
@@ -88,6 +87,8 @@ func (u *updater) SetConditionsReady(ctx context.Context, obj client.Object, rea
 		return u.SetConditionsReadyNemoEntitystore(ctx, cr, reason, message)
 	case *appsv1alpha1.NemoCustomizer:
 		return u.SetConditionsReadyNemoCustomizer(ctx, cr, reason, message)
+	case *appsv1alpha1.NemoDatastore:
+		return u.SetConditionsReadyNemoDatastore(ctx, cr, reason, message)
 	default:
 		return fmt.Errorf("unknown CRD type for %v", obj)
 	}
@@ -144,6 +145,23 @@ func (u *updater) SetConditionsReadyNemoEntitystore(ctx context.Context, cr *app
 	return u.updateNemoEntitystoreStatus(ctx, cr)
 }
 
+func (u *updater) SetConditionsReadyNemoDatastore(ctx context.Context, cr *appsv1alpha1.NemoDatastore, reason, message string) error {
+	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
+		Type:    Ready,
+		Status:  metav1.ConditionTrue,
+		Reason:  reason,
+		Message: message,
+	})
+
+	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
+		Type:   Failed,
+		Status: metav1.ConditionFalse,
+		Reason: Ready,
+	})
+	cr.Status.State = v1alpha1.NemoDatastoreStatusReady
+	return u.updateNemoDatastoreStatus(ctx, cr)
+}
+
 func (u *updater) SetConditionsReadyNemoCustomizer(ctx context.Context, cr *appsv1alpha1.NemoCustomizer, reason, message string) error {
 	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
 		Type:    Ready,
@@ -169,6 +187,8 @@ func (u *updater) SetConditionsNotReady(ctx context.Context, obj client.Object,
 		return u.SetConditionsNotReadyNemoGuardrail(ctx, cr, reason, message)
 	case *appsv1alpha1.NemoEntitystore:
 		return u.SetConditionsNotReadyNemoEntitystore(ctx, cr, reason, message)
+	case *appsv1alpha1.NemoDatastore:
+		return u.SetConditionsNotReadyNemoDatastore(ctx, cr, reason, message)
 	case *appsv1alpha1.NemoCustomizer:
 		return u.SetConditionsNotReadyNemoCustomizer(ctx, cr, reason, message)
 	default:
@@ -230,6 +250,24 @@ func (u *updater) SetConditionsNotReadyNemoEntitystore(ctx context.Context, cr *
 	return u.updateNemoEntitystoreStatus(ctx, cr)
 }
 
+func (u *updater) SetConditionsNotReadyNemoDatastore(ctx context.Context, cr *appsv1alpha1.NemoDatastore, reason, message string) error {
+	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
+		Type:    Ready,
+		Status:  metav1.ConditionFalse,
+		Reason:  reason,
+		Message: message,
+	})
+
+	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
+		Type:    Failed,
+		Status:  metav1.ConditionFalse,
+		Reason:  Ready,
+		Message: message,
+	})
+	cr.Status.State = v1alpha1.NemoDatastoreStatusNotReady
+	return u.updateNemoDatastoreStatus(ctx, cr)
+}
+
 func (u *updater) SetConditionsNotReadyNemoCustomizer(ctx context.Context, cr *appsv1alpha1.NemoCustomizer, reason, message string) error {
 	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
 		Type:    Ready,
@@ -256,6 +294,8 @@ func (u *updater) SetConditionsFailed(ctx context.Context, obj client.Object, re
 		return u.SetConditionsFailedNemoGuardrail(ctx, cr, reason, message)
 	case *appsv1alpha1.NemoEntitystore:
 		return u.SetConditionsFailedNemoEntitystore(ctx, cr, reason, message)
+	case *appsv1alpha1.NemoDatastore:
+		return u.SetConditionsFailedNemoDatastore(ctx, cr, reason, message)
 	case *appsv1alpha1.NemoCustomizer:
 		return u.SetConditionsFailedNemoCustomizer(ctx, cr, reason, message)
 	default:
@@ -314,6 +354,23 @@ func (u *updater) SetConditionsFailedNemoEntitystore(ctx context.Context, cr *ap
 	return u.updateNemoEntitystoreStatus(ctx, cr)
 }
 
+func (u *updater) SetConditionsFailedNemoDatastore(ctx context.Context, cr *appsv1alpha1.NemoDatastore, reason, message string) error {
+	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
+		Type:   Ready,
+		Status: metav1.ConditionFalse,
+		Reason: Failed,
+	})
+
+	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
+		Type:    Failed,
+		Status:  metav1.ConditionTrue,
+		Reason:  reason,
+		Message: message,
+	})
+	cr.Status.State = v1alpha1.NemoDatastoreStatusFailed
+	return u.updateNemoDatastoreStatus(ctx, cr)
+}
+
 func (u *updater) SetConditionsFailedNemoCustomizer(ctx context.Context, cr *appsv1alpha1.NemoCustomizer, reason, message string) error {
 	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
 		Type:   Ready,
@@ -371,6 +428,19 @@ func (u *updater) updateNemoEntitystoreStatus(ctx context.Context, cr *appsv1alp
 	return nil
 }
 
+func (u *updater) updateNemoDatastoreStatus(ctx context.Context, cr *appsv1alpha1.NemoDatastore) error {
+	obj := &appsv1alpha1.NemoDatastore{}
+	errGet := u.client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.GetNamespace()}, obj)
+	if errGet != nil {
+		return errGet
+	}
+	obj.Status = cr.Status
+	if err := u.client.Status().Update(ctx, obj); err != nil {
+		return err
+	}
+	return nil
+}
+
 func (u *updater) updateNemoCustomizerStatus(ctx context.Context, cr *appsv1alpha1.NemoCustomizer) error {
 	obj := &appsv1alpha1.NemoCustomizer{}
 	errGet := u.client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.GetNamespace()}, obj)
@@ -408,6 +478,7 @@ func UpdateCondition(conditions *[]metav1.Condition, conditionType string, statu
 	// condition updated
 }
 
+// IfPresentUpdateCondition updates an already existing condition
 func IfPresentUpdateCondition(conditions *[]metav1.Condition, conditionType string, status metav1.ConditionStatus, reason, message string) {
 	for i := range *conditions {
 		if (*conditions)[i].Type == conditionType {
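Editor's note: a minimal sketch of how a reconciler consumes the extended `Updater` after this change. The import path for the API package is an assumption (it is not visible in this diff); the call shape matches the controller hunks below. Only the `NemoDatastore` dispatch is new.

```go
package main

import (
	"context"

	appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1" // assumed import path
	"github.com/NVIDIA/k8s-nim-operator/internal/conditions"
)

// markDatastoreReady is illustrative only. The type switch inside
// SetConditionsReady dispatches to the new SetConditionsReadyNemoDatastore,
// which flips Ready=True / Failed=False, sets .status.state, and persists
// the status subresource via the updater's client.
func markDatastoreReady(ctx context.Context, u conditions.Updater, ds *appsv1alpha1.NemoDatastore) error {
	// A CR type without a case in the switch fails with
	// "unknown CRD type for ...", so new CRDs must be wired in explicitly.
	return u.SetConditionsReady(ctx, ds, conditions.Ready, "deployment rolled out")
}
```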
diff --git a/internal/controller/nemo_datastore_controller.go b/internal/controller/nemo_datastore_controller.go
index 11f79c2b..65c72507 100644
--- a/internal/controller/nemo_datastore_controller.go
+++ b/internal/controller/nemo_datastore_controller.go
@@ -26,7 +26,6 @@ import (
 	"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
 	"github.com/NVIDIA/k8s-nim-operator/internal/render"
 	"github.com/NVIDIA/k8s-nim-operator/internal/shared"
-	"github.com/NVIDIA/k8s-nim-operator/internal/utils"
 	"github.com/go-logr/logr"
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
 	appsv1 "k8s.io/api/apps/v1"
@@ -35,7 +34,6 @@ import (
 	networkingv1 "k8s.io/api/networking/v1"
 	rbacv1 "k8s.io/api/rbac/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
-	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
@@ -329,7 +327,7 @@ func (r *NemoDatastoreReconciler) reconcileNemoDatastore(ctx context.Context, ne
 			return ctrl.Result{}, err
 		}
 	} else {
-		err = r.cleanupResource(ctx, &networkingv1.Ingress{}, namespacedName)
+		err = k8sutil.CleanupResource(ctx, r.GetClient(), &networkingv1.Ingress{}, namespacedName)
 		if err != nil && !errors.IsNotFound(err) {
 			return ctrl.Result{}, err
 		}
@@ -345,7 +343,7 @@ func (r *NemoDatastoreReconciler) reconcileNemoDatastore(ctx context.Context, ne
 		}
 	} else {
 		// If autoscaling is disabled, ensure the HPA is deleted
-		err = r.cleanupResource(ctx, &autoscalingv2.HorizontalPodAutoscaler{}, namespacedName)
+		err = k8sutil.CleanupResource(ctx, r.GetClient(), &autoscalingv2.HorizontalPodAutoscaler{}, namespacedName)
 		if err != nil {
 			return ctrl.Result{}, err
 		}
@@ -393,19 +391,19 @@ func (r *NemoDatastoreReconciler) reconcileNemoDatastore(ctx context.Context, ne
 	}
 
 	// Wait for deployment
-	msg, ready, err := r.isDeploymentReady(ctx, &namespacedName)
+	msg, ready, err := k8sutil.IsDeploymentReady(ctx, r.GetClient(), &namespacedName)
 	if err != nil {
 		return ctrl.Result{}, err
 	}
 
 	if !ready {
 		// Update status as NotReady
-		err = r.SetConditionsNotReady(ctx, nemoDatastore, conditions.NotReady, msg)
+		err = r.updater.SetConditionsNotReady(ctx, nemoDatastore, conditions.NotReady, msg)
 		r.GetEventRecorder().Eventf(nemoDatastore, corev1.EventTypeNormal, conditions.NotReady,
 			"NemoDatastore %s not ready yet, msg: %s", nemoDatastore.Name, msg)
 	} else {
 		// Update status as ready
-		err = r.SetConditionsReady(ctx, nemoDatastore, conditions.Ready, msg)
+		err = r.updater.SetConditionsReady(ctx, nemoDatastore, conditions.Ready, msg)
 		r.GetEventRecorder().Eventf(nemoDatastore, corev1.EventTypeNormal, conditions.Ready,
 			"NemoDatastore %s ready, msg: %s", nemoDatastore.Name, msg)
 	}
@@ -494,7 +492,7 @@ func (r *NemoDatastoreReconciler) renderAndSyncResource(ctx context.Context, Nem
 		return err
 	}
 
-	err = r.syncResource(ctx, obj, resource, namespacedName)
+	err = k8sutil.SyncResource(ctx, r.GetClient(), obj, resource, namespacedName)
 	if err != nil {
 		logger.Error(err, "failed to sync", conditionType, namespacedName)
 		statusError := r.updater.SetConditionsFailed(ctx, NemoDatastore, reason, err.Error())
@@ -505,157 +503,3 @@ func (r *NemoDatastoreReconciler) renderAndSyncResource(ctx context.Context, Nem
 	}
 	return nil
 }
-
-// CheckDeploymentReadiness checks if the Deployment is ready
-func (r *NemoDatastoreReconciler) isDeploymentReady(ctx context.Context, namespacedName *types.NamespacedName) (string, bool, error) {
-	deployment := &appsv1.Deployment{}
-	err := r.Get(ctx, client.ObjectKey{Name: namespacedName.Name, Namespace: namespacedName.Namespace}, deployment)
-	if err != nil {
-		if errors.IsNotFound(err) {
-			return "", false, nil
-		}
-		return "", false, err
-	}
-
-	cond := getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
-	if cond != nil && cond.Reason == "ProgressDeadlineExceeded" {
-		return fmt.Sprintf("deployment %q exceeded its progress deadline", deployment.Name), false, nil
-	}
-	if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d out of %d new replicas have been updated...\n", deployment.Name, deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas), false, nil
-	}
-	if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d old replicas are pending termination...\n", deployment.Name, deployment.Status.Replicas-deployment.Status.UpdatedReplicas), false, nil
-	}
-	if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d of %d updated replicas are available...\n", deployment.Name, deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas), false, nil
-	}
-	return fmt.Sprintf("deployment %q successfully rolled out\n", deployment.Name), true, nil
-}
-
-func (r *NemoDatastoreReconciler) syncResource(ctx context.Context, obj client.Object, desired client.Object, namespacedName types.NamespacedName) error {
-	logger := log.FromContext(ctx)
-
-	err := r.Get(ctx, namespacedName, obj)
-	if err != nil && !errors.IsNotFound(err) {
-		return err
-	}
-
-	if !utils.IsSpecChanged(obj, desired) {
-		logger.V(2).Info("Object spec has not changed, skipping update", "obj", obj)
-		return nil
-	}
-	logger.V(2).Info("Object spec has changed, updating")
-
-	if errors.IsNotFound(err) {
-		err = r.Create(ctx, desired)
-		if err != nil {
-			return err
-		}
-	} else {
-		err = r.Update(ctx, desired)
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-// cleanupResource deletes the given Kubernetes resource if it exists.
-// If the resource does not exist or an error occurs during deletion, the function returns nil or the error.
-//
-// Parameters:
-// ctx (context.Context): The context for the operation.
-// obj (client.Object): The Kubernetes resource to delete.
-// namespacedName (types.NamespacedName): The namespaced name of the resource.
-//
-// Returns:
-// error: An error if the resource deletion fails, or nil if the resource is not found or deletion is successful.
-func (r *NemoDatastoreReconciler) cleanupResource(ctx context.Context, obj client.Object, namespacedName types.NamespacedName) error {
-
-	logger := log.FromContext(ctx)
-
-	err := r.Get(ctx, namespacedName, obj)
-	if err != nil && !errors.IsNotFound(err) {
-		return err
-	}
-
-	if errors.IsNotFound(err) {
-		return nil
-	}
-
-	err = r.Delete(ctx, obj)
-	if err != nil {
-		return err
-	}
-	logger.V(2).Info("NIM Service object changed, deleting ", "obj", obj)
-	return nil
-}
-
-func (r *NemoDatastoreReconciler) SetConditionsReady(ctx context.Context, cr *appsv1alpha1.NemoDatastore, reason, message string) error {
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Ready,
-		Status:  metav1.ConditionTrue,
-		Reason:  reason,
-		Message: message,
-	})
-
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:   conditions.Failed,
-		Status: metav1.ConditionFalse,
-		Reason: conditions.Ready,
-	})
-
-	cr.Status.State = appsv1alpha1.NemoDatastoreStatusReady
-	return r.updateNemoDatastoreStatus(ctx, cr)
-}
-
-func (r *NemoDatastoreReconciler) SetConditionsNotReady(ctx context.Context, cr *appsv1alpha1.NemoDatastore, reason, message string) error {
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Ready,
-		Status:  metav1.ConditionFalse,
-		Reason:  reason,
-		Message: message,
-	})
-
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Failed,
-		Status:  metav1.ConditionFalse,
-		Reason:  conditions.Ready,
-		Message: message,
-	})
-
-	cr.Status.State = appsv1alpha1.NemoDatastoreStatusNotReady
-	return r.updateNemoDatastoreStatus(ctx, cr)
-}
-
-func (r *NemoDatastoreReconciler) SetConditionsFailed(ctx context.Context, cr *appsv1alpha1.NemoDatastore, reason, message string) error {
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:   conditions.Ready,
-		Status: metav1.ConditionFalse,
-		Reason: conditions.Failed,
-	})
-
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Failed,
-		Status:  metav1.ConditionTrue,
-		Reason:  reason,
-		Message: message,
-	})
-	cr.Status.State = appsv1alpha1.NemoDatastoreStatusFailed
-	return r.updateNemoDatastoreStatus(ctx, cr)
-}
-
-func (r *NemoDatastoreReconciler) updateNemoDatastoreStatus(ctx context.Context, cr *appsv1alpha1.NemoDatastore) error {
-
-	obj := &appsv1alpha1.NemoDatastore{}
-	errGet := r.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.GetNamespace()}, obj)
-	if errGet != nil {
-		return errGet
-	}
-	obj.Status = cr.Status
-	if err := r.Status().Update(ctx, obj); err != nil {
-		return err
-	}
-	return nil
-}
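Editor's note: the call contract of the shared helper this controller now delegates to (see the k8sutil diff at the end) is worth spelling out, since it explains the guard kept at the ingress call site above. `CleanupResource` already returns nil when the object is absent at Get time, so the remaining `!errors.IsNotFound(err)` check only covers the narrow race where the object vanishes between the helper's Get and its Delete. A sketch, with names taken from the diff:

```go
// Call contract of k8sutil.CleanupResource (sketch):
//   - object absent at Get time -> nil (no-op)
//   - Get or Delete API failure -> that error (a Delete racing a
//     concurrent deletion can still surface NotFound)
//   - object deleted            -> nil
if err := k8sutil.CleanupResource(ctx, r.GetClient(), &networkingv1.Ingress{}, namespacedName); err != nil && !errors.IsNotFound(err) {
	return ctrl.Result{}, err
}
```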
diff --git a/internal/controller/nemo_entitystore_controller.go b/internal/controller/nemo_entitystore_controller.go
index c0ac155f..beef64db 100644
--- a/internal/controller/nemo_entitystore_controller.go
+++ b/internal/controller/nemo_entitystore_controller.go
@@ -26,7 +26,6 @@ import (
 	"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
 	"github.com/NVIDIA/k8s-nim-operator/internal/render"
 	"github.com/NVIDIA/k8s-nim-operator/internal/shared"
-	"github.com/NVIDIA/k8s-nim-operator/internal/utils"
 	"github.com/go-logr/logr"
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
 	appsv1 "k8s.io/api/apps/v1"
@@ -321,7 +320,7 @@ func (r *NemoEntitystoreReconciler) reconcileNemoEntitystore(ctx context.Context
 			return ctrl.Result{}, err
 		}
 	} else {
-		err = r.cleanupResource(ctx, &networkingv1.Ingress{}, namespacedName)
+		err = k8sutil.CleanupResource(ctx, r.GetClient(), &networkingv1.Ingress{}, namespacedName)
 		if err != nil && !errors.IsNotFound(err) {
 			return ctrl.Result{}, err
 		}
@@ -337,7 +336,7 @@ func (r *NemoEntitystoreReconciler) reconcileNemoEntitystore(ctx context.Context
 		}
 	} else {
 		// If autoscaling is disabled, ensure the HPA is deleted
-		err = r.cleanupResource(ctx, &autoscalingv2.HorizontalPodAutoscaler{}, namespacedName)
+		err = k8sutil.CleanupResource(ctx, r.GetClient(), &autoscalingv2.HorizontalPodAutoscaler{}, namespacedName)
 		if err != nil {
 			return ctrl.Result{}, err
 		}
@@ -370,7 +369,7 @@ func (r *NemoEntitystoreReconciler) reconcileNemoEntitystore(ctx context.Context
 	}
 
 	// Wait for deployment
-	msg, ready, err := r.isDeploymentReady(ctx, &namespacedName)
+	msg, ready, err := k8sutil.IsDeploymentReady(ctx, r.GetClient(), &namespacedName)
 	if err != nil {
 		return ctrl.Result{}, err
 	}
@@ -436,7 +435,7 @@ func (r *NemoEntitystoreReconciler) renderAndSyncResource(ctx context.Context, N
 		return err
 	}
 
-	err = r.syncResource(ctx, obj, resource, namespacedName)
+	err = k8sutil.SyncResource(ctx, r.GetClient(), obj, resource, namespacedName)
 	if err != nil {
 		logger.Error(err, "failed to sync", conditionType, namespacedName)
 		statusError := r.updater.SetConditionsFailed(ctx, NemoEntitystore, reason, err.Error())
@@ -447,89 +446,3 @@ func (r *NemoEntitystoreReconciler) renderAndSyncResource(ctx context.Context, N
 	}
 	return nil
 }
-
-// CheckDeploymentReadiness checks if the Deployment is ready
-func (r *NemoEntitystoreReconciler) isDeploymentReady(ctx context.Context, namespacedName *types.NamespacedName) (string, bool, error) {
-	deployment := &appsv1.Deployment{}
-	err := r.Get(ctx, client.ObjectKey{Name: namespacedName.Name, Namespace: namespacedName.Namespace}, deployment)
-	if err != nil {
-		if errors.IsNotFound(err) {
-			return "", false, nil
-		}
-		return "", false, err
-	}
-
-	cond := getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
-	if cond != nil && cond.Reason == "ProgressDeadlineExceeded" {
-		return fmt.Sprintf("deployment %q exceeded its progress deadline", deployment.Name), false, nil
-	}
-	if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d out of %d new replicas have been updated...\n", deployment.Name, deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas), false, nil
-	}
-	if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d old replicas are pending termination...\n", deployment.Name, deployment.Status.Replicas-deployment.Status.UpdatedReplicas), false, nil
-	}
-	if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d of %d updated replicas are available...\n", deployment.Name, deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas), false, nil
-	}
-	return fmt.Sprintf("deployment %q successfully rolled out\n", deployment.Name), true, nil
-}
-
-func (r *NemoEntitystoreReconciler) syncResource(ctx context.Context, obj client.Object, desired client.Object, namespacedName types.NamespacedName) error {
-	logger := log.FromContext(ctx)
-
-	err := r.Get(ctx, namespacedName, obj)
-	if err != nil && !errors.IsNotFound(err) {
-		return err
-	}
-
-	if !utils.IsSpecChanged(obj, desired) {
-		logger.V(2).Info("Object spec has not changed, skipping update", "obj", obj)
-		return nil
-	}
-	logger.V(2).Info("Object spec has changed, updating")
-
-	if errors.IsNotFound(err) {
-		err = r.Create(ctx, desired)
-		if err != nil {
-			return err
-		}
-	} else {
-		err = r.Update(ctx, desired)
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-// cleanupResource deletes the given Kubernetes resource if it exists.
-// If the resource does not exist or an error occurs during deletion, the function returns nil or the error.
-//
-// Parameters:
-// ctx (context.Context): The context for the operation.
-// obj (client.Object): The Kubernetes resource to delete.
-// namespacedName (types.NamespacedName): The namespaced name of the resource.
-//
-// Returns:
-// error: An error if the resource deletion fails, or nil if the resource is not found or deletion is successful.
-func (r *NemoEntitystoreReconciler) cleanupResource(ctx context.Context, obj client.Object, namespacedName types.NamespacedName) error {
-
-	logger := log.FromContext(ctx)
-
-	err := r.Get(ctx, namespacedName, obj)
-	if err != nil && !errors.IsNotFound(err) {
-		return err
-	}
-
-	if errors.IsNotFound(err) {
-		return nil
-	}
-
-	err = r.Delete(ctx, obj)
-	if err != nil {
-		return err
-	}
-	logger.V(2).Info("NIM Service object changed, deleting ", "obj", obj)
-	return nil
-}
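Editor's note: the shared `SyncResource` expects an empty object of the target type (filled by its internal Get) plus the freshly rendered desired object; it skips the write when `utils.IsSpecChanged` reports no drift, creates on NotFound, and updates otherwise. A usage sketch under assumed imports — the ConfigMap example is illustrative, not from this patch:

```go
package main

import (
	"context"

	"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// syncRenderedConfigMap is a hypothetical caller of the relocated helper.
func syncRenderedConfigMap(ctx context.Context, c client.Client, desired *corev1.ConfigMap) error {
	nn := types.NamespacedName{Name: desired.GetName(), Namespace: desired.GetNamespace()}
	current := &corev1.ConfigMap{} // populated by the Get inside SyncResource
	// Creates on NotFound, updates on spec drift, no-ops otherwise.
	return k8sutil.SyncResource(ctx, c, current, desired, nn)
}
```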
diff --git a/internal/controller/nemo_evaluator_controller.go b/internal/controller/nemo_evaluator_controller.go
index 40302e60..73ee5c91 100644
--- a/internal/controller/nemo_evaluator_controller.go
+++ b/internal/controller/nemo_evaluator_controller.go
@@ -26,15 +26,14 @@ import (
 	"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
 	"github.com/NVIDIA/k8s-nim-operator/internal/render"
 	"github.com/NVIDIA/k8s-nim-operator/internal/shared"
-	"github.com/NVIDIA/k8s-nim-operator/internal/utils"
 	"github.com/go-logr/logr"
+	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
 	appsv1 "k8s.io/api/apps/v1"
 	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	corev1 "k8s.io/api/core/v1"
 	networkingv1 "k8s.io/api/networking/v1"
 	rbacv1 "k8s.io/api/rbac/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
-	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
@@ -313,6 +312,47 @@ func (r *NemoEvaluatorReconciler) reconcileNemoEvaluator(ctx context.Context, Ne
 		return ctrl.Result{}, err
 	}
 
+	// Sync ingress
+	if NemoEvaluator.IsIngressEnabled() {
+		err = r.renderAndSyncResource(ctx, NemoEvaluator, &renderer, &networkingv1.Ingress{}, func() (client.Object, error) {
+			return renderer.Ingress(NemoEvaluator.GetIngressParams())
+		}, "ingress", conditions.ReasonIngressFailed)
+		if err != nil {
+			return ctrl.Result{}, err
+		}
+	} else {
+		err = k8sutil.CleanupResource(ctx, r.GetClient(), &networkingv1.Ingress{}, namespacedName)
+		if err != nil && !errors.IsNotFound(err) {
+			return ctrl.Result{}, err
+		}
+	}
+
+	// Sync HPA
+	if NemoEvaluator.IsAutoScalingEnabled() {
+		err = r.renderAndSyncResource(ctx, NemoEvaluator, &renderer, &autoscalingv2.HorizontalPodAutoscaler{}, func() (client.Object, error) {
+			return renderer.HPA(NemoEvaluator.GetHPAParams())
+		}, "hpa", conditions.ReasonHPAFailed)
+		if err != nil {
+			return ctrl.Result{}, err
+		}
+	} else {
+		// If autoscaling is disabled, ensure the HPA is deleted
+		err = k8sutil.CleanupResource(ctx, r.GetClient(), &autoscalingv2.HorizontalPodAutoscaler{}, namespacedName)
+		if err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+
+	// Sync Service Monitor
+	if NemoEvaluator.IsServiceMonitorEnabled() {
+		err = r.renderAndSyncResource(ctx, NemoEvaluator, &renderer, &monitoringv1.ServiceMonitor{}, func() (client.Object, error) {
+			return renderer.ServiceMonitor(NemoEvaluator.GetServiceMonitorParams())
+		}, "servicemonitor", conditions.ReasonServiceMonitorFailed)
+		if err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+
 	deploymentParams := NemoEvaluator.GetDeploymentParams()
 
 	// Sync deployment
@@ -324,19 +364,19 @@ func (r *NemoEvaluatorReconciler) reconcileNemoEvaluator(ctx context.Context, Ne
 	}
 
 	// Wait for deployment
-	msg, ready, err := r.isDeploymentReady(ctx, &namespacedName)
+	msg, ready, err := k8sutil.IsDeploymentReady(ctx, r.GetClient(), &namespacedName)
 	if err != nil {
 		return ctrl.Result{}, err
 	}
 
 	if !ready {
 		// Update status as NotReady
-		err = r.SetConditionsNotReady(ctx, NemoEvaluator, conditions.NotReady, msg)
+		err = r.updater.SetConditionsNotReady(ctx, NemoEvaluator, conditions.NotReady, msg)
 		r.GetEventRecorder().Eventf(NemoEvaluator, corev1.EventTypeNormal, conditions.NotReady,
 			"NemoEvaluator %s not ready yet, msg: %s", NemoEvaluator.Name, msg)
 	} else {
 		// Update status as ready
-		err = r.SetConditionsReady(ctx, NemoEvaluator, conditions.Ready, msg)
+		err = r.updater.SetConditionsReady(ctx, NemoEvaluator, conditions.Ready, msg)
 		r.GetEventRecorder().Eventf(NemoEvaluator, corev1.EventTypeNormal, conditions.Ready,
 			"NemoEvaluator %s ready, msg: %s", NemoEvaluator.Name, msg)
 	}
@@ -403,7 +443,7 @@ func (r *NemoEvaluatorReconciler) renderAndSyncResource(ctx context.Context, Nem
 		return err
 	}
 
-	err = r.syncResource(ctx, obj, resource, namespacedName)
+	err = k8sutil.SyncResource(ctx, r.GetClient(), obj, resource, namespacedName)
 	if err != nil {
 		logger.Error(err, "failed to sync", conditionType, namespacedName)
 		statusError := r.updater.SetConditionsFailed(ctx, NemoEvaluator, reason, err.Error())
@@ -414,126 +454,3 @@ func (r *NemoEvaluatorReconciler) renderAndSyncResource(ctx context.Context, Nem
 	}
 	return nil
 }
-
-// CheckDeploymentReadiness checks if the Deployment is ready
-func (r *NemoEvaluatorReconciler) isDeploymentReady(ctx context.Context, namespacedName *types.NamespacedName) (string, bool, error) {
-	deployment := &appsv1.Deployment{}
-	err := r.Get(ctx, client.ObjectKey{Name: namespacedName.Name, Namespace: namespacedName.Namespace}, deployment)
-	if err != nil {
-		if errors.IsNotFound(err) {
-			return "", false, nil
-		}
-		return "", false, err
-	}
-
-	cond := getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
-	if cond != nil && cond.Reason == "ProgressDeadlineExceeded" {
-		return fmt.Sprintf("deployment %q exceeded its progress deadline", deployment.Name), false, nil
-	}
-	if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d out of %d new replicas have been updated...\n", deployment.Name, deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas), false, nil
-	}
-	if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d old replicas are pending termination...\n", deployment.Name, deployment.Status.Replicas-deployment.Status.UpdatedReplicas), false, nil
-	}
-	if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d of %d updated replicas are available...\n", deployment.Name, deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas), false, nil
-	}
-	return fmt.Sprintf("deployment %q successfully rolled out\n", deployment.Name), true, nil
-}
-
-func (r *NemoEvaluatorReconciler) syncResource(ctx context.Context, obj client.Object, desired client.Object, namespacedName types.NamespacedName) error {
-	logger := log.FromContext(ctx)
-
-	err := r.Get(ctx, namespacedName, obj)
-	if err != nil && !errors.IsNotFound(err) {
-		return err
-	}
-
-	if !utils.IsSpecChanged(obj, desired) {
-		logger.V(2).Info("Object spec has not changed, skipping update", "obj", obj)
-		return nil
-	}
-	logger.V(2).Info("Object spec has changed, updating")
-
-	if errors.IsNotFound(err) {
-		err = r.Create(ctx, desired)
-		if err != nil {
-			return err
-		}
-	} else {
-		err = r.Update(ctx, desired)
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func (r *NemoEvaluatorReconciler) SetConditionsReady(ctx context.Context, cr *appsv1alpha1.NemoEvaluator, reason, message string) error {
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Ready,
-		Status:  metav1.ConditionTrue,
-		Reason:  reason,
-		Message: message,
-	})
-
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:   conditions.Failed,
-		Status: metav1.ConditionFalse,
-		Reason: conditions.Ready,
-	})
-
-	cr.Status.State = appsv1alpha1.NemoEvaluatorStatusReady
-	return r.updateNemoEvaluatorStatus(ctx, cr)
-}
-
-func (r *NemoEvaluatorReconciler) SetConditionsNotReady(ctx context.Context, cr *appsv1alpha1.NemoEvaluator, reason, message string) error {
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Ready,
-		Status:  metav1.ConditionFalse,
-		Reason:  reason,
-		Message: message,
-	})
-
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Failed,
-		Status:  metav1.ConditionFalse,
-		Reason:  conditions.Ready,
-		Message: message,
-	})
-
-	cr.Status.State = appsv1alpha1.NemoEvaluatorStatusNotReady
-	return r.updateNemoEvaluatorStatus(ctx, cr)
-}
-
-func (r *NemoEvaluatorReconciler) SetConditionsFailed(ctx context.Context, cr *appsv1alpha1.NemoEvaluator, reason, message string) error {
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:   conditions.Ready,
-		Status: metav1.ConditionFalse,
-		Reason: conditions.Failed,
-	})
-
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Failed,
-		Status:  metav1.ConditionTrue,
-		Reason:  reason,
-		Message: message,
-	})
-	cr.Status.State = appsv1alpha1.NemoEvaluatorStatusFailed
-	return r.updateNemoEvaluatorStatus(ctx, cr)
-}
-
-func (r *NemoEvaluatorReconciler) updateNemoEvaluatorStatus(ctx context.Context, cr *appsv1alpha1.NemoEvaluator) error {
-
-	obj := &appsv1alpha1.NemoEvaluator{}
-	errGet := r.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.GetNamespace()}, obj)
-	if errGet != nil {
-		return errGet
-	}
-	obj.Status = cr.Status
-	if err := r.Status().Update(ctx, obj); err != nil {
-		return err
-	}
-	return nil
-}
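Editor's note: unlike the new Ingress and HPA blocks above, the ServiceMonitor block has no cleanup branch for the disabled case, so a ServiceMonitor created while monitoring was enabled would linger after it is turned off. If symmetry were wanted, it would presumably look like the following, mirroring the Ingress/HPA handling; this is hypothetical and not part of the patch:

```go
// Hypothetical cleanup for a disabled ServiceMonitor (not in this patch),
// using the same shared helper as the Ingress/HPA branches above.
if !NemoEvaluator.IsServiceMonitorEnabled() {
	if err := k8sutil.CleanupResource(ctx, r.GetClient(), &monitoringv1.ServiceMonitor{}, namespacedName); err != nil {
		return ctrl.Result{}, err
	}
}
```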
diff --git a/internal/controller/nemo_guardrail_controller.go b/internal/controller/nemo_guardrail_controller.go
index f7a34e87..ee83dd54 100644
--- a/internal/controller/nemo_guardrail_controller.go
+++ b/internal/controller/nemo_guardrail_controller.go
@@ -26,7 +26,6 @@ import (
 	"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
 	"github.com/NVIDIA/k8s-nim-operator/internal/render"
 	"github.com/NVIDIA/k8s-nim-operator/internal/shared"
-	"github.com/NVIDIA/k8s-nim-operator/internal/utils"
 	"github.com/go-logr/logr"
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
 	appsv1 "k8s.io/api/apps/v1"
@@ -35,7 +34,6 @@ import (
 	networkingv1 "k8s.io/api/networking/v1"
 	rbacv1 "k8s.io/api/rbac/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
-	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
@@ -322,7 +320,7 @@ func (r *NemoGuardrailReconciler) reconcileNemoGuardrail(ctx context.Context, Ne
 			return ctrl.Result{}, err
 		}
 	} else {
-		err = r.cleanupResource(ctx, &networkingv1.Ingress{}, namespacedName)
+		err = k8sutil.CleanupResource(ctx, r.GetClient(), &networkingv1.Ingress{}, namespacedName)
 		if err != nil && !errors.IsNotFound(err) {
 			return ctrl.Result{}, err
 		}
@@ -338,7 +336,7 @@ func (r *NemoGuardrailReconciler) reconcileNemoGuardrail(ctx context.Context, Ne
 		}
 	} else {
 		// If autoscaling is disabled, ensure the HPA is deleted
-		err = r.cleanupResource(ctx, &autoscalingv2.HorizontalPodAutoscaler{}, namespacedName)
+		err = k8sutil.CleanupResource(ctx, r.GetClient(), &autoscalingv2.HorizontalPodAutoscaler{}, namespacedName)
 		if err != nil {
 			return ctrl.Result{}, err
 		}
@@ -371,19 +369,19 @@ func (r *NemoGuardrailReconciler) reconcileNemoGuardrail(ctx context.Context, Ne
 	}
 
 	// Wait for deployment
-	msg, ready, err := r.isDeploymentReady(ctx, &namespacedName)
+	msg, ready, err := k8sutil.IsDeploymentReady(ctx, r.GetClient(), &namespacedName)
 	if err != nil {
 		return ctrl.Result{}, err
 	}
 
 	if !ready {
 		// Update status as NotReady
-		err = r.SetConditionsNotReady(ctx, NemoGuardrail, conditions.NotReady, msg)
+		err = r.updater.SetConditionsNotReady(ctx, NemoGuardrail, conditions.NotReady, msg)
 		r.GetEventRecorder().Eventf(NemoGuardrail, corev1.EventTypeNormal, conditions.NotReady,
 			"NemoGuardrail %s not ready yet, msg: %s", NemoGuardrail.Name, msg)
 	} else {
 		// Update status as ready
-		err = r.SetConditionsReady(ctx, NemoGuardrail, conditions.Ready, msg)
+		err = r.updater.SetConditionsReady(ctx, NemoGuardrail, conditions.Ready, msg)
 		r.GetEventRecorder().Eventf(NemoGuardrail, corev1.EventTypeNormal, conditions.Ready,
 			"NemoGuardrail %s ready, msg: %s", NemoGuardrail.Name, msg)
 	}
@@ -437,7 +435,7 @@ func (r *NemoGuardrailReconciler) renderAndSyncResource(ctx context.Context, Nem
 		return err
 	}
 
-	err = r.syncResource(ctx, obj, resource, namespacedName)
+	err = k8sutil.SyncResource(ctx, r.GetClient(), obj, resource, namespacedName)
 	if err != nil {
 		logger.Error(err, "failed to sync", conditionType, namespacedName)
 		statusError := r.updater.SetConditionsFailed(ctx, NemoGuardrail, reason, err.Error())
@@ -448,167 +446,3 @@ func (r *NemoGuardrailReconciler) renderAndSyncResource(ctx context.Context, Nem
 	}
 	return nil
 }
-
-// CheckDeploymentReadiness checks if the Deployment is ready
-func (r *NemoGuardrailReconciler) isDeploymentReady(ctx context.Context, namespacedName *types.NamespacedName) (string, bool, error) {
-	deployment := &appsv1.Deployment{}
-	err := r.Get(ctx, client.ObjectKey{Name: namespacedName.Name, Namespace: namespacedName.Namespace}, deployment)
-	if err != nil {
-		if errors.IsNotFound(err) {
-			return "", false, nil
-		}
-		return "", false, err
-	}
-
-	cond := getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
-	if cond != nil && cond.Reason == "ProgressDeadlineExceeded" {
-		return fmt.Sprintf("deployment %q exceeded its progress deadline", deployment.Name), false, nil
-	}
-	if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d out of %d new replicas have been updated...\n", deployment.Name, deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas), false, nil
-	}
-	if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d old replicas are pending termination...\n", deployment.Name, deployment.Status.Replicas-deployment.Status.UpdatedReplicas), false, nil
-	}
-	if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d of %d updated replicas are available...\n", deployment.Name, deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas), false, nil
-	}
-	return fmt.Sprintf("deployment %q successfully rolled out\n", deployment.Name), true, nil
-}
-
-func getDeploymentCondition(status appsv1.DeploymentStatus, condType appsv1.DeploymentConditionType) *appsv1.DeploymentCondition {
-	for i := range status.Conditions {
-		c := status.Conditions[i]
-		if c.Type == condType {
-			return &c
-		}
-	}
-	return nil
-}
-
-func (r *NemoGuardrailReconciler) syncResource(ctx context.Context, obj client.Object, desired client.Object, namespacedName types.NamespacedName) error {
-	logger := log.FromContext(ctx)
-
-	err := r.Get(ctx, namespacedName, obj)
-	if err != nil && !errors.IsNotFound(err) {
-		return err
-	}
-
-	if !utils.IsSpecChanged(obj, desired) {
-		logger.V(2).Info("Object spec has not changed, skipping update", "obj", obj)
-		return nil
-	}
-	logger.V(2).Info("Object spec has changed, updating")
-
-	if errors.IsNotFound(err) {
-		err = r.Create(ctx, desired)
-		if err != nil {
-			return err
-		}
-	} else {
-		err = r.Update(ctx, desired)
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-// cleanupResource deletes the given Kubernetes resource if it exists.
-// If the resource does not exist or an error occurs during deletion, the function returns nil or the error.
-//
-// Parameters:
-// ctx (context.Context): The context for the operation.
-// obj (client.Object): The Kubernetes resource to delete.
-// namespacedName (types.NamespacedName): The namespaced name of the resource.
-//
-// Returns:
-// error: An error if the resource deletion fails, or nil if the resource is not found or deletion is successful.
-func (r *NemoGuardrailReconciler) cleanupResource(ctx context.Context, obj client.Object, namespacedName types.NamespacedName) error {
-
-	logger := log.FromContext(ctx)
-
-	err := r.Get(ctx, namespacedName, obj)
-	if err != nil && !errors.IsNotFound(err) {
-		return err
-	}
-
-	if errors.IsNotFound(err) {
-		return nil
-	}
-
-	err = r.Delete(ctx, obj)
-	if err != nil {
-		return err
-	}
-	logger.V(2).Info("NIM Service object changed, deleting ", "obj", obj)
-	return nil
-}
-
-func (r *NemoGuardrailReconciler) SetConditionsReady(ctx context.Context, cr *appsv1alpha1.NemoGuardrail, reason, message string) error {
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Ready,
-		Status:  metav1.ConditionTrue,
-		Reason:  reason,
-		Message: message,
-	})
-
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:   conditions.Failed,
-		Status: metav1.ConditionFalse,
-		Reason: conditions.Ready,
-	})
-
-	cr.Status.State = appsv1alpha1.NemoGuardrailStatusReady
-	return r.updateNemoGuardrailStatus(ctx, cr)
-}
-
-func (r *NemoGuardrailReconciler) SetConditionsNotReady(ctx context.Context, cr *appsv1alpha1.NemoGuardrail, reason, message string) error {
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Ready,
-		Status:  metav1.ConditionFalse,
-		Reason:  reason,
-		Message: message,
-	})
-
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Failed,
-		Status:  metav1.ConditionFalse,
-		Reason:  conditions.Ready,
-		Message: message,
-	})
-
-	cr.Status.State = appsv1alpha1.NemoGuardrailStatusNotReady
-	return r.updateNemoGuardrailStatus(ctx, cr)
-}
-
-func (r *NemoGuardrailReconciler) SetConditionsFailed(ctx context.Context, cr *appsv1alpha1.NemoGuardrail, reason, message string) error {
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:   conditions.Ready,
-		Status: metav1.ConditionFalse,
-		Reason: conditions.Failed,
-	})
-
-	meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{
-		Type:    conditions.Failed,
-		Status:  metav1.ConditionTrue,
-		Reason:  reason,
-		Message: message,
-	})
-	cr.Status.State = appsv1alpha1.NemoGuardrailStatusFailed
-	return r.updateNemoGuardrailStatus(ctx, cr)
-}
-
-func (r *NemoGuardrailReconciler) updateNemoGuardrailStatus(ctx context.Context, cr *appsv1alpha1.NemoGuardrail) error {
-
-	obj := &appsv1alpha1.NemoGuardrail{}
-	errGet := r.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.GetNamespace()}, obj)
-	if errGet != nil {
-		return errGet
-	}
-	obj.Status = cr.Status
-	if err := r.Status().Update(ctx, obj); err != nil {
-		return err
-	}
-	return nil
-}
diff --git a/internal/controller/nemocustomizer_controller.go b/internal/controller/nemocustomizer_controller.go
index 73173f47..12a18544 100644
--- a/internal/controller/nemocustomizer_controller.go
+++ b/internal/controller/nemocustomizer_controller.go
@@ -328,7 +328,7 @@ func (r *NemoCustomizerReconciler) reconcileNemoCustomizer(ctx context.Context,
 			return ctrl.Result{}, err
 		}
 	} else {
-		err = r.cleanupResource(ctx, &networkingv1.Ingress{}, namespacedName)
+		err = k8sutil.CleanupResource(ctx, r.GetClient(), &networkingv1.Ingress{}, namespacedName)
 		if err != nil && !errors.IsNotFound(err) {
 			return ctrl.Result{}, err
 		}
@@ -344,7 +344,7 @@ func (r *NemoCustomizerReconciler) reconcileNemoCustomizer(ctx context.Context,
 		}
 	} else {
 		// If autoscaling is disabled, ensure the HPA is deleted
-		err = r.cleanupResource(ctx, &autoscalingv2.HorizontalPodAutoscaler{}, namespacedName)
+		err = k8sutil.CleanupResource(ctx, r.GetClient(), &autoscalingv2.HorizontalPodAutoscaler{}, namespacedName)
 		if err != nil {
 			return ctrl.Result{}, err
 		}
@@ -397,7 +397,7 @@ func (r *NemoCustomizerReconciler) reconcileNemoCustomizer(ctx context.Context,
 	}
 
 	// Wait for deployment
-	msg, ready, err := r.isDeploymentReady(ctx, &namespacedName)
+	msg, ready, err := k8sutil.IsDeploymentReady(ctx, r.GetClient(), &namespacedName)
 	if err != nil {
 		return ctrl.Result{}, err
 	}
@@ -463,7 +463,7 @@ func (r *NemoCustomizerReconciler) renderAndSyncResource(ctx context.Context, Ne
 		return err
 	}
 
-	err = r.syncResource(ctx, obj, resource, namespacedName)
+	err = k8sutil.SyncResource(ctx, r.GetClient(), obj, resource, namespacedName)
 	if err != nil {
 		logger.Error(err, "failed to sync", conditionType, namespacedName)
 		statusError := r.updater.SetConditionsFailed(ctx, NemoCustomizer, reason, err.Error())
@@ -474,89 +474,3 @@ func (r *NemoCustomizerReconciler) renderAndSyncResource(ctx context.Context, Ne
 	}
 	return nil
 }
-
-// CheckDeploymentReadiness checks if the Deployment is ready
-func (r *NemoCustomizerReconciler) isDeploymentReady(ctx context.Context, namespacedName *types.NamespacedName) (string, bool, error) {
-	deployment := &appsv1.Deployment{}
-	err := r.Get(ctx, client.ObjectKey{Name: namespacedName.Name, Namespace: namespacedName.Namespace}, deployment)
-	if err != nil {
-		if errors.IsNotFound(err) {
-			return "", false, nil
-		}
-		return "", false, err
-	}
-
-	cond := getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
-	if cond != nil && cond.Reason == "ProgressDeadlineExceeded" {
-		return fmt.Sprintf("deployment %q exceeded its progress deadline", deployment.Name), false, nil
-	}
-	if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d out of %d new replicas have been updated...\n", deployment.Name, deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas), false, nil
-	}
-	if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d old replicas are pending termination...\n", deployment.Name, deployment.Status.Replicas-deployment.Status.UpdatedReplicas), false, nil
-	}
-	if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
-		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d of %d updated replicas are available...\n", deployment.Name, deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas), false, nil
-	}
-	return fmt.Sprintf("deployment %q successfully rolled out\n", deployment.Name), true, nil
-}
-
-func (r *NemoCustomizerReconciler) syncResource(ctx context.Context, obj client.Object, desired client.Object, namespacedName types.NamespacedName) error {
-	logger := log.FromContext(ctx)
-
-	err := r.Get(ctx, namespacedName, obj)
-	if err != nil && !errors.IsNotFound(err) {
-		return err
-	}
-
-	if !utils.IsSpecChanged(obj, desired) {
-		logger.V(2).Info("Object spec has not changed, skipping update", "obj", obj)
-		return nil
-	}
-	logger.V(2).Info("Object spec has changed, updating")
-
-	if errors.IsNotFound(err) {
-		err = r.Create(ctx, desired)
-		if err != nil {
-			return err
-		}
-	} else {
-		err = r.Update(ctx, desired)
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-// cleanupResource deletes the given Kubernetes resource if it exists.
-// If the resource does not exist or an error occurs during deletion, the function returns nil or the error.
-//
-// Parameters:
-// ctx (context.Context): The context for the operation.
-// obj (client.Object): The Kubernetes resource to delete.
-// namespacedName (types.NamespacedName): The namespaced name of the resource.
-//
-// Returns:
-// error: An error if the resource deletion fails, or nil if the resource is not found or deletion is successful.
-func (r *NemoCustomizerReconciler) cleanupResource(ctx context.Context, obj client.Object, namespacedName types.NamespacedName) error {
-
-	logger := log.FromContext(ctx)
-
-	err := r.Get(ctx, namespacedName, obj)
-	if err != nil && !errors.IsNotFound(err) {
-		return err
-	}
-
-	if errors.IsNotFound(err) {
-		return nil
-	}
-
-	err = r.Delete(ctx, obj)
-	if err != nil {
-		return err
-	}
-	logger.V(2).Info("NIM Service object changed, deleting ", "obj", obj)
-	return nil
-}
diff --git a/internal/k8sutil/k8sutil.go b/internal/k8sutil/k8sutil.go
index bb143491..0c525715 100644
--- a/internal/k8sutil/k8sutil.go
+++ b/internal/k8sutil/k8sutil.go
@@ -20,8 +20,13 @@ import (
 	"context"
 	"fmt"
 
+	"github.com/NVIDIA/k8s-nim-operator/internal/utils"
+	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log"
 )
 
 // OrchestratorType is the underlying container orchestrator type
@@ -107,3 +112,100 @@ func GetOrchestratorType(k8sClient client.Client) (OrchestratorType, error) {
 	// Default to Upstream Kubernetes if no specific platform labels are found
 	return K8s, nil
 }
+
+// CleanupResource deletes the given Kubernetes resource if it exists.
+// If the resource does not exist or an error occurs during deletion, the function returns nil or the error.
+//
+// Parameters:
+// ctx (context.Context): The context for the operation.
+// obj (client.Object): The Kubernetes resource to delete.
+// namespacedName (types.NamespacedName): The namespaced name of the resource.
+//
+// Returns:
+// error: An error if the resource deletion fails, or nil if the resource is not found or deletion is successful.
+func CleanupResource(ctx context.Context, k8sClient client.Client, obj client.Object, namespacedName types.NamespacedName) error {
+
+	logger := log.FromContext(ctx)
+
+	err := k8sClient.Get(ctx, namespacedName, obj)
+	if err != nil && !errors.IsNotFound(err) {
+		return err
+	}
+
+	if errors.IsNotFound(err) {
+		return nil
+	}
+
+	err = k8sClient.Delete(ctx, obj)
+	if err != nil {
+		return err
+	}
+	logger.V(2).Info("Deleted resource", "obj", obj)
+	return nil
+}
+
+// SyncResource syncs the current object with the desired object spec
+func SyncResource(ctx context.Context, k8sClient client.Client, obj client.Object, desired client.Object, namespacedName types.NamespacedName) error {
+	logger := log.FromContext(ctx)
+
+	err := k8sClient.Get(ctx, namespacedName, obj)
+	if err != nil && !errors.IsNotFound(err) {
+		return err
+	}
+
+	if !utils.IsSpecChanged(obj, desired) {
+		logger.V(2).Info("Object spec has not changed, skipping update", "obj", obj)
+		return nil
+	}
+	logger.V(2).Info("Object spec has changed, updating")
+
+	if errors.IsNotFound(err) {
+		err = k8sClient.Create(ctx, desired)
+		if err != nil {
+			return err
+		}
+	} else {
+		err = k8sClient.Update(ctx, desired)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// IsDeploymentReady checks if the Deployment is ready
+func IsDeploymentReady(ctx context.Context, k8sClient client.Client, namespacedName *types.NamespacedName) (string, bool, error) {
+	deployment := &appsv1.Deployment{}
+	err := k8sClient.Get(ctx, client.ObjectKey{Name: namespacedName.Name, Namespace: namespacedName.Namespace}, deployment)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			return "", false, nil
+		}
+		return "", false, err
+	}
+
+	cond := getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
+	if cond != nil && cond.Reason == "ProgressDeadlineExceeded" {
+		return fmt.Sprintf("deployment %q exceeded its progress deadline", deployment.Name), false, nil
+	}
+	if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
+		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d out of %d new replicas have been updated...\n", deployment.Name, deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas), false, nil
+	}
+	if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
+		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d old replicas are pending termination...\n", deployment.Name, deployment.Status.Replicas-deployment.Status.UpdatedReplicas), false, nil
+	}
+	if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
+		return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d of %d updated replicas are available...\n", deployment.Name, deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas), false, nil
+	}
+	return fmt.Sprintf("deployment %q successfully rolled out\n", deployment.Name), true, nil
+}
+
+func getDeploymentCondition(status appsv1.DeploymentStatus, condType appsv1.DeploymentConditionType) *appsv1.DeploymentCondition {
+	for i := range status.Conditions {
+		c := status.Conditions[i]
+		if c.Type == condType {
+			return &c
+		}
+	}
+	return nil
+}
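Editor's note: the relocated helpers are now plain functions over `client.Client`, which makes them straightforward to unit-test with controller-runtime's fake client. A rough test sketch follows; the test file itself is not part of this patch, and the scheme setup is an assumption (the built-in client-go scheme already registers apps/v1).

```go
package k8sutil_test

import (
	"context"
	"testing"

	"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes/scheme"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

func TestIsDeploymentReady(t *testing.T) {
	replicas := int32(2)
	dep := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{Name: "datastore", Namespace: "default"},
		Spec:       appsv1.DeploymentSpec{Replicas: &replicas},
		// A fully rolled-out deployment: updated == available == desired.
		Status: appsv1.DeploymentStatus{Replicas: 2, UpdatedReplicas: 2, AvailableReplicas: 2},
	}
	c := fake.NewClientBuilder().WithScheme(scheme.Scheme).WithObjects(dep).Build()

	nn := types.NamespacedName{Name: "datastore", Namespace: "default"}
	msg, ready, err := k8sutil.IsDeploymentReady(context.Background(), c, &nn)
	if err != nil || !ready {
		t.Fatalf("want ready, got ready=%v err=%v msg=%q", ready, err, msg)
	}
}
```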