From 9d2e4d99a03cececc91c4313075acc8c7026111c Mon Sep 17 00:00:00 2001 From: Annaraya Narasagond Date: Tue, 28 Jan 2025 22:52:31 +0530 Subject: [PATCH] recipe: adding failOn and essential checks --- internal/controller/kubeobjects/requests.go | 3 + internal/controller/vrg_kubeobjects.go | 88 ++++++++++++++++++--- internal/controller/vrg_recipe.go | 26 ++++-- 3 files changed, 99 insertions(+), 18 deletions(-) diff --git a/internal/controller/kubeobjects/requests.go b/internal/controller/kubeobjects/requests.go index f9b437037..b4df45770 100644 --- a/internal/controller/kubeobjects/requests.go +++ b/internal/controller/kubeobjects/requests.go @@ -89,6 +89,9 @@ type KubeResourcesSpec struct { //+optional IsHook bool `json:"isHook,omitempty"` + + //+optional + GroupEssential *bool `json:"essential,omitempty"` } // HookSpec provides spec of either check or exec hook that needs to be executed diff --git a/internal/controller/vrg_kubeobjects.go b/internal/controller/vrg_kubeobjects.go index 5ed7d4dcf..a804194d2 100644 --- a/internal/controller/vrg_kubeobjects.go +++ b/internal/controller/vrg_kubeobjects.go @@ -242,9 +242,12 @@ func (v *VRGInstance) kubeObjectsCaptureStartOrResume( log logr.Logger, ) { groups := v.recipeElements.CaptureWorkflow + failOn := v.recipeElements.CaptureFailOn requestsProcessedCount := 0 requestsCompletedCount := 0 annotations := map[string]string{vrgGenerationKey: strconv.FormatInt(generation, vrgGenerationNumberBase)} + //executionResults := make(map[string]bool) + var workflowResult = true for groupNumber, captureGroup := range groups { cg := captureGroup @@ -252,7 +255,20 @@ func (v *VRGInstance) kubeObjectsCaptureStartOrResume( if cg.IsHook { if err := v.executeHook(cg.Hook, log1); err != nil { - break + + if failOn == anyError { + v.kubeObjectsCaptureStatusFalse("KubeObjectsHookExecutionError", err.Error()) + break + } else if failOn == essentialError { + if cg.Hook.Essential != nil && *cg.Hook.Essential { + 
v.kubeObjectsCaptureStatusFalse("KubeObjectsHookExecutionError", err.Error()) + break + } + } else if failOn == fullError { + if cg.Hook.Essential != nil && *cg.Hook.Essential { + workflowResult = false + } + } } } else { requestsCompletedCount += v.kubeObjectsGroupCapture( @@ -260,6 +276,25 @@ func (v *VRGInstance) kubeObjectsCaptureStartOrResume( captureInProgressStatusUpdate, labels, annotations, requests, log, ) + // result.Requeue true could be used to determine if an error has occurred or not + if result.Requeue { + if failOn == anyError { + // mark as backup failed + v.kubeObjectsCaptureStatusFalse("KubeObjectsCaptureError", fmt.Errorf( + "kube objects group capture failed").Error()) + break + } else if failOn == essentialError { + if cg.Spec.KubeResourcesSpec.GroupEssential != nil && *cg.Spec.KubeResourcesSpec.GroupEssential { + v.kubeObjectsCaptureStatusFalse("KubeObjectsCaptureError", fmt.Errorf( + "kube objects group capture failed").Error()) + break + } + } else if failOn == fullError { + if cg.Spec.KubeResourcesSpec.GroupEssential != nil && *cg.Spec.KubeResourcesSpec.GroupEssential { + workflowResult = false + } + } + } requestsProcessedCount += len(v.s3StoreAccessors) if requestsCompletedCount < requestsProcessedCount { @@ -270,8 +305,12 @@ func (v *VRGInstance) kubeObjectsCaptureStartOrResume( } } - firstRequest := getFirstRequest(groups, requests, namePrefix, v.s3StoreAccessors[0].S3ProfileName) + if !workflowResult { + v.kubeObjectsCaptureFailed("KubeObjectsCaptureError", "Kube objects capture failed") + return + } + firstRequest := getFirstRequest(groups, requests, namePrefix, v.s3StoreAccessors[0].S3ProfileName) v.kubeObjectsCaptureComplete( result, captureStartConditionally, @@ -692,6 +731,8 @@ func (v *VRGInstance) kubeObjectsRecoveryStartOrResume( groups := v.recipeElements.RecoverWorkflow requests := make([]kubeobjects.Request, len(groups)) + failOn := v.recipeElements.RestoreFailOn + workflowResult := true s3StoreAccessor, err :=
v.findS3StoreAccessor(s3ProfileName) if err != nil { @@ -704,20 +745,44 @@ func (v *VRGInstance) kubeObjectsRecoveryStartOrResume( if rg.IsHook { if err := v.executeHook(rg.Hook, log1); err != nil { - return fmt.Errorf("check hook execution failed during restore %s: %v", rg.Hook.Name, err) + if failOn == anyError { + return fmt.Errorf("check hook execution failed during restore %s: %v", rg.Hook.Name, err) + } else if failOn == essentialError { + if rg.Hook.Essential != nil && *rg.Hook.Essential { + return fmt.Errorf("check hook execution failed during restore %s: %v", rg.Hook.Name, err) + } + } else if failOn == fullError { + if rg.Hook.Essential != nil && *rg.Hook.Essential { + workflowResult = false + } + } + } } else { if err := v.executeRecoverGroup(result, s3StoreAccessor, captureToRecoverFromIdentifier, captureRequests, recoverRequests, labels, groupNumber, rg, requests, log1); err != nil { - return err + if failOn == anyError { + return err + } else if failOn == essentialError { + if rg.GroupEssential != nil && *rg.GroupEssential { + return err + } + } else if failOn == fullError { + if rg.GroupEssential != nil && *rg.GroupEssential { + workflowResult = false + } + } } } } - startTime := getRequestsStartTime(requests) + if !workflowResult { + return fmt.Errorf("workflow execution failed during restore") + } + startTime := getRequestsStartTime(requests) duration := time.Since(startTime.Time) log.Info("Kube objects recovered", "groups", len(groups), "start", startTime, "duration", duration) @@ -864,14 +929,14 @@ func getCaptureGroups(recipe Recipe.Recipe) ([]kubeobjects.CaptureSpec, error) { continue } - return resources, err + return resources, workflow.FailOn, err } resources[index] = *captureInstance } } - return resources, nil + return resources, workflow.FailOn, nil } func getRecoverGroups(recipe Recipe.Recipe) ([]kubeobjects.RecoverSpec, error) { @@ -897,14 +962,14 @@ func getRecoverGroups(recipe Recipe.Recipe) ([]kubeobjects.RecoverSpec, error) { 
continue } - return resources, err + return resources, workflow.FailOn, err } resources[index] = *captureInstance } } - return resources, nil + return resources, workflow.FailOn, nil } var ( @@ -1079,6 +1144,7 @@ func getChkHookSpec(hook *Recipe.Hook, suffix string) kubeobjects.HookSpec { Name: suffix, Condition: chk.Condition, }, + Essential: hook.Essential, } } } @@ -1089,8 +1155,7 @@ func getChkHookSpec(hook *Recipe.Hook, suffix string) kubeobjects.HookSpec { func getOpHookSpec(hook *Recipe.Hook, suffix string) kubeobjects.HookSpec { for _, op := range hook.Ops { if op.Name == suffix { - // TODO: There are two timeouts, onErrors one in hooks and other one in - // check or operation, which one to consider while running hook? + return kubeobjects.HookSpec{ Name: hook.Name, Namespace: hook.Namespace, @@ -1144,6 +1209,7 @@ func convertRecipeGroupToCaptureSpec(group Recipe.Group) (*kubeobjects.CaptureSp IncludedNamespaces: group.IncludedNamespaces, IncludedResources: group.IncludedResourceTypes, ExcludedResources: group.ExcludedResourceTypes, + GroupEssential: group.Essential, }, LabelSelector: group.LabelSelector, OrLabelSelectors: []*metav1.LabelSelector{}, diff --git a/internal/controller/vrg_recipe.go b/internal/controller/vrg_recipe.go index 0049629d1..1ba59b332 100644 --- a/internal/controller/vrg_recipe.go +++ b/internal/controller/vrg_recipe.go @@ -14,7 +14,7 @@ import ( ramen "github.com/ramendr/ramen/api/v1alpha1" "github.com/ramendr/ramen/internal/controller/kubeobjects" "github.com/ramendr/ramen/internal/controller/util" - recipe "github.com/ramendr/recipe/api/v1alpha1" + recipev1 "github.com/ramendr/recipe/api/v1alpha1" "golang.org/x/exp/slices" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" @@ -24,10 +24,18 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" ) +const ( + anyError = "any-error" + essentialError = "essential-error" + fullError = "full-error" +) + type RecipeElements struct { PvcSelector PvcSelector 
CaptureWorkflow []kubeobjects.CaptureSpec RecoverWorkflow []kubeobjects.RecoverSpec + CaptureFailOn string + RestoreFailOn string } func captureWorkflowDefault(vrg ramen.VolumeReplicationGroup, ramenConfig ramen.RamenConfig) []kubeobjects.CaptureSpec { @@ -105,6 +113,8 @@ func RecipeElementsGet(ctx context.Context, reader client.Reader, vrg ramen.Volu PvcSelector: getPVCSelector(vrg, ramenConfig, nil, nil), CaptureWorkflow: captureWorkflowDefault(vrg, ramenConfig), RecoverWorkflow: recoverWorkflowDefault(vrg, ramenConfig), + CaptureFailOn: anyError, + RestoreFailOn: anyError, } return recipeElements, nil @@ -115,7 +125,7 @@ func RecipeElementsGet(ctx context.Context, reader client.Reader, vrg ramen.Volu Name: vrg.Spec.KubeObjectProtection.RecipeRef.Name, } - recipe := recipe.Recipe{} + recipe := recipev1.Recipe{} if err := reader.Get(ctx, recipeNamespacedName, &recipe); err != nil { return recipeElements, fmt.Errorf("recipe %v get error: %w", recipeNamespacedName.String(), err) } @@ -147,7 +157,7 @@ func RecipeElementsGet(ctx context.Context, reader client.Reader, vrg ramen.Volu return recipeElements, nil } -func RecipeParametersExpand(recipe *recipe.Recipe, parameters map[string][]string, +func RecipeParametersExpand(recipe *recipev1.Recipe, parameters map[string][]string, log logr.Logger, ) error { spec := &recipe.Spec @@ -178,27 +188,29 @@ func parametersExpand(s string, parameters map[string][]string) string { }) } -func recipeWorkflowsGet(recipe recipe.Recipe, recipeElements *RecipeElements, vrg ramen.VolumeReplicationGroup, +func recipeWorkflowsGet(recipe recipev1.Recipe, recipeElements *RecipeElements, vrg ramen.VolumeReplicationGroup, ramenConfig ramen.RamenConfig, ) error { var err error - recipeElements.CaptureWorkflow, err = getCaptureGroups(recipe) + recipeElements.CaptureWorkflow, recipeElements.CaptureFailOn, err = getCaptureGroups(recipe) if err != nil && err != ErrWorkflowNotFound { return fmt.Errorf("failed to get groups from capture workflow: %w", 
err) } if err != nil { recipeElements.CaptureWorkflow = captureWorkflowDefault(vrg, ramenConfig) + recipeElements.CaptureFailOn = anyError } - recipeElements.RecoverWorkflow, err = getRecoverGroups(recipe) + recipeElements.RecoverWorkflow, recipeElements.RestoreFailOn, err = getRecoverGroups(recipe) if err != nil && err != ErrWorkflowNotFound { return fmt.Errorf("failed to get groups from recovery workflow: %w", err) } if err != nil { recipeElements.RecoverWorkflow = recoverWorkflowDefault(vrg, ramenConfig) + recipeElements.RestoreFailOn = anyError } return nil @@ -263,7 +275,7 @@ func recipeNamespaceNames(recipeElements RecipeElements) sets.Set[string] { func recipesWatch(b *builder.Builder, m objectToReconcileRequestsMapper) *builder.Builder { return b.Watches( - &recipe.Recipe{}, + &recipev1.Recipe{}, handler.EnqueueRequestsFromMapFunc(m.recipeToVrgReconcileRequestsMapper), builder.WithPredicates(util.CreateOrResourceVersionUpdatePredicate{}), )