From 45a6163592adba7cc60c9de970688279d0959678 Mon Sep 17 00:00:00 2001 From: akutz Date: Wed, 5 Feb 2025 10:17:52 -0600 Subject: [PATCH] Introduce staggered exit when caps change This patch changes the behavior of how the pod is exited when the capabilities have changed. Instead of all the replicas exiting at the same time, the exits are staggered. If the pod is the leader or no leader election is configured, the pod is exited after a brief delay. Otherwise the pod is exited immediately. This should allow the non-leaders to come back online before the leader is exited, enabling one of the restarted pods to take over as the leader when it exits. --- .golangci.yml | 4 +- .../local/vmoperator/local_env_var_patch.yaml | 2 + .../wcp/vmoperator/manager_env_var_patch.yaml | 6 ++ .../configmap_capability_controller.go | 14 +++- ...figmap_capability_controller_suite_test.go | 5 +- .../crd/crd_capability_controller.go | 14 +++- .../crd_capability_controller_suite_test.go | 5 +- controllers/infra/capability/exit/exit.go | 17 ---- main.go | 7 ++ pkg/config/config.go | 8 ++ pkg/config/default.go | 1 + pkg/config/env.go | 1 + pkg/config/env/env.go | 3 + pkg/config/env_test.go | 2 + pkg/exit/exit.go | 53 ++++++++++++ pkg/exit/exit_suite_test.go | 17 ++++ pkg/exit/exit_test.go | 80 +++++++++++++++++++ 17 files changed, 209 insertions(+), 30 deletions(-) delete mode 100644 controllers/infra/capability/exit/exit.go create mode 100644 pkg/exit/exit.go create mode 100644 pkg/exit/exit_suite_test.go create mode 100644 pkg/exit/exit_test.go diff --git a/.golangci.yml b/.golangci.yml index 0bf5244c2..ad749fde3 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -85,7 +85,9 @@ linters-settings: - alias: pkgctx pkg: github.com/vmware-tanzu/vm-operator/pkg/context - alias: pkgerr - pkg: github.com/vmware-tanzu/vm-operator/pkg/pkgerr + pkg: github.com/vmware-tanzu/vm-operator/pkg/errors + - alias: pkgexit + pkg: github.com/vmware-tanzu/vm-operator/pkg/exit - alias: ctxop pkg: github.com/vmware-tanzu/vm-operator/pkg/context/operation - alias: pkgmgr diff --git a/config/local/vmoperator/local_env_var_patch.yaml b/config/local/vmoperator/local_env_var_patch.yaml index 71771fce2..1237728da 100644 --- a/config/local/vmoperator/local_env_var_patch.yaml +++ b/config/local/vmoperator/local_env_var_patch.yaml @@ -13,6 +13,8 @@ spec: value: "true" - name: ASYNC_CREATE_ENABLED value: "true" + - name: EXIT_DELAY + value: "3s" - name: MEM_STATS_PERIOD value: "10m" - name: VSPHERE_NETWORKING diff --git a/config/wcp/vmoperator/manager_env_var_patch.yaml b/config/wcp/vmoperator/manager_env_var_patch.yaml index ed8e053f9..d09581568 100644 --- a/config/wcp/vmoperator/manager_env_var_patch.yaml +++ b/config/wcp/vmoperator/manager_env_var_patch.yaml @@ -40,6 +40,12 @@ name: PRIVILEGED_USERS value: "" +- op: add + path: /spec/template/spec/containers/0/env/- + value: + name: EXIT_DELAY + value: "3s" + - op: add path: /spec/template/spec/containers/0/env/- value: diff --git a/controllers/infra/capability/configmap/configmap_capability_controller.go b/controllers/infra/capability/configmap/configmap_capability_controller.go index 954ef6c22..e577e6181 100644 --- a/controllers/infra/capability/configmap/configmap_capability_controller.go +++ b/controllers/infra/capability/configmap/configmap_capability_controller.go @@ -20,10 +20,10 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/manager" - "github.com/vmware-tanzu/vm-operator/controllers/infra/capability/exit" pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config" "github.com/vmware-tanzu/vm-operator/pkg/config/capabilities" pkgctx "github.com/vmware-tanzu/vm-operator/pkg/context" + pkgexit "github.com/vmware-tanzu/vm-operator/pkg/exit" pkgmgr "github.com/vmware-tanzu/vm-operator/pkg/manager" "github.com/vmware-tanzu/vm-operator/pkg/record" kubeutil "github.com/vmware-tanzu/vm-operator/pkg/util/kube" @@ -53,6 +53,7 @@ func AddToManager(ctx *pkgctx.ControllerManagerContext, mgr manager.Manager) err cache, ctrl.Log.WithName("controllers").WithName(controllerName), record.New(mgr.GetEventRecorderFor(controllerNameLong)), + mgr.Elected(), ) // This controller is also run on the non-leaders (webhooks) pods too @@ -89,13 +90,15 @@ func NewReconciler( ctx context.Context, client ctrlclient.Reader, logger logr.Logger, - recorder record.Recorder) *Reconciler { + recorder record.Recorder, + elected <-chan struct{}) *Reconciler { return &Reconciler{ Context: ctx, Client: client, Logger: logger, Recorder: recorder, + Elected: elected, } } @@ -104,6 +107,7 @@ type Reconciler struct { Client ctrlclient.Reader Logger logr.Logger Recorder record.Recorder + Elected <-chan struct{} } // +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch @@ -120,8 +124,10 @@ func (r *Reconciler) Reconcile( } if capabilities.UpdateCapabilitiesFeatures(ctx, obj) { - r.Logger.Info("killing pod due to changed capabilities") - exit.Exit() + pkgexit.Exit( + logr.NewContext(ctx, r.Logger), + "capabilities have changed", + r.Elected) } return ctrl.Result{}, nil diff --git a/controllers/infra/capability/configmap/configmap_capability_controller_suite_test.go b/controllers/infra/capability/configmap/configmap_capability_controller_suite_test.go index 3f838c9df..5cb10dd65 100644 --- a/controllers/infra/capability/configmap/configmap_capability_controller_suite_test.go +++ b/controllers/infra/capability/configmap/configmap_capability_controller_suite_test.go @@ -7,12 +7,13 @@ package capability_test import ( "sync/atomic" "testing" + "time" . "github.com/onsi/ginkgo/v2" capability "github.com/vmware-tanzu/vm-operator/controllers/infra/capability/configmap" - "github.com/vmware-tanzu/vm-operator/controllers/infra/capability/exit" pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config" + pkgexit "github.com/vmware-tanzu/vm-operator/pkg/exit" "github.com/vmware-tanzu/vm-operator/pkg/manager" "github.com/vmware-tanzu/vm-operator/test/builder" ) @@ -20,7 +21,7 @@ import ( var numExits int32 func init() { - exit.Exit = func() { + pkgexit.ExitFn = func(_ time.Duration) { atomic.AddInt32(&numExits, 1) } } diff --git a/controllers/infra/capability/crd/crd_capability_controller.go b/controllers/infra/capability/crd/crd_capability_controller.go index 5a5348481..caeb305f2 100644 --- a/controllers/infra/capability/crd/crd_capability_controller.go +++ b/controllers/infra/capability/crd/crd_capability_controller.go @@ -16,11 +16,11 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" - "github.com/vmware-tanzu/vm-operator/controllers/infra/capability/exit" capv1 "github.com/vmware-tanzu/vm-operator/external/capabilities/api/v1alpha1" pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config" "github.com/vmware-tanzu/vm-operator/pkg/config/capabilities" pkgctx "github.com/vmware-tanzu/vm-operator/pkg/context" + pkgexit "github.com/vmware-tanzu/vm-operator/pkg/exit" "github.com/vmware-tanzu/vm-operator/pkg/record" "github.com/vmware-tanzu/vm-operator/pkg/util/ptr" ) @@ -39,6 +39,7 @@ func AddToManager(ctx *pkgctx.ControllerManagerContext, mgr manager.Manager) err mgr.GetClient(), ctrl.Log.WithName("controllers").WithName(controllerName), record.New(mgr.GetEventRecorderFor(controllerNameLong)), + mgr.Elected(), ) return ctrl.NewControllerManagedBy(mgr). @@ -66,13 +67,15 @@ func NewReconciler( ctx context.Context, client ctrlclient.Client, logger logr.Logger, - recorder record.Recorder) *Reconciler { + recorder record.Recorder, + elected <-chan struct{}) *Reconciler { return &Reconciler{ Context: ctx, Client: client, Logger: logger, Recorder: recorder, + Elected: elected, } } @@ -81,6 +84,7 @@ type Reconciler struct { Client ctrlclient.Client Logger logr.Logger Recorder record.Recorder + Elected <-chan struct{} } // +kubebuilder:rbac:groups=iaas.vmware.com,resources=capabilities,verbs=get;list;watch @@ -98,8 +102,10 @@ func (r *Reconciler) Reconcile( } if capabilities.UpdateCapabilitiesFeatures(ctx, obj) { - r.Logger.Info("killing pod due to changed capabilities") - exit.Exit() + pkgexit.Exit( + logr.NewContext(ctx, r.Logger), + "capabilities have changed", + r.Elected) } return ctrl.Result{}, nil diff --git a/controllers/infra/capability/crd/crd_capability_controller_suite_test.go b/controllers/infra/capability/crd/crd_capability_controller_suite_test.go index a5418ae26..06dcd1f30 100644 --- a/controllers/infra/capability/crd/crd_capability_controller_suite_test.go +++ b/controllers/infra/capability/crd/crd_capability_controller_suite_test.go @@ -7,12 +7,13 @@ package capability_test import ( "sync/atomic" "testing" + "time" . "github.com/onsi/ginkgo/v2" capability "github.com/vmware-tanzu/vm-operator/controllers/infra/capability/crd" - "github.com/vmware-tanzu/vm-operator/controllers/infra/capability/exit" pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config" + pkgexit "github.com/vmware-tanzu/vm-operator/pkg/exit" "github.com/vmware-tanzu/vm-operator/pkg/manager" "github.com/vmware-tanzu/vm-operator/test/builder" ) @@ -20,7 +21,7 @@ import ( var numExits int32 func init() { - exit.Exit = func() { + pkgexit.ExitFn = func(_ time.Duration) { atomic.AddInt32(&numExits, 1) } } diff --git a/controllers/infra/capability/exit/exit.go b/controllers/infra/capability/exit/exit.go deleted file mode 100644 index 7c279c45b..000000000 --- a/controllers/infra/capability/exit/exit.go +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2024 Broadcom. All Rights Reserved. -// Broadcom Confidential. The term "Broadcom" refers to Broadcom Inc. -// and/or its subsidiaries. - -package exit - -import ( - "os" -) - -// Exit is used in testing to assert the capability controller exits the process -// when the capabilities have changed. -var Exit func() - -func init() { - Exit = func() { os.Exit(1) } -} diff --git a/main.go b/main.go index aa3081a3b..4fa2f462a 100644 --- a/main.go +++ b/main.go @@ -34,6 +34,7 @@ import ( pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config" "github.com/vmware-tanzu/vm-operator/pkg/config/capabilities" pkgctx "github.com/vmware-tanzu/vm-operator/pkg/context" + pkgexit "github.com/vmware-tanzu/vm-operator/pkg/exit" pkgmgr "github.com/vmware-tanzu/vm-operator/pkg/manager" pkgmgrinit "github.com/vmware-tanzu/vm-operator/pkg/manager/init" "github.com/vmware-tanzu/vm-operator/pkg/mem" @@ -72,6 +73,8 @@ func main() { "buildtype", pkg.BuildType, "commit", pkg.BuildCommit) + initExitFn() + initContext() initFlags() @@ -128,6 +131,10 @@ func initMemStats() { metrics.Registry.MustRegister) } +func initExitFn() { + pkgexit.ExitFn = func(_ time.Duration) { os.Exit(1) } +} + func initContext() { ctx = pkgcfg.WithConfig(defaultConfig) ctx = cource.WithContext(ctx) diff --git a/pkg/config/config.go b/pkg/config/config.go index e411e65f5..1cf5991e3 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -106,6 +106,14 @@ type Config struct { // such as passwords. Defaults to false. LogSensitiveData bool + // ExitDelay may be set to a time.Duration value and is the maximum delay + // used when exiting the pod due to an event that requires the pod be + // restarted, ex. the capabilities have changed. The exit will be delayed + // by some duration between 0-ExitDelay. + // + // Defaults to 3s. + ExitDelay time.Duration + // AsyncSignalEnabled may be set to false to disable the vm-watcher service // used to reconcile VirtualMachine objects if their backend state has // changed. diff --git a/pkg/config/default.go b/pkg/config/default.go index e67f0a548..d1349ab53 100644 --- a/pkg/config/default.go +++ b/pkg/config/default.go @@ -39,6 +39,7 @@ func Default() Config { LeaderElectionID: defaultPrefix + "controller-manager-runtime", MaxCreateVMsOnProvider: 80, MaxConcurrentReconciles: 1, + ExitDelay: 3 * time.Second, AsyncSignalEnabled: true, AsyncCreateEnabled: true, MemStatsPeriod: 10 * time.Minute, diff --git a/pkg/config/env.go b/pkg/config/env.go index 30a71a5ff..4604552e5 100644 --- a/pkg/config/env.go +++ b/pkg/config/env.go @@ -29,6 +29,7 @@ func FromEnv() Config { setBool(env.VSphereNetworking, &config.VSphereNetworking) setStringSlice(env.PrivilegedUsers, &config.PrivilegedUsers) setBool(env.LogSensitiveData, &config.LogSensitiveData) + setDuration(env.ExitDelay, &config.ExitDelay) setBool(env.AsyncSignalEnabled, &config.AsyncSignalEnabled) setBool(env.AsyncCreateEnabled, &config.AsyncCreateEnabled) setDuration(env.MemStatsPeriod, &config.MemStatsPeriod) diff --git a/pkg/config/env/env.go b/pkg/config/env/env.go index b9206cdc7..689b1e9cd 100644 --- a/pkg/config/env/env.go +++ b/pkg/config/env/env.go @@ -14,6 +14,7 @@ type VarName uint8 const ( _varNameBegin VarName = iota + ExitDelay DefaultVMClassControllerName MaxCreateVMsOnProvider CreateVMRequeueDelay @@ -89,6 +90,8 @@ func All() []VarName { //nolint:gocyclo func (n VarName) String() string { switch n { + case ExitDelay: + return "EXIT_DELAY" case DefaultVMClassControllerName: return "DEFAULT_VM_CLASS_CONTROLLER_NAME" case MaxCreateVMsOnProvider: diff --git a/pkg/config/env_test.go b/pkg/config/env_test.go index b3015984b..56d8a1545 100644 --- a/pkg/config/env_test.go +++ b/pkg/config/env_test.go @@ -108,6 +108,7 @@ var _ = Describe( Expect(os.Setenv("POWERED_ON_VM_HAS_IP_REQUEUE_DELAY", "126h")).To(Succeed()) Expect(os.Setenv("MEM_STATS_PERIOD", "127h")).To(Succeed()) Expect(os.Setenv("SYNC_IMAGE_REQUEUE_DELAY", "128h")).To(Succeed()) + Expect(os.Setenv("EXIT_DELAY", "129h")).To(Succeed()) }) It("Should return a default config overridden by the environment", func() { Expect(config).To(BeComparableTo(pkgcfg.Config{ @@ -162,6 +163,7 @@ var _ = Describe( PoweredOnVMHasIPRequeueDelay: 126 * time.Hour, MemStatsPeriod: 127 * time.Hour, SyncImageRequeueDelay: 128 * time.Hour, + ExitDelay: 129 * time.Hour, })) }) }) diff --git a/pkg/exit/exit.go b/pkg/exit/exit.go new file mode 100644 index 000000000..3a975e846 --- /dev/null +++ b/pkg/exit/exit.go @@ -0,0 +1,53 @@ +// Copyright (c) 2024 Broadcom. All Rights Reserved. +// Broadcom Confidential. The term "Broadcom" refers to Broadcom Inc. +// and/or its subsidiaries. + +package exit + +import ( + "context" + "math/rand/v2" + "time" + + "github.com/go-logr/logr" + + pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config" +) + +// ExitFn is used in testing to assert the Exit() function is called. +// The default value is a no-op. +var ExitFn func(exitDelay time.Duration) + +func init() { + ExitFn = func(_ time.Duration) {} +} + +// Exit will exit the pod immediately if it is not the leader, otherwise if +// the pod is the leader or leader election is not enabled, the pod will exit +// after a brief delay. +func Exit( + ctx context.Context, + reason string, + elected <-chan struct{}) { + + var exitDelay time.Duration + select { + case <-elected: + // When the leader or when there is no leader election, delay the + // exit up to some random amount of time. + n := rand.Float64() //nolint:gosec + exitDelay = time.Duration(n * float64(pkgcfg.FromContext(ctx).ExitDelay)) + default: + // When not the leader, exit immediately. This should allow one of + // the non-leader pods to come back online to become the leader when + // the leader pod exits. + } + if exitDelay > 0 { + time.Sleep(exitDelay) + } + + logr.FromContextOrDiscard(ctx). + Info("Exiting pod", "exitDelay", exitDelay, "reason", reason) + + ExitFn(exitDelay) +} diff --git a/pkg/exit/exit_suite_test.go b/pkg/exit/exit_suite_test.go new file mode 100644 index 000000000..b3df9034a --- /dev/null +++ b/pkg/exit/exit_suite_test.go @@ -0,0 +1,17 @@ +// © Broadcom. All Rights Reserved. +// The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. +// SPDX-License-Identifier: Apache-2.0 + +package exit_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestExit(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Exit Suite") +} diff --git a/pkg/exit/exit_test.go b/pkg/exit/exit_test.go new file mode 100644 index 000000000..f887200a8 --- /dev/null +++ b/pkg/exit/exit_test.go @@ -0,0 +1,80 @@ +// © Broadcom. All Rights Reserved. +// The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. +// SPDX-License-Identifier: Apache-2.0 + +package exit_test + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config" + pkgexit "github.com/vmware-tanzu/vm-operator/pkg/exit" +) + +var _ = DescribeTable("Exit", Ordered, MustPassRepeatedly(5), + func(exitDelay time.Duration, leaderOrNoElection bool) { + + uniqueExitDelayValues := map[time.Duration]struct{}{} + + fn := func() { + var ( + actualExitDelay time.Duration + + ctx = pkgcfg.WithConfig(pkgcfg.Config{ExitDelay: exitDelay}) + done = make(chan struct{}) + elected = make(chan struct{}) + ) + + pkgexit.ExitFn = func(d time.Duration) { + actualExitDelay = d + uniqueExitDelayValues[d] = struct{}{} + close(done) + } + + if leaderOrNoElection { + close(elected) + } + + pkgexit.Exit(ctx, "", elected) + <-done + + if leaderOrNoElection { + Expect(actualExitDelay).ToNot(BeZero()) + Expect(actualExitDelay).To(BeNumerically("<=", exitDelay)) + } else { + Expect(actualExitDelay).To(BeZero()) + } + } + + n := 1 + if leaderOrNoElection { + // If the test is for a leader or when leader election is not + // configured, then run the test fn N times and assert there are + // N unique exitDelay values. + n = 10 + } + + for i := 0; i < n; i++ { + fn() + } + Expect(uniqueExitDelayValues).To(HaveLen(n)) + }, + Entry( + "leader election is not configured", + time.Millisecond*100, + true, + ), + Entry( + "leader election is configured and pod is the leader", + time.Millisecond*100, + true, + ), + Entry( + "leader election is configured and pod is not the leader", + time.Millisecond*100, + false, + ), +)