Skip to content

Commit

Permalink
fix(RELEASE-1277): release controller crashes
Browse files Browse the repository at this point in the history
this PR tries to fix/decrease the daily crashes of the release
controller by filtering objects to cache and increasing the
lease renew time.
Signed-off-by: Leandro Mendes <[email protected]>
  • Loading branch information
theflockers committed Jan 27, 2025
1 parent c497639 commit a00e14c
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 36 deletions.
6 changes: 6 additions & 0 deletions config/default/manager_auth_proxy_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,9 @@ spec:
- "--health-probe-bind-address=:8081"
- "--metrics-bind-address=127.0.0.1:8080"
- "--leader-elect"
- "--lease-duration"
- "60"
- "--leader-renew-deadline"
- "30"
- "--leader-elector-retry-period"
- "10"
3 changes: 3 additions & 0 deletions controllers/release/adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,7 @@ func (a *adapter) createFinalPipelineRun(releasePlan *v1alpha1.ReleasePlan, snap
WithLabels(map[string]string{
metadata.ApplicationNameLabel: releasePlan.Spec.Application,
metadata.PipelinesTypeLabel: metadata.FinalPipelineType,
metadata.ServiceNameLabel: metadata.ServiceName,
metadata.ReleaseNameLabel: a.release.Name,
metadata.ReleaseNamespaceLabel: a.release.Namespace,
metadata.ReleaseSnapshotLabel: a.release.Spec.Snapshot,
Expand Down Expand Up @@ -530,6 +531,7 @@ func (a *adapter) createManagedPipelineRun(resources *loader.ProcessingResources
WithLabels(map[string]string{
metadata.ApplicationNameLabel: resources.ReleasePlan.Spec.Application,
metadata.PipelinesTypeLabel: metadata.ManagedPipelineType,
metadata.ServiceNameLabel: metadata.ServiceName,
metadata.ReleaseNameLabel: a.release.Name,
metadata.ReleaseNamespaceLabel: a.release.Namespace,
metadata.ReleaseSnapshotLabel: a.release.Spec.Snapshot,
Expand Down Expand Up @@ -571,6 +573,7 @@ func (a *adapter) createTenantPipelineRun(releasePlan *v1alpha1.ReleasePlan, sna
WithLabels(map[string]string{
metadata.ApplicationNameLabel: releasePlan.Spec.Application,
metadata.PipelinesTypeLabel: metadata.TenantPipelineType,
metadata.ServiceNameLabel: metadata.ServiceName,
metadata.ReleaseNameLabel: a.release.Name,
metadata.ReleaseNamespaceLabel: a.release.Namespace,
metadata.ReleaseSnapshotLabel: a.release.Spec.Snapshot,
Expand Down
39 changes: 30 additions & 9 deletions controllers/releaseplan/adapter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,17 +98,38 @@ var _ = Describe("ReleasePlan adapter", Ordered, func() {
result, err := adapter.EnsureOwnerReferenceIsSet()
Expect(!result.RequeueRequest && !result.CancelRequest).To(BeTrue())
Expect(err).NotTo(HaveOccurred())
Expect(adapter.releasePlan.OwnerReferences).To(HaveLen(1))
})

boolTrue := true
expectedOwnerReference := metav1.OwnerReference{
Kind: "Application",
APIVersion: "appstudio.redhat.com/v1alpha1",
UID: application.UID,
Name: application.Name,
Controller: &boolTrue,
BlockOwnerDeletion: &boolTrue,
It("should delete the releasePlan if the owner is deleted", func() {
newApplication := &applicationapiv1alpha1.Application{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "application-",
Namespace: "default",
},
}
Expect(adapter.releasePlan.ObjectMeta.OwnerReferences).To(ContainElement(expectedOwnerReference))
Expect(k8sClient.Create(ctx, newApplication)).To(Succeed())

adapter.ctx = toolkit.GetMockedContext(ctx, []toolkit.MockData{
{
ContextKey: loader.ApplicationContextKey,
Resource: newApplication,
},
})

Expect(adapter.releasePlan.OwnerReferences).To(HaveLen(0))
result, err := adapter.EnsureOwnerReferenceIsSet()
Expect(!result.RequeueRequest && !result.CancelRequest).To(BeTrue())
Expect(err).NotTo(HaveOccurred())
Expect(adapter.releasePlan.OwnerReferences).To(HaveLen(1))

Expect(k8sClient.Delete(ctx, newApplication)).To(Succeed())
_, err = adapter.loader.GetReleasePlan(ctx, k8sClient, &v1alpha1.Release{
Spec: v1alpha1.ReleaseSpec{
ReleasePlan: adapter.releasePlan.Name,
},
})
Expect(err).To(HaveOccurred())
})
})

Expand Down
95 changes: 68 additions & 27 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,17 @@ package main
import (
"crypto/tls"
"flag"
"fmt"
"os"
"time"

"sigs.k8s.io/controller-runtime/pkg/metrics/server"
crwebhook "sigs.k8s.io/controller-runtime/pkg/webhook"

"github.com/konflux-ci/operator-toolkit/controller"
"github.com/konflux-ci/operator-toolkit/webhook"
"github.com/konflux-ci/release-service/api/v1alpha1/webhooks"
"github.com/konflux-ci/release-service/metadata"

"go.uber.org/zap/zapcore"

Expand All @@ -37,10 +40,13 @@ import (
ecapiv1alpha1 "github.com/enterprise-contract/enterprise-contract-controller/api/v1alpha1"
applicationapiv1alpha1 "github.com/redhat-appstudio/application-api/api/v1alpha1"

"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

Expand All @@ -56,6 +62,16 @@ var (
setupLog = ctrl.Log.WithName("setup")
)

type ControllerFlags struct {
metricsAddr string
enableHttp2 bool
enableLeaderElection bool
leaderRenewDeadline time.Duration
leaseDuration time.Duration
leaderElectorRetryPeriod time.Duration
probeAddr string
}

func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
utilruntime.Must(appstudiov1alpha1.AddToScheme(scheme))
Expand All @@ -66,38 +82,33 @@ func init() {
//+kubebuilder:scaffold:scheme
}

func main() {
var metricsAddr string
var enableHttp2 bool
var enableLeaderElection bool
var probeAddr string
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableHttp2, "enable-http2", false, "Enable HTTP/2 for the metrics and webhook servers.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
opts := zap.Options{
Development: true,
TimeEncoder: zapcore.ISO8601TimeEncoder,
}
opts.BindFlags(flag.CommandLine)
flag.Parse()
func setupManager() ctrl.Manager {

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
cf := ControllerFlags{}
readControllerFlags(&cf)

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
Cache: cache.Options{
ByObject: map[client.Object]cache.ByObject{
&tektonv1.PipelineRun{}: cache.ByObject{
Label: labels.SelectorFromSet(labels.Set{metadata.ServiceNameLabel: metadata.ServiceName}),
},
},
},
HealthProbeBindAddress: cf.probeAddr,
LeaderElection: cf.enableLeaderElection,
LeaderElectionID: "f3d4c01a.redhat.com",
RenewDeadline: &cf.leaderRenewDeadline,
LeaseDuration: &cf.leaseDuration,
RetryPeriod: &cf.leaderElectorRetryPeriod,
Metrics: server.Options{
BindAddress: metricsAddr,
BindAddress: cf.metricsAddr,
},
WebhookServer: crwebhook.NewServer(crwebhook.Options{
Port: 9443,
TLSOpts: []func(*tls.Config){
func(c *tls.Config) {
if !enableHttp2 {
if !cf.enableHttp2 {
c.NextProtos = []string{"http/1.1"}
}
},
Expand All @@ -110,6 +121,35 @@ func main() {
os.Exit(1)
}

return mgr
}

func readControllerFlags(c *ControllerFlags) {

flag.StringVar(&c.metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&c.probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&c.enableHttp2, "enable-http2", false, "Enable HTTP/2 for the metrics and webhook servers.")
flag.BoolVar(&c.enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
flag.DurationVar(&c.leaderRenewDeadline, "leader-renew-deadline", 10*time.Second,
"Leader RenewDeadline is the duration that the acting controlplane "+
"will retry refreshing leadership before giving up.")
flag.DurationVar(&c.leaseDuration, "lease-duration", 15*time.Second,
"Lease Duration is the duration that non-leader candidates will wait to force acquire leadership.")
flag.DurationVar(&c.leaderElectorRetryPeriod, "leader-elector-retry-period", 2*time.Second, "RetryPeriod is the duration the "+
"LeaderElector clients should wait between tries of actions.")
opts := zap.Options{
Development: true,
TimeEncoder: zapcore.ISO8601TimeEncoder,
}
opts.BindFlags(flag.CommandLine)
flag.Parse()

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
}

func main() {
// Set a default value for the DEFAULT_RELEASE_PVC environment variable
if os.Getenv("DEFAULT_RELEASE_PVC") == "" {
err := os.Setenv("DEFAULT_RELEASE_PVC", "release-pvc")
Expand All @@ -136,16 +176,16 @@ func main() {
os.Exit(1)
}
}

setUpControllers(mgr)
setUpWebhooks(mgr)

err = os.Setenv("ENTERPRISE_CONTRACT_CONFIG_MAP", "enterprise-contract-service/ec-defaults")
err := os.Setenv("ENTERPRISE_CONTRACT_CONFIG_MAP", "enterprise-contract-service/ec-defaults")
if err != nil {
setupLog.Error(err, "unable to setup ENTERPRISE_CONTRACT_CONFIG_MAP environment variable")
os.Exit(1)
}

mgr := setupManager()
setUpControllers(mgr)
setUpWebhooks(mgr)

//+kubebuilder:scaffold:builder

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
Expand All @@ -166,6 +206,7 @@ func main() {

// setUpControllers sets up controllers.
func setUpControllers(mgr ctrl.Manager) {
fmt.Printf("%+v", mgr)
err := controller.SetupControllers(mgr, nil, controllers.EnabledControllers...)
if err != nil {
setupLog.Error(err, "unable to setup controllers")
Expand Down
6 changes: 6 additions & 0 deletions metadata/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ const (

// MaxLabelLength is the maximum allowed characters in a label value
MaxLabelLength = 63

// Release service name
ServiceName = "release"
)

// Labels used by the release api package
Expand All @@ -41,6 +44,9 @@ var (
// AutomatedLabel is the label name for marking a Release as automated
AutomatedLabel = fmt.Sprintf("release.%s/automated", rhtapDomain)

// ServiceNameLabel is the label used to specify the service associated with an object
ServiceNameLabel = fmt.Sprintf("%s/%s", rhtapDomain, "service")

// ReleasePlanAdmissionLabel is the ReleasePlan label for the name of the ReleasePlanAdmission to use
ReleasePlanAdmissionLabel = fmt.Sprintf("release.%s/releasePlanAdmission", rhtapDomain)
)
Expand Down

0 comments on commit a00e14c

Please sign in to comment.