Skip to content

Commit

Permalink
fix(RELEASE-1277): release controller crashes (#655)
Browse files Browse the repository at this point in the history
This PR tries to fix, or at least reduce, the daily crashes of the release
controller by restricting which objects are cached and by increasing the
leader-election lease duration, renew deadline, and retry period.

Signed-off-by: Leandro Mendes <[email protected]>
  • Loading branch information
theflockers authored Jan 31, 2025
1 parent 6edf445 commit 6f93a81
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 5 deletions.
3 changes: 3 additions & 0 deletions config/default/manager_auth_proxy_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,6 @@ spec:
- "--health-probe-bind-address=:8081"
- "--metrics-bind-address=127.0.0.1:8080"
- "--leader-elect"
- "--lease-duration=60s"
- "--leader-renew-deadline=30s"
- "--leader-elector-retry-period=10s"
3 changes: 3 additions & 0 deletions controllers/release/adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,7 @@ func (a *adapter) createFinalPipelineRun(releasePlan *v1alpha1.ReleasePlan, snap
WithLabels(map[string]string{
metadata.ApplicationNameLabel: releasePlan.Spec.Application,
metadata.PipelinesTypeLabel: metadata.FinalPipelineType,
metadata.ServiceNameLabel: metadata.ServiceName,
metadata.ReleaseNameLabel: a.release.Name,
metadata.ReleaseNamespaceLabel: a.release.Namespace,
metadata.ReleaseSnapshotLabel: a.release.Spec.Snapshot,
Expand Down Expand Up @@ -578,6 +579,7 @@ func (a *adapter) createManagedPipelineRun(resources *loader.ProcessingResources
WithLabels(map[string]string{
metadata.ApplicationNameLabel: resources.ReleasePlan.Spec.Application,
metadata.PipelinesTypeLabel: metadata.ManagedPipelineType,
metadata.ServiceNameLabel: metadata.ServiceName,
metadata.ReleaseNameLabel: a.release.Name,
metadata.ReleaseNamespaceLabel: a.release.Namespace,
metadata.ReleaseSnapshotLabel: a.release.Spec.Snapshot,
Expand Down Expand Up @@ -619,6 +621,7 @@ func (a *adapter) createTenantPipelineRun(releasePlan *v1alpha1.ReleasePlan, sna
WithLabels(map[string]string{
metadata.ApplicationNameLabel: releasePlan.Spec.Application,
metadata.PipelinesTypeLabel: metadata.TenantPipelineType,
metadata.ServiceNameLabel: metadata.ServiceName,
metadata.ReleaseNameLabel: a.release.Name,
metadata.ReleaseNamespaceLabel: a.release.Namespace,
metadata.ReleaseSnapshotLabel: a.release.Spec.Snapshot,
Expand Down
45 changes: 41 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@ import (
"crypto/tls"
"flag"
"os"
"time"

"sigs.k8s.io/controller-runtime/pkg/metrics/server"
crwebhook "sigs.k8s.io/controller-runtime/pkg/webhook"

"github.com/konflux-ci/operator-toolkit/controller"
"github.com/konflux-ci/operator-toolkit/webhook"
"github.com/konflux-ci/release-service/api/v1alpha1/webhooks"
"github.com/konflux-ci/release-service/metadata"

"go.uber.org/zap/zapcore"

Expand All @@ -37,10 +39,13 @@ import (
ecapiv1alpha1 "github.com/enterprise-contract/enterprise-contract-controller/api/v1alpha1"
applicationapiv1alpha1 "github.com/redhat-appstudio/application-api/api/v1alpha1"

"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

Expand All @@ -67,16 +72,29 @@ func init() {
}

func main() {
var metricsAddr string
var enableHttp2 bool
var enableLeaderElection bool
var probeAddr string
var (
metricsAddr string
enableHttp2 bool
enableLeaderElection bool
probeAddr string
leaderRenewDeadline time.Duration
leaseDuration time.Duration
leaderElectorRetryPeriod time.Duration
)

flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableHttp2, "enable-http2", false, "Enable HTTP/2 for the metrics and webhook servers.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
flag.DurationVar(&leaderRenewDeadline, "leader-renew-deadline", 10*time.Second,
"Leader RenewDeadline is the duration that the acting controlplane "+
"will retry refreshing leadership before giving up.")
flag.DurationVar(&leaseDuration, "lease-duration", 15*time.Second,
"Lease Duration is the duration that non-leader candidates will wait to force acquire leadership.")
flag.DurationVar(&leaderElectorRetryPeriod, "leader-elector-retry-period", 2*time.Second, "RetryPeriod is the duration the "+
"LeaderElector clients should wait between tries of actions.")
opts := zap.Options{
Development: true,
TimeEncoder: zapcore.ISO8601TimeEncoder,
Expand All @@ -87,9 +105,28 @@ func main() {
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
Cache: cache.Options{
ByObject: map[client.Object]cache.ByObject{
// we want to cache PipelineRuns only created by this operator.
&tektonv1.PipelineRun{}: cache.ByObject{
Label: labels.SelectorFromSet(labels.Set{metadata.ServiceNameLabel: metadata.ServiceName}),
},
// also cache other watched objects, but no filter is required.
&appstudiov1alpha1.Release{}: {},
&appstudiov1alpha1.ReleasePlan{}: {},
&appstudiov1alpha1.ReleasePlanAdmission{}: {},
// objects that the operator does not watch, but are used by it.
&appstudiov1alpha1.ReleaseServiceConfig{}: {},
&applicationapiv1alpha1.Snapshot{}: {},
&applicationapiv1alpha1.Application{}: {},
},
},
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
LeaderElectionID: "f3d4c01a.redhat.com",
RenewDeadline: &leaderRenewDeadline,
LeaseDuration: &leaseDuration,
RetryPeriod: &leaderElectorRetryPeriod,
Metrics: server.Options{
BindAddress: metricsAddr,
},
Expand Down
8 changes: 7 additions & 1 deletion metadata/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@ import "fmt"

// Common constants
const (
// RhtapDomain is the prefix of the application label.
RhtapDomain = "appstudio.openshift.io"

// MaxLabelLength is the maximum number of characters allowed in a label value.
MaxLabelLength = 63

// ServiceName is the name of the release service. It is the value set for
// ServiceNameLabel on the PipelineRuns created by this service.
ServiceName = "release"
)

// Prefixes used by the release controller package
Expand All @@ -47,6 +50,9 @@ var (
// AutomatedLabel is the label name for marking a Release as automated
AutomatedLabel = fmt.Sprintf("release.%s/automated", RhtapDomain)

// ServiceNameLabel is the label used to specify the service associated with an object
ServiceNameLabel = fmt.Sprintf("%s/%s", RhtapDomain, "service")

// ReleasePlanAdmissionLabel is the ReleasePlan label for the name of the ReleasePlanAdmission to use
ReleasePlanAdmissionLabel = fmt.Sprintf("release.%s/releasePlanAdmission", RhtapDomain)
)
Expand Down

0 comments on commit 6f93a81

Please sign in to comment.