Skip to content

Commit 81f261a

Browse files
committed
Add provider healthcheck controller
1 parent 07d7ec2 commit 81f261a

File tree

6 files changed

+488
-6
lines changed

6 files changed

+488
-6
lines changed

api/v1alpha2/conditions_consts.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ const (
4444
// CAPIVersionIncompatibilityReason documents that the provider version is incompatible with operator.
4545
CAPIVersionIncompatibilityReason = "CAPIVersionIncompatibility"
4646

47-
// ComponentsFetchErrorReason documents that an error occurred fetching the componets.
47+
// ComponentsFetchErrorReason documents that an error occurred fetching the components.
4848
ComponentsFetchErrorReason = "ComponentsFetchError"
4949

5050
// OldComponentsDeletionErrorReason documents that an error occurred deleting the old components prior to upgrading.
@@ -55,6 +55,9 @@ const (
5555

5656
// InvalidGithubTokenReason documents that the provided github token is invalid.
5757
InvalidGithubTokenReason = "InvalidGithubTokenError"
58+
59+
// NoDeploymentAvailableConditionReason documents that there is no Available condition for provider deployment yet.
60+
NoDeploymentAvailableConditionReason = "NoDeploymentAvailableConditionReason"
5861
)
5962

6063
const (

cmd/main.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
operatorv1alpha1 "sigs.k8s.io/cluster-api-operator/api/v1alpha1"
4444
operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2"
4545
providercontroller "sigs.k8s.io/cluster-api-operator/internal/controller"
46+
healtchcheckcontroller "sigs.k8s.io/cluster-api-operator/internal/controller/healthcheck"
4647
)
4748

4849
var (
@@ -233,6 +234,13 @@ func setupReconcilers(mgr ctrl.Manager) {
233234
setupLog.Error(err, "unable to create controller", "controller", "AddonProvider")
234235
os.Exit(1)
235236
}
237+
238+
if err := (&healtchcheckcontroller.ProviderHealthCheckReconciler{
239+
Client: mgr.GetClient(),
240+
}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
241+
setupLog.Error(err, "unable to create controller", "controller", "Healthcheck")
242+
os.Exit(1)
243+
}
236244
}
237245

238246
func setupWebhooks(mgr ctrl.Manager) {

internal/controller/genericprovider_controller.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,11 +155,7 @@ func patchProvider(ctx context.Context, provider genericprovider.GenericProvider
155155
operatorv1.ProviderInstalledCondition,
156156
}
157157

158-
conditions.SetSummary(provider, conditions.WithConditions(conds...))
159-
160-
options = append(options,
161-
patch.WithOwnedConditions{Conditions: append(conds, clusterv1.ReadyCondition)},
162-
)
158+
options = append(options, patch.WithOwnedConditions{Conditions: conds})
163159

164160
return patchHelper.Patch(ctx, provider.GetObject(), options...)
165161
}
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
/*
2+
Copyright 2023 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package healthcheck
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"time"
23+
24+
appsv1 "k8s.io/api/apps/v1"
25+
corev1 "k8s.io/api/core/v1"
26+
"k8s.io/apimachinery/pkg/runtime"
27+
"k8s.io/apimachinery/pkg/types"
28+
operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2"
29+
"sigs.k8s.io/cluster-api-operator/internal/controller/genericprovider"
30+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
31+
"sigs.k8s.io/cluster-api/util/conditions"
32+
"sigs.k8s.io/cluster-api/util/patch"
33+
ctrl "sigs.k8s.io/controller-runtime"
34+
"sigs.k8s.io/controller-runtime/pkg/builder"
35+
"sigs.k8s.io/controller-runtime/pkg/client"
36+
"sigs.k8s.io/controller-runtime/pkg/controller"
37+
"sigs.k8s.io/controller-runtime/pkg/event"
38+
"sigs.k8s.io/controller-runtime/pkg/predicate"
39+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
40+
)
41+
42+
type ProviderHealthCheckReconciler struct {
43+
Client client.Client
44+
}
45+
46+
// providerLabelKey is the label whose presence on a Deployment marks it as a
// provider deployment for the event predicates in this package.
const providerLabelKey = "cluster.x-k8s.io/provider"
49+
50+
func (r *ProviderHealthCheckReconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error {
51+
return ctrl.NewControllerManagedBy(mgr).
52+
For(&appsv1.Deployment{}, builder.WithPredicates(providerDeploymentPredicates())).
53+
WithOptions(options).
54+
Complete(r)
55+
}
56+
57+
func (r *ProviderHealthCheckReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, reterr error) {
58+
log := ctrl.LoggerFrom(ctx)
59+
60+
log.Info("Checking provider health")
61+
62+
result := ctrl.Result{}
63+
64+
deployment := &appsv1.Deployment{}
65+
66+
if err := r.Client.Get(ctx, req.NamespacedName, deployment); err != nil {
67+
// Error reading the object - requeue the request.
68+
return result, err
69+
}
70+
71+
// There should be just one owner reference - to a Provider resource.
72+
if len(deployment.GetOwnerReferences()) != 1 {
73+
return result, fmt.Errorf("incorrect number of owner references for provider deployment %s", req.NamespacedName)
74+
}
75+
76+
deploymentOwner := deployment.GetOwnerReferences()[0]
77+
78+
deploymentAvailableCondition := getDeploymentCondition(deployment.Status, appsv1.DeploymentAvailable)
79+
80+
typedProvider, err := r.getGenericProvider(ctx, deploymentOwner.Kind, deploymentOwner.Name, req.Namespace)
81+
if err != nil {
82+
return result, err
83+
}
84+
85+
// Stop earlier if this provider is not fully installed yet.
86+
if !conditions.IsTrue(typedProvider, operatorv1.ProviderInstalledCondition) {
87+
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
88+
}
89+
90+
// Compare provider's Ready condition with the deployment's Available condition and stop if they already match.
91+
currentReadyCondition := conditions.Get(typedProvider, clusterv1.ReadyCondition)
92+
if currentReadyCondition != nil && deploymentAvailableCondition != nil && currentReadyCondition.Status == deploymentAvailableCondition.Status {
93+
return result, nil
94+
}
95+
96+
// Initialize the patch helper
97+
patchHelper, err := patch.NewHelper(typedProvider.GetObject(), r.Client)
98+
if err != nil {
99+
return result, err
100+
}
101+
102+
if deploymentAvailableCondition != nil {
103+
conditions.Set(typedProvider, &clusterv1.Condition{
104+
Type: clusterv1.ReadyCondition,
105+
Status: deploymentAvailableCondition.Status,
106+
Reason: deploymentAvailableCondition.Reason,
107+
})
108+
} else {
109+
conditions.Set(typedProvider, &clusterv1.Condition{
110+
Type: clusterv1.ReadyCondition,
111+
Status: corev1.ConditionFalse,
112+
Reason: operatorv1.NoDeploymentAvailableConditionReason,
113+
})
114+
}
115+
116+
// Don't requeue immediately if the deployment is not ready, but rather wait 5 seconds.
117+
if conditions.IsFalse(typedProvider, clusterv1.ReadyCondition) {
118+
result = ctrl.Result{RequeueAfter: 5 * time.Second}
119+
}
120+
121+
options := patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{clusterv1.ReadyCondition}}
122+
123+
return result, patchHelper.Patch(ctx, typedProvider.GetObject(), options)
124+
}
125+
126+
func (r *ProviderHealthCheckReconciler) getGenericProvider(ctx context.Context, providerKind, providerName, providerNamespace string) (genericprovider.GenericProvider, error) {
127+
switch providerKind {
128+
case "CoreProvider":
129+
provider := &operatorv1.CoreProvider{}
130+
if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil {
131+
return nil, err
132+
}
133+
134+
return &genericprovider.CoreProviderWrapper{CoreProvider: provider}, nil
135+
case "BootstrapProvider":
136+
provider := &operatorv1.BootstrapProvider{}
137+
if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil {
138+
return nil, err
139+
}
140+
141+
return &genericprovider.BootstrapProviderWrapper{BootstrapProvider: provider}, nil
142+
case "ControlPlaneProvider":
143+
provider := &operatorv1.ControlPlaneProvider{}
144+
if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil {
145+
return nil, err
146+
}
147+
148+
return &genericprovider.ControlPlaneProviderWrapper{ControlPlaneProvider: provider}, nil
149+
case "InfrastructureProvider":
150+
provider := &operatorv1.InfrastructureProvider{}
151+
if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil {
152+
return nil, err
153+
}
154+
155+
return &genericprovider.InfrastructureProviderWrapper{InfrastructureProvider: provider}, nil
156+
case "AddonProvider":
157+
provider := &operatorv1.AddonProvider{}
158+
if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil {
159+
return nil, err
160+
}
161+
162+
return &genericprovider.AddonProviderWrapper{AddonProvider: provider}, nil
163+
default:
164+
return nil, fmt.Errorf("failed to cast interface for type: %s", providerKind)
165+
}
166+
}
167+
168+
// getDeploymentCondition returns the deployment condition with the provided type.
169+
func getDeploymentCondition(status appsv1.DeploymentStatus, condType appsv1.DeploymentConditionType) *appsv1.DeploymentCondition {
170+
for i := range status.Conditions {
171+
c := status.Conditions[i]
172+
if c.Type == condType {
173+
return &c
174+
}
175+
}
176+
177+
return nil
178+
}
179+
180+
func providerDeploymentPredicates() predicate.Funcs {
181+
isProviderDeployment := func(obj runtime.Object) bool {
182+
clusterOperator, ok := obj.(*appsv1.Deployment)
183+
if !ok {
184+
panic("expected to get an of object of type appsv1.Deployment")
185+
}
186+
187+
_, found := clusterOperator.GetLabels()[providerLabelKey]
188+
189+
return found
190+
}
191+
192+
return predicate.Funcs{
193+
CreateFunc: func(e event.CreateEvent) bool { return false },
194+
UpdateFunc: func(e event.UpdateEvent) bool { return isProviderDeployment(e.ObjectNew) },
195+
GenericFunc: func(e event.GenericEvent) bool { return false },
196+
DeleteFunc: func(e event.DeleteEvent) bool { return false },
197+
}
198+
}

0 commit comments

Comments
 (0)