Skip to content

Commit 535616d

Browse files
authored
feat(SliceGwReconciler): Add PodDisruptionBudget logic to SliceGwReconciler (#308)
2 parents ffc9890 + bd0283b commit 535616d

File tree

5 files changed

+359
-0
lines changed

5 files changed

+359
-0
lines changed

config/rbac/role.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,14 @@ rules:
289289
- patch
290290
- update
291291
- watch
292+
- apiGroups:
293+
- policy
294+
resources:
295+
- poddisruptionbudgets
296+
verbs:
297+
- create
298+
- delete
299+
- list
292300
- apiGroups:
293301
- rbac.authorization.k8s.io
294302
resources:
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package slicegateway
2+
3+
import (
4+
"context"
5+
"fmt"
6+
7+
"github.com/kubeslice/worker-operator/controllers"
8+
webhook "github.com/kubeslice/worker-operator/pkg/webhook/pod"
9+
policyv1 "k8s.io/api/policy/v1"
10+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11+
"k8s.io/apimachinery/pkg/util/intstr"
12+
"sigs.k8s.io/controller-runtime/pkg/client"
13+
)
14+
15+
// DefaultMinAvailablePodsInPDB is the default minAvailable value set on a slice gateway's PodDisruptionBudget.
16+
var DefaultMinAvailablePodsInPDB = intstr.FromInt(1)
17+
18+
// constructPodDisruptionBudget creates the PodDisruptionBudget's manifest with labels matching the slice gateway pods.
19+
func constructPodDisruptionBudget(sliceName, sliceGwName string, minAvailable intstr.IntOrString) *policyv1.PodDisruptionBudget {
20+
return &policyv1.PodDisruptionBudget{
21+
ObjectMeta: metav1.ObjectMeta{
22+
Name: fmt.Sprintf("%s-pdb", sliceGwName),
23+
Namespace: controllers.ControlPlaneNamespace,
24+
Labels: map[string]string{
25+
controllers.ApplicationNamespaceSelectorLabelKey: sliceName,
26+
controllers.SliceGatewaySelectorLabelKey: sliceGwName,
27+
},
28+
},
29+
Spec: policyv1.PodDisruptionBudgetSpec{
30+
MinAvailable: &minAvailable,
31+
Selector: &metav1.LabelSelector{
32+
MatchLabels: map[string]string{
33+
controllers.ApplicationNamespaceSelectorLabelKey: sliceName,
34+
webhook.PodInjectLabelKey: "slicegateway",
35+
controllers.SliceGatewaySelectorLabelKey: sliceGwName,
36+
},
37+
},
38+
},
39+
}
40+
}
41+
42+
// listPodDisruptionBudgetForSliceGateway lists the PodDisruptionBudget objects that match the slice gateway pods.
43+
func listPodDisruptionBudgetForSliceGateway(ctx context.Context, kubeClient client.Client,
44+
sliceName, sliceGwName string) ([]policyv1.PodDisruptionBudget, error) {
45+
// Options for listing the PDBs that match the slice and slice gateway
46+
listOpts := []client.ListOption{
47+
client.MatchingLabels(map[string]string{
48+
controllers.ApplicationNamespaceSelectorLabelKey: sliceName,
49+
controllers.SliceGatewaySelectorLabelKey: sliceGwName,
50+
}),
51+
client.InNamespace(controllers.ControlPlaneNamespace),
52+
}
53+
54+
// List PDBs from cluster that match the slice and slice gateway
55+
pdbList := policyv1.PodDisruptionBudgetList{}
56+
if err := kubeClient.List(ctx, &pdbList, listOpts...); err != nil {
57+
return nil, err
58+
}
59+
60+
return pdbList.Items, nil
61+
}

controllers/slicegateway/reconciler.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
webhook "github.com/kubeslice/worker-operator/pkg/webhook/pod"
3030
appsv1 "k8s.io/api/apps/v1"
3131
corev1 "k8s.io/api/core/v1"
32+
policyv1 "k8s.io/api/policy/v1"
3233
"k8s.io/apimachinery/pkg/api/errors"
3334
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3435
"k8s.io/apimachinery/pkg/runtime"
@@ -78,6 +79,7 @@ type SliceGwReconciler struct {
7879
//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch
7980
//+kubebuilder:rbac:groups=core,resources=endpoints,verbs=get;list;watch;create;update;patch;delete
8081
//+kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;
82+
//+kubebuilder:rbac:groups=policy,resources=poddisruptionbudgets,verbs=list;create;delete
8183

8284
func (r *SliceGwReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
8385
var sliceGwNodePorts []int
@@ -490,6 +492,7 @@ func (r *SliceGwReconciler) SetupWithManager(mgr ctrl.Manager) error {
490492
For(&kubeslicev1beta1.SliceGateway{}).
491493
Owns(&appsv1.Deployment{}).
492494
Owns(&corev1.Service{}).
495+
Owns(&policyv1.PodDisruptionBudget{}).
493496
Watches(
494497
&corev1.Pod{},
495498
handler.EnqueueRequestsFromMapFunc(r.findSliceGwObjectsToReconcile),

controllers/slicegateway/slicegateway.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,6 +1375,18 @@ func (r *SliceGwReconciler) ReconcileGatewayDeployments(ctx context.Context, sli
13751375
}
13761376
}
13771377

1378+
// Create a PodDisruptionBudget for the slice gateway's pods so that at least 1 pod instance remains on each worker
1379+
// when a disruption occurs.
1380+
//
1381+
// Note: Always attempt to create the PDB, regardless of whether the current reconciliation created any deployments,
1382+
// because the request could have been requeued after a failure to create the PDB.
1383+
if err = r.createPodDisruptionBudgetForSliceGatewayPods(ctx, sliceName, sliceGw); err != nil {
1384+
log.Error(err, "Failed to create PodDisruptionBudget for SliceGW deployments",
1385+
"SliceName", sliceName, "SliceGwName", sliceGwName)
1386+
1387+
return ctrl.Result{}, err, true
1388+
}
1389+
13781390
// Reconcile deployment to node port mapping for gw client deployments
13791391
if isClient(sliceGw) {
13801392
for _, deployment := range deployments.Items {
@@ -1534,3 +1546,56 @@ func (r *SliceGwReconciler) ReconcileIntermediateGatewayDeployments(ctx context.
15341546

15351547
return ctrl.Result{}, nil, false
15361548
}
1549+
1550+
// createPodDisruptionBudgetForSliceGatewayPods checks for PodDisruptionBudget objects in the cluster that match the
1551+
// slice gateway pods, and if missing, it creates a PDB with minimum availability of 1 so at least one pod remains in
1552+
// case of a disruption.
1553+
func (r *SliceGwReconciler) createPodDisruptionBudgetForSliceGatewayPods(ctx context.Context,
1554+
sliceName string, sliceGateway *kubeslicev1beta1.SliceGateway) error {
1555+
log := r.Log.WithValues("sliceName", sliceName, "sliceGwName", sliceGateway.Name)
1556+
1557+
// List PDBs in cluster that match the slice gateway pods
1558+
pdbs, err := listPodDisruptionBudgetForSliceGateway(ctx, r.Client, sliceName, sliceGateway.Name)
1559+
if err != nil && !apierrors.IsNotFound(err) {
1560+
log.Error(err, "failed to list PodDisruptionBudgets that match the slice gateway")
1561+
1562+
// When some unexpected error occurred, return the error for requeuing the request
1563+
return err
1564+
}
1565+
1566+
// Check if PDB already exists that matches the current slice gateway
1567+
if len(pdbs) > 0 {
1568+
// PodDisruptionBudget matching the slice gateway already exists. Skipping creation.
1569+
return nil
1570+
}
1571+
1572+
// Create PDB manifest with minimum availability of 1 pod
1573+
pdb := constructPodDisruptionBudget(sliceName, sliceGateway.Name, DefaultMinAvailablePodsInPDB)
1574+
1575+
// Set SliceGateway instance as the owner and controller for PDB
1576+
if err = ctrl.SetControllerReference(sliceGateway, pdb, r.Scheme); err != nil {
1577+
log.Error(err, "Failed to set slice gateway as owner to PodDisruptionBudget",
1578+
"pdb", pdb.Name)
1579+
1580+
return fmt.Errorf("failed to set slice gateway %q as owner to PodDisruptionBudget %q: %v",
1581+
sliceGateway.Name, pdb.Name, err)
1582+
}
1583+
1584+
// Create PDB for slice gateway's pod to have at least 1 pod on each worker when disruption occurs
1585+
if err = r.Create(ctx, pdb); err != nil {
1586+
if apierrors.IsAlreadyExists(err) {
1587+
// PDB is already exists. So, ignoring the current request.
1588+
return nil
1589+
}
1590+
1591+
log.Error(err, "PodDisruptionBudget creation failed", "pdb", pdb.Name)
1592+
1593+
// When any other unexpected error occurred when attempting to create PDB, fail the request
1594+
return fmt.Errorf("failed to create PodDisruptionBudget for SliceGW pods: %v", err)
1595+
}
1596+
1597+
// PDB created successfully
1598+
log.Info("PodDisruptionBudget for slice gateway pods created successfully")
1599+
1600+
return nil
1601+
}

0 commit comments

Comments
 (0)