From 7a0ce6578aa46ae6370bc4bf0ead1e1e04b56154 Mon Sep 17 00:00:00 2001
From: iuri aranda
Date: Wed, 12 Feb 2025 16:15:36 +0100
Subject: [PATCH 1/3] Scale down cilium-operator before deleting a cluster (only in eni mode)

This is an attempt to prevent a race-condition on cluster deletion, where
Cilium could leak ENIs, preventing the full removal of the VPC.

Towards https://github.com/giantswarm/giantswarm/issues/32350
---
 CHANGELOG.md                                  |  4 ++
 .../templates/cilium-cleanup/job.yaml         | 72 +++++++++++++++++++
 .../templates/cilium-cleanup/role.yaml        | 22 ++++++
 .../templates/cilium-cleanup/rolebinding.yaml | 21 ++++++
 .../cilium-cleanup/serviceaccount.yaml        | 13 ++++
 5 files changed, 132 insertions(+)
 create mode 100644 helm/cluster-aws/templates/cilium-cleanup/job.yaml
 create mode 100644 helm/cluster-aws/templates/cilium-cleanup/role.yaml
 create mode 100644 helm/cluster-aws/templates/cilium-cleanup/rolebinding.yaml
 create mode 100644 helm/cluster-aws/templates/cilium-cleanup/serviceaccount.yaml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a58cfd36..b2b6b59e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Fixed
+
+- Scale down cilium-operator before deleting a cluster (only in eni mode)
+
 ## [2.6.1] - 2025-02-07
 
 ### Added
diff --git a/helm/cluster-aws/templates/cilium-cleanup/job.yaml b/helm/cluster-aws/templates/cilium-cleanup/job.yaml
new file mode 100644
index 00000000..0fac8afb
--- /dev/null
+++ b/helm/cluster-aws/templates/cilium-cleanup/job.yaml
@@ -0,0 +1,72 @@
+{{- if eq (required "global.connectivity.cilium.ipamMode is required" .Values.global.connectivity.cilium.ipamMode) "eni" }}
+#
+# When working in eni mode and ENIs need to be deleted (e.g. on cluster deletion), Cilium first needs to detach
+# the ENIs from the EC2 instance, and then delete them.
+# If cilium-operator gets killed / removed after it detaches an ENI but before it deletes it, the ENI will be orphaned.
+#
+# This is a possible scenario on cluster deletion. To prevent it, this pre-delete hook will scale down cilium-operator before deleting.
+# This way ENIs remain attached to the EC2 instances and get automatically deleted by AWS upon instance termination.
+#
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ include "resource.default.name" . }}-cilium-cleanup
+  namespace: {{ .Release.Namespace }}
+  annotations:
+    helm.sh/hook: pre-delete
+    helm.sh/hook-delete-policy: before-hook-creation
+    helm.sh/hook-weight: "0"
+  labels:
+    {{- include "labels.common" . | nindent 4 }}
+spec:
+  template:
+    metadata:
+      labels:
+        {{- include "labels.common" . | nindent 8 }}
+    spec:
+      serviceAccountName: {{ include "resource.default.name" . }}-cilium-cleanup
+      containers:
+      - name: kubectl
+        image: {{ include "awsContainerImageRegistry" . }}/giantswarm/kubectl:{{ .Values.cluster.providerIntegration.kubernetesVersion }}
+        securityContext:
+          runAsNonRoot: true
+          runAsUser: 1000
+          runAsGroup: 1000
+          allowPrivilegeEscalation: false
+          seccompProfile:
+            type: RuntimeDefault
+          capabilities:
+            drop:
+            - ALL
+          readOnlyRootFilesystem: true
+        env:
+        - name: CLUSTER
+          value: {{ include "resource.default.name" . }}
+        - name: KUBECONFIG
+          value: /etc/kubeconfig/value
+        command:
+        - /bin/sh
+        args:
+        - -c
+        - |
+          echo "Scaling down cilium-operator on cluster ${CLUSTER} as it is about to be deleted."
+          kubectl scale --namespace kube-system deployment cilium-operator --replicas=0
+        resources:
+          requests:
+            cpu: 10m
+            memory: 64Mi
+          limits:
+            cpu: 100m
+            memory: 256Mi
+        volumeMounts:
+        - name: kubeconfig
+          mountPath: /etc/kubeconfig
+          readOnly: true
+      restartPolicy: Never
+      volumes:
+      - name: kubeconfig
+        secret:
+          secretName: {{ include "resource.default.name" . }}-kubeconfig
+          defaultMode: 420
+  ttlSecondsAfterFinished: 86400 # 24h
+{{- end }}
diff --git a/helm/cluster-aws/templates/cilium-cleanup/role.yaml b/helm/cluster-aws/templates/cilium-cleanup/role.yaml
new file mode 100644
index 00000000..ffab68a3
--- /dev/null
+++ b/helm/cluster-aws/templates/cilium-cleanup/role.yaml
@@ -0,0 +1,22 @@
+{{- if eq (required "global.connectivity.cilium.ipamMode is required" .Values.global.connectivity.cilium.ipamMode) "eni" }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: {{ include "resource.default.name" . }}-cilium-cleanup
+  namespace: {{ .Release.Namespace }}
+  annotations:
+    helm.sh/hook: pre-delete
+    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded,hook-failed
+    helm.sh/hook-weight: "-1"
+  labels:
+    {{- include "labels.common" . | nindent 4 }}
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - secrets
+  verbs:
+  - get
+  resourceNames:
+  - {{ include "resource.default.name" . }}-kubeconfig
+{{- end }}
diff --git a/helm/cluster-aws/templates/cilium-cleanup/rolebinding.yaml b/helm/cluster-aws/templates/cilium-cleanup/rolebinding.yaml
new file mode 100644
index 00000000..11114425
--- /dev/null
+++ b/helm/cluster-aws/templates/cilium-cleanup/rolebinding.yaml
@@ -0,0 +1,21 @@
+{{- if eq (required "global.connectivity.cilium.ipamMode is required" .Values.global.connectivity.cilium.ipamMode) "eni" }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ include "resource.default.name" . }}-cilium-cleanup
+  namespace: {{ .Release.Namespace }}
+  annotations:
+    helm.sh/hook: pre-delete
+    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded,hook-failed
+    helm.sh/hook-weight: "-1"
+  labels:
+    {{- include "labels.common" . | nindent 4 }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: {{ include "resource.default.name" . }}-cilium-cleanup
+subjects:
+- kind: ServiceAccount
+  name: {{ include "resource.default.name" . }}-cilium-cleanup
+  namespace: {{ .Release.Namespace }}
+{{- end }}
diff --git a/helm/cluster-aws/templates/cilium-cleanup/serviceaccount.yaml b/helm/cluster-aws/templates/cilium-cleanup/serviceaccount.yaml
new file mode 100644
index 00000000..c009cd7d
--- /dev/null
+++ b/helm/cluster-aws/templates/cilium-cleanup/serviceaccount.yaml
@@ -0,0 +1,13 @@
+{{- if eq (required "global.connectivity.cilium.ipamMode is required" .Values.global.connectivity.cilium.ipamMode) "eni" }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "resource.default.name" . }}-cilium-cleanup
+  namespace: {{ .Release.Namespace }}
+  annotations:
+    helm.sh/hook: pre-delete
+    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded,hook-failed
+    helm.sh/hook-weight: "-1"
+  labels:
+    {{- include "labels.common" . | nindent 4 }}
+{{- end }}

From 23cc5143473a5ac7e8caf15d6310f484af4c2ddf Mon Sep 17 00:00:00 2001
From: iuri
Date: Wed, 12 Feb 2025 18:28:50 +0100
Subject: [PATCH 2/3] Update helm/cluster-aws/templates/cilium-cleanup/job.yaml

Co-authored-by: Jose Armesto
---
 helm/cluster-aws/templates/cilium-cleanup/job.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/helm/cluster-aws/templates/cilium-cleanup/job.yaml b/helm/cluster-aws/templates/cilium-cleanup/job.yaml
index 0fac8afb..c8c57c0e 100644
--- a/helm/cluster-aws/templates/cilium-cleanup/job.yaml
+++ b/helm/cluster-aws/templates/cilium-cleanup/job.yaml
@@ -14,7 +14,7 @@ metadata:
   namespace: {{ .Release.Namespace }}
   annotations:
     helm.sh/hook: pre-delete
-    helm.sh/hook-delete-policy: before-hook-creation
+    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
     helm.sh/hook-weight: "0"
   labels:
     {{- include "labels.common" . | nindent 4 }}

From 5b21cb5718790551fc225fb6196a482313353204 Mon Sep 17 00:00:00 2001
From: iuri aranda
Date: Wed, 12 Feb 2025 18:30:58 +0100
Subject: [PATCH 3/3] Make the job fail gracefully in case the cluster is in half-deleted state

e.g.
- if the kubeconfig secret does not exist
- if the workload cluster API is inaccessible
---
 .../templates/cilium-cleanup/job.yaml | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/helm/cluster-aws/templates/cilium-cleanup/job.yaml b/helm/cluster-aws/templates/cilium-cleanup/job.yaml
index c8c57c0e..73b6370e 100644
--- a/helm/cluster-aws/templates/cilium-cleanup/job.yaml
+++ b/helm/cluster-aws/templates/cilium-cleanup/job.yaml
@@ -38,19 +38,27 @@ spec:
           capabilities:
             drop:
             - ALL
-          readOnlyRootFilesystem: true
         env:
         - name: CLUSTER
           value: {{ include "resource.default.name" . }}
-        - name: KUBECONFIG
-          value: /etc/kubeconfig/value
+        - name: NAMESPACE
+          value: {{ .Release.Namespace }}
         command:
         - /bin/sh
         args:
         - -c
         - |
-          echo "Scaling down cilium-operator on cluster ${CLUSTER} as it is about to be deleted."
-          kubectl scale --namespace kube-system deployment cilium-operator --replicas=0
+          echo "Fetching ${CLUSTER} kubeconfig..."
+          kubectl get secret -n ${NAMESPACE} ${CLUSTER}-kubeconfig -o jsonpath='{.data.value}' | base64 -d > /tmp/${CLUSTER}-kubeconfig
+          if [ ! -f /tmp/${CLUSTER}-kubeconfig ]; then
+            echo "Failed to fetch kubeconfig for cluster ${CLUSTER}. Assuming it is already deleted."
+            exit 0
+          fi
+
+          echo "Scaling down cilium-operator on cluster ${CLUSTER} as it is about to be deleted..."
+          kubectl --kubeconfig /tmp/${CLUSTER}-kubeconfig scale --namespace kube-system deployment cilium-operator --replicas=0
+
+          exit 0 # Ignore previous exit code, we don't want to block app deletion
         resources:
           requests:
            cpu: 10m
@@ -58,15 +66,6 @@ spec:
           limits:
             cpu: 100m
             memory: 256Mi
-        volumeMounts:
-        - name: kubeconfig
-          mountPath: /etc/kubeconfig
-          readOnly: true
       restartPolicy: Never
-      volumes:
-      - name: kubeconfig
-        secret:
-          secretName: {{ include "resource.default.name" . }}-kubeconfig
-          defaultMode: 420
   ttlSecondsAfterFinished: 86400 # 24h
 {{- end }}
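
The pre-delete hook added by these patches boils down to two kubectl calls, run by the Job on the management cluster against the workload cluster. As a rough manual sketch of the same steps (the cluster name "demo01" and namespace "org-demo" are placeholders, and the secret name assumes the "<cluster>-kubeconfig" convention the chart already relies on):

    # Fetch the workload cluster kubeconfig from the management cluster,
    # mirroring what the cleanup Job's script does.
    kubectl get secret -n org-demo demo01-kubeconfig -o jsonpath='{.data.value}' \
      | base64 -d > /tmp/demo01-kubeconfig

    # Scale down cilium-operator on the workload cluster so it cannot detach
    # ENIs (and then fail to delete them) while the cluster is being torn down.
    kubectl --kubeconfig /tmp/demo01-kubeconfig scale --namespace kube-system \
      deployment cilium-operator --replicas=0

With patch 3 applied, the Job tolerates either step failing and always exits 0, so a half-deleted cluster does not block removal of the cluster-aws app.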