Skip to content

Commit 09fec27

Browse files
committed
ci: Add workload cluster creation and kubectl verification to smoke test
This ensures the Cluster API Operator can successfully create and manage a functional Kubernetes cluster where kubectl commands work properly.

Signed-off-by: kahirokunn <[email protected]>
1 parent: c697f7f · commit: 09fec27

File tree

1 file changed

+210
-4
lines changed

1 file changed

+210
-4
lines changed

.github/workflows/smoke-test.yaml

Lines changed: 210 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,19 @@ jobs:
5959
run: |
6060
chmod +x ./hack/ensure-kind.sh
6161
./hack/ensure-kind.sh
62-
kind create cluster --name capi-operator-smoke-test --wait 5m
62+
63+
# Create kind cluster with Docker socket mount for CAPD
64+
cat <<EOF > /tmp/kind-config.yaml
65+
kind: Cluster
66+
apiVersion: kind.x-k8s.io/v1alpha4
67+
nodes:
68+
- role: control-plane
69+
extraMounts:
70+
- hostPath: /var/run/docker.sock
71+
containerPath: /var/run/docker.sock
72+
EOF
73+
74+
kind create cluster --name capi-operator-smoke-test --config /tmp/kind-config.yaml --wait 5m
6375
kubectl cluster-info --context kind-capi-operator-smoke-test
6476
6577
- name: Load Docker image to kind
@@ -115,6 +127,12 @@ jobs:
115127
core:
116128
cluster-api:
117129
namespace: capi-system
130+
bootstrap:
131+
kubeadm:
132+
namespace: capi-kubeadm-bootstrap-system
133+
controlPlane:
134+
kubeadm:
135+
namespace: capi-kubeadm-control-plane-system
118136
infrastructure:
119137
docker:
120138
namespace: capd-system
@@ -133,20 +151,35 @@ jobs:
133151
134152
helm install capi-providers "$PROVIDERS_CHART_PACKAGE" \
135153
-f /tmp/providers-values.yaml \
136-
--wait
154+
--wait \
155+
--timeout 3m
137156
138157
- name: Wait for providers to be ready
139158
run: |
140-
echo "Waiting for Core Provider to be ready..."
159+
echo "=== Waiting for Core Provider to be ready ==="
141160
kubectl wait --for=condition=Ready --timeout=300s -n capi-system coreprovider/cluster-api || true
142161
143-
echo "Waiting for Infrastructure Provider to be ready..."
162+
echo -e "\n=== Waiting for Bootstrap Provider to be ready ==="
163+
kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-bootstrap-system bootstrapprovider/kubeadm || true
164+
165+
echo -e "\n=== Waiting for Control Plane Provider to be ready ==="
166+
kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-control-plane-system controlplaneprovider/kubeadm || true
167+
168+
echo -e "\n=== Waiting for Infrastructure Provider to be ready ==="
144169
kubectl wait --for=condition=Ready --timeout=300s -n capd-system infrastructureprovider/docker || true
145170
146171
# Additional wait for deployments
172+
echo -e "\n=== Waiting for provider deployments ==="
147173
kubectl wait --for=condition=Available --timeout=300s -n capi-system deployment/capi-controller-manager || true
174+
kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager || true
175+
kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager || true
148176
kubectl wait --for=condition=Available --timeout=300s -n capd-system deployment/capd-controller-manager || true
149177
178+
# Wait for webhooks to be ready
179+
echo -e "\n=== Waiting for webhook services ==="
180+
kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-bootstrap-system service/capi-kubeadm-bootstrap-webhook-service || true
181+
kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-control-plane-system service/capi-kubeadm-control-plane-webhook-service || true
182+
150183
- name: Verify installation
151184
run: |
152185
echo "=== Cluster API Operator Status ==="
@@ -156,13 +189,27 @@ jobs:
156189
kubectl get coreprovider -A -o wide
157190
kubectl describe coreprovider -n capi-system cluster-api || true
158191
192+
echo -e "\n=== Bootstrap Provider Status ==="
193+
kubectl get bootstrapprovider -A -o wide
194+
kubectl describe bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm || true
195+
196+
echo -e "\n=== Control Plane Provider Status ==="
197+
kubectl get controlplaneprovider -A -o wide
198+
kubectl describe controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm || true
199+
159200
echo -e "\n=== Infrastructure Provider Status ==="
160201
kubectl get infrastructureprovider -A -o wide
161202
kubectl describe infrastructureprovider -n capd-system docker || true
162203
163204
echo -e "\n=== All Pods ==="
164205
kubectl get pods -A | grep -E "(capi-|capd-)"
165206
207+
echo -e "\n=== Webhook Services ==="
208+
kubectl get svc -A | grep webhook
209+
210+
echo -e "\n=== Webhook Certificates ==="
211+
kubectl get certificate,certificaterequest -A | grep -E "(capi-|capd-)"
212+
166213
echo -e "\n=== CRDs ==="
167214
kubectl get crds | grep -E "(cluster.x-k8s.io|operator.cluster.x-k8s.io)"
168215
@@ -176,6 +223,22 @@ jobs:
176223
exit 1
177224
fi
178225
226+
# Check if bootstrap provider is ready
227+
BOOTSTRAP_READY=$(kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
228+
if [ "$BOOTSTRAP_READY" != "True" ]; then
229+
echo "Bootstrap provider is not ready"
230+
kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o yaml
231+
exit 1
232+
fi
233+
234+
# Check if control plane provider is ready
235+
CONTROLPLANE_READY=$(kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
236+
if [ "$CONTROLPLANE_READY" != "True" ]; then
237+
echo "Control plane provider is not ready"
238+
kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o yaml
239+
exit 1
240+
fi
241+
179242
# Check if infrastructure provider is ready
180243
INFRA_READY=$(kubectl get infrastructureprovider -n capd-system docker -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
181244
if [ "$INFRA_READY" != "True" ]; then
@@ -186,6 +249,107 @@ jobs:
186249
187250
echo "All providers are ready!"
188251
252+
# Additional webhook readiness check
253+
echo -e "\n=== Checking webhook endpoints ==="
254+
kubectl get endpoints -A | grep webhook
255+
256+
- name: Download cluster manifest
257+
run: |
258+
echo "=== Downloading cluster manifest ==="
259+
curl -L https://raw.githubusercontent.com/kubernetes-sigs/cluster-api/refs/heads/main/test/infrastructure/docker/examples/simple-cluster.yaml -o simple-cluster.yaml
260+
261+
# Show the manifest for debugging
262+
echo "=== Cluster manifest ==="
263+
cat simple-cluster.yaml
264+
265+
- name: Create workload cluster
266+
run: |
267+
echo "=== Pre-creation diagnostics ==="
268+
echo "Checking webhook services..."
269+
kubectl get svc -A | grep webhook
270+
271+
echo -e "\nChecking webhook endpoints..."
272+
kubectl get endpoints -A | grep webhook
273+
274+
echo -e "\nChecking webhook certificates..."
275+
kubectl get secret -A | grep webhook-service-cert
276+
277+
echo -e "\n=== Creating workload cluster ==="
278+
kubectl apply -f simple-cluster.yaml
279+
280+
echo -e "\n=== Cluster resources created ==="
281+
kubectl get cluster,dockercluster,kubeadmcontrolplane,machinedeployment -A
282+
283+
- name: Wait for cluster to be ready
284+
run: |
285+
echo "=== Waiting for cluster to be provisioned ==="
286+
kubectl wait --for=condition=Ready --timeout=600s cluster/capi-quickstart
287+
288+
echo "=== Waiting for control plane to be initialized ==="
289+
kubectl wait --for=condition=Ready --timeout=600s kubeadmcontrolplane -l cluster.x-k8s.io/cluster-name=capi-quickstart
290+
291+
echo "=== Waiting for first control plane node ==="
292+
kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 --timeout=600s kubeadmcontrolplane -l cluster.x-k8s.io/cluster-name=capi-quickstart
293+
294+
echo "=== Cluster status ==="
295+
kubectl get cluster capi-quickstart -o wide
296+
kubectl get machines -l cluster.x-k8s.io/cluster-name=capi-quickstart
297+
298+
- name: Get workload cluster kubeconfig
299+
run: |
300+
echo "=== Getting workload cluster kubeconfig ==="
301+
# For Docker provider, use kind to get kubeconfig
302+
kind get kubeconfig --name capi-quickstart > capi-quickstart.kubeconfig
303+
304+
echo "=== Testing kubeconfig ==="
305+
kubectl --kubeconfig=capi-quickstart.kubeconfig cluster-info
306+
307+
- name: Verify kubectl commands work on workload cluster
308+
run: |
309+
echo "=== Testing kubectl get po on workload cluster ==="
310+
kubectl --kubeconfig=capi-quickstart.kubeconfig get po -A
311+
312+
echo -e "\n=== Testing kubectl get nodes ==="
313+
kubectl --kubeconfig=capi-quickstart.kubeconfig get nodes
314+
315+
echo -e "\n=== Waiting for system pods to be ready ==="
316+
kubectl --kubeconfig=capi-quickstart.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l k8s-app=kube-proxy
317+
kubectl --kubeconfig=capi-quickstart.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-apiserver
318+
kubectl --kubeconfig=capi-quickstart.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-controller-manager
319+
kubectl --kubeconfig=capi-quickstart.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-scheduler
320+
321+
- name: Deploy and test sample application
322+
run: |
323+
echo "=== Deploying nginx test application ==="
324+
kubectl --kubeconfig=capi-quickstart.kubeconfig create deployment nginx --image=nginx:alpine --replicas=2
325+
326+
echo "=== Waiting for deployment to be ready ==="
327+
kubectl --kubeconfig=capi-quickstart.kubeconfig wait --for=condition=Available --timeout=120s deployment/nginx
328+
329+
echo "=== Verifying pods are running ==="
330+
kubectl --kubeconfig=capi-quickstart.kubeconfig get po -l app=nginx
331+
332+
echo "=== Creating a service ==="
333+
kubectl --kubeconfig=capi-quickstart.kubeconfig expose deployment nginx --port=80 --type=ClusterIP
334+
335+
echo "=== Verifying service ==="
336+
kubectl --kubeconfig=capi-quickstart.kubeconfig get svc nginx
337+
338+
- name: Verify cluster functionality
339+
run: |
340+
echo "=== Final cluster verification ==="
341+
echo "Cluster nodes:"
342+
kubectl --kubeconfig=capi-quickstart.kubeconfig get nodes -o wide
343+
344+
echo -e "\nAll pods:"
345+
kubectl --kubeconfig=capi-quickstart.kubeconfig get po -A
346+
347+
echo -e "\nAll services:"
348+
kubectl --kubeconfig=capi-quickstart.kubeconfig get svc -A
349+
350+
echo -e "\nCluster info:"
351+
kubectl --kubeconfig=capi-quickstart.kubeconfig cluster-info
352+
189353
- name: Collect debug information on failure
190354
if: failure()
191355
run: |
@@ -198,9 +362,36 @@ jobs:
198362
echo -e "\n=== Core Provider Logs ==="
199363
kubectl logs -n capi-system deployment/capi-controller-manager --tail=100 || true
200364
365+
echo -e "\n=== Bootstrap Provider Logs ==="
366+
kubectl logs -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager --tail=100 || true
367+
368+
echo -e "\n=== Control Plane Provider Logs ==="
369+
kubectl logs -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager --tail=100 || true
370+
201371
echo -e "\n=== Infrastructure Provider Logs ==="
202372
kubectl logs -n capd-system deployment/capd-controller-manager --tail=100 || true
203373
374+
echo -e "\n=== Webhook Services and Endpoints ==="
375+
kubectl get svc,endpoints -A | grep webhook || true
376+
377+
echo -e "\n=== Webhook Certificates ==="
378+
kubectl get certificate,certificaterequest,secret -A | grep -E "(webhook|serving-cert)" || true
379+
380+
echo -e "\n=== Cluster Resources ==="
381+
kubectl get cluster,dockercluster,kubeadmcontrolplane,machine,dockermachine -A -o wide || true
382+
383+
echo -e "\n=== Describe Cluster ==="
384+
kubectl describe cluster capi-quickstart || true
385+
386+
echo -e "\n=== Describe Machines ==="
387+
kubectl describe machines -l cluster.x-k8s.io/cluster-name=capi-quickstart || true
388+
389+
echo -e "\n=== Docker Containers ==="
390+
docker ps -a | grep -E "(smoke-test|kind)" || true
391+
392+
echo -e "\n=== Kind Clusters ==="
393+
kind get clusters || true
394+
204395
echo -e "\n=== Describe Failed Pods ==="
205396
kubectl get pods -A | grep -v Running | grep -v Completed | tail -n +2 | while read namespace name ready status restarts age; do
206397
echo "Describing pod $name in namespace $namespace"
@@ -211,4 +402,19 @@ jobs:
211402
- name: Clean up
212403
if: always()
213404
run: |
405+
echo "=== Cleaning up kind clusters ==="
406+
# List all kind clusters before cleanup
407+
echo "Current kind clusters:"
408+
kind get clusters || true
409+
410+
# Delete workload cluster
411+
echo "Deleting workload cluster: capi-quickstart"
412+
kind delete cluster --name capi-quickstart || true
413+
414+
# Delete management cluster
415+
echo "Deleting management cluster: capi-operator-smoke-test"
214416
kind delete cluster --name capi-operator-smoke-test || true
417+
418+
# Verify all clusters are deleted
419+
echo "Remaining kind clusters:"
420+
kind get clusters || true

0 commit comments

Comments
 (0)