Skip to content

Commit 99063eb

Browse files
committed
ci: Add workload cluster creation and kubectl verification to smoke test
This ensures the Cluster API Operator can successfully create and manage a functional Kubernetes cluster where kubectl commands work properly.

Signed-off-by: kahirokunn <[email protected]>
1 parent c697f7f commit 99063eb

File tree

1 file changed

+233
-4
lines changed

1 file changed

+233
-4
lines changed

.github/workflows/smoke-test.yaml

Lines changed: 233 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,12 @@ jobs:
3333
chmod +x kubectl
3434
sudo mv kubectl /usr/local/bin/
3535
36+
- name: Install yq
37+
run: |
38+
wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O yq
39+
chmod +x yq
40+
sudo mv yq /usr/local/bin/
41+
3642
- name: Install Helm
3743
run: |
3844
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
@@ -59,7 +65,19 @@ jobs:
5965
run: |
6066
chmod +x ./hack/ensure-kind.sh
6167
./hack/ensure-kind.sh
62-
kind create cluster --name capi-operator-smoke-test --wait 5m
68+
69+
# Create kind cluster with Docker socket mount for CAPD
70+
cat <<EOF > /tmp/kind-config.yaml
71+
kind: Cluster
72+
apiVersion: kind.x-k8s.io/v1alpha4
73+
nodes:
74+
- role: control-plane
75+
extraMounts:
76+
- hostPath: /var/run/docker.sock
77+
containerPath: /var/run/docker.sock
78+
EOF
79+
80+
kind create cluster --name capi-operator-smoke-test --config /tmp/kind-config.yaml --wait 5m
6381
kubectl cluster-info --context kind-capi-operator-smoke-test
6482
6583
- name: Load Docker image to kind
@@ -115,6 +133,12 @@ jobs:
115133
core:
116134
cluster-api:
117135
namespace: capi-system
136+
bootstrap:
137+
kubeadm:
138+
namespace: capi-kubeadm-bootstrap-system
139+
controlPlane:
140+
kubeadm:
141+
namespace: capi-kubeadm-control-plane-system
118142
infrastructure:
119143
docker:
120144
namespace: capd-system
@@ -133,20 +157,39 @@ jobs:
133157
134158
helm install capi-providers "$PROVIDERS_CHART_PACKAGE" \
135159
-f /tmp/providers-values.yaml \
136-
--wait
160+
--wait \
161+
--timeout 3m
137162
138163
- name: Wait for providers to be ready
139164
run: |
140-
echo "Waiting for Core Provider to be ready..."
165+
echo "=== Waiting for Core Provider to be ready ==="
141166
kubectl wait --for=condition=Ready --timeout=300s -n capi-system coreprovider/cluster-api || true
142167
143-
echo "Waiting for Infrastructure Provider to be ready..."
168+
# Additional check for CAPD provider to ensure Docker socket is accessible
169+
echo -e "\n=== Checking Docker socket access in CAPD pod ==="
170+
kubectl exec -n capd-system deployment/capd-controller-manager -- ls -la /var/run/docker.sock || echo "Docker socket not mounted"
171+
172+
echo -e "\n=== Waiting for Bootstrap Provider to be ready ==="
173+
kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-bootstrap-system bootstrapprovider/kubeadm || true
174+
175+
echo -e "\n=== Waiting for Control Plane Provider to be ready ==="
176+
kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-control-plane-system controlplaneprovider/kubeadm || true
177+
178+
echo -e "\n=== Waiting for Infrastructure Provider to be ready ==="
144179
kubectl wait --for=condition=Ready --timeout=300s -n capd-system infrastructureprovider/docker || true
145180
146181
# Additional wait for deployments
182+
echo -e "\n=== Waiting for provider deployments ==="
147183
kubectl wait --for=condition=Available --timeout=300s -n capi-system deployment/capi-controller-manager || true
184+
kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager || true
185+
kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager || true
148186
kubectl wait --for=condition=Available --timeout=300s -n capd-system deployment/capd-controller-manager || true
149187
188+
# Wait for webhooks to be ready
189+
echo -e "\n=== Waiting for webhook services ==="
190+
kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-bootstrap-system service/capi-kubeadm-bootstrap-webhook-service || true
191+
kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-control-plane-system service/capi-kubeadm-control-plane-webhook-service || true
192+
150193
- name: Verify installation
151194
run: |
152195
echo "=== Cluster API Operator Status ==="
@@ -156,13 +199,27 @@ jobs:
156199
kubectl get coreprovider -A -o wide
157200
kubectl describe coreprovider -n capi-system cluster-api || true
158201
202+
echo -e "\n=== Bootstrap Provider Status ==="
203+
kubectl get bootstrapprovider -A -o wide
204+
kubectl describe bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm || true
205+
206+
echo -e "\n=== Control Plane Provider Status ==="
207+
kubectl get controlplaneprovider -A -o wide
208+
kubectl describe controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm || true
209+
159210
echo -e "\n=== Infrastructure Provider Status ==="
160211
kubectl get infrastructureprovider -A -o wide
161212
kubectl describe infrastructureprovider -n capd-system docker || true
162213
163214
echo -e "\n=== All Pods ==="
164215
kubectl get pods -A | grep -E "(capi-|capd-)"
165216
217+
echo -e "\n=== Webhook Services ==="
218+
kubectl get svc -A | grep webhook
219+
220+
echo -e "\n=== Webhook Certificates ==="
221+
kubectl get certificate,certificaterequest -A | grep -E "(capi-|capd-)"
222+
166223
echo -e "\n=== CRDs ==="
167224
kubectl get crds | grep -E "(cluster.x-k8s.io|operator.cluster.x-k8s.io)"
168225
@@ -176,6 +233,22 @@ jobs:
176233
exit 1
177234
fi
178235
236+
# Check if bootstrap provider is ready
237+
BOOTSTRAP_READY=$(kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
238+
if [ "$BOOTSTRAP_READY" != "True" ]; then
239+
echo "Bootstrap provider is not ready"
240+
kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o yaml
241+
exit 1
242+
fi
243+
244+
# Check if control plane provider is ready
245+
CONTROLPLANE_READY=$(kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
246+
if [ "$CONTROLPLANE_READY" != "True" ]; then
247+
echo "Control plane provider is not ready"
248+
kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o yaml
249+
exit 1
250+
fi
251+
179252
# Check if infrastructure provider is ready
180253
INFRA_READY=$(kubectl get infrastructureprovider -n capd-system docker -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
181254
if [ "$INFRA_READY" != "True" ]; then
@@ -186,6 +259,120 @@ jobs:
186259
187260
echo "All providers are ready!"
188261
262+
# Additional webhook readiness check
263+
echo -e "\n=== Checking webhook endpoints ==="
264+
kubectl get endpoints -A | grep webhook
265+
266+
- name: Download cluster manifest
267+
run: |
268+
echo "=== Downloading cluster manifest ==="
269+
curl -L https://raw.githubusercontent.com/kubernetes-sigs/cluster-api/refs/heads/main/test/infrastructure/docker/examples/simple-cluster.yaml -o simple-cluster.yaml
270+
271+
# Show the manifest for debugging
272+
echo "=== Cluster manifest ==="
273+
cat simple-cluster.yaml
274+
275+
# Extract cluster name from the manifest using yq
276+
CLUSTER_NAME=$(yq eval 'select(.kind == "Cluster") | .metadata.name' simple-cluster.yaml)
277+
278+
# Ensure cluster name was extracted successfully
279+
if [ -z "$CLUSTER_NAME" ]; then
280+
echo "ERROR: Failed to extract cluster name from simple-cluster.yaml"
281+
echo "Please check the manifest structure"
282+
exit 1
283+
fi
284+
285+
echo "Detected cluster name: $CLUSTER_NAME"
286+
echo "CLUSTER_NAME=$CLUSTER_NAME" >> $GITHUB_ENV
287+
288+
- name: Create workload cluster
289+
run: |
290+
echo "=== Pre-creation diagnostics ==="
291+
echo "Checking webhook services..."
292+
kubectl get svc -A | grep webhook
293+
294+
echo -e "\nChecking webhook endpoints..."
295+
kubectl get endpoints -A | grep webhook
296+
297+
echo -e "\nChecking webhook certificates..."
298+
kubectl get secret -A | grep webhook-service-cert
299+
300+
echo -e "\n=== Creating workload cluster ==="
301+
kubectl apply -f simple-cluster.yaml
302+
303+
echo -e "\n=== Cluster resources created ==="
304+
kubectl get cluster,dockercluster,kubeadmcontrolplane,machinedeployment -A
305+
306+
- name: Wait for cluster to be ready
307+
run: |
308+
echo "=== Waiting for cluster to be provisioned ==="
309+
kubectl wait --for=condition=Ready --timeout=600s cluster/${CLUSTER_NAME}
310+
311+
echo "=== Waiting for control plane to be initialized ==="
312+
kubectl wait --for=condition=Ready --timeout=600s kubeadmcontrolplane -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
313+
314+
echo "=== Waiting for first control plane node ==="
315+
kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 --timeout=600s kubeadmcontrolplane -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
316+
317+
echo "=== Cluster status ==="
318+
kubectl get cluster ${CLUSTER_NAME} -o wide
319+
kubectl get machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
320+
321+
- name: Get workload cluster kubeconfig
322+
run: |
323+
echo "=== Getting workload cluster kubeconfig ==="
324+
# Get kubeconfig from the cluster
325+
kubectl get secret ${CLUSTER_NAME}-kubeconfig -o jsonpath='{.data.value}' | base64 -d > ${CLUSTER_NAME}.kubeconfig
326+
327+
echo "=== Testing kubeconfig ==="
328+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig cluster-info || echo "Cluster may not be ready yet"
329+
330+
- name: Verify kubectl commands work on workload cluster
331+
run: |
332+
echo "=== Testing kubectl get po on workload cluster ==="
333+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get po -A
334+
335+
echo -e "\n=== Testing kubectl get nodes ==="
336+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get nodes
337+
338+
echo -e "\n=== Waiting for system pods to be ready ==="
339+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l k8s-app=kube-proxy
340+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-apiserver
341+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-controller-manager
342+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-scheduler
343+
344+
- name: Deploy and test sample application
345+
run: |
346+
echo "=== Deploying nginx test application ==="
347+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig create deployment nginx --image=nginx:alpine --replicas=2
348+
349+
echo "=== Waiting for deployment to be ready ==="
350+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Available --timeout=120s deployment/nginx
351+
352+
echo "=== Verifying pods are running ==="
353+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get po -l app=nginx
354+
355+
echo "=== Creating a service ==="
356+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig expose deployment nginx --port=80 --type=ClusterIP
357+
358+
echo "=== Verifying service ==="
359+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get svc nginx
360+
361+
- name: Verify cluster functionality
362+
run: |
363+
echo "=== Final cluster verification ==="
364+
echo "Cluster nodes:"
365+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get nodes -o wide
366+
367+
echo -e "\nAll pods:"
368+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get po -A
369+
370+
echo -e "\nAll services:"
371+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get svc -A
372+
373+
echo -e "\nCluster info:"
374+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig cluster-info
375+
189376
- name: Collect debug information on failure
190377
if: failure()
191378
run: |
@@ -198,9 +385,36 @@ jobs:
198385
echo -e "\n=== Core Provider Logs ==="
199386
kubectl logs -n capi-system deployment/capi-controller-manager --tail=100 || true
200387
388+
echo -e "\n=== Bootstrap Provider Logs ==="
389+
kubectl logs -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager --tail=100 || true
390+
391+
echo -e "\n=== Control Plane Provider Logs ==="
392+
kubectl logs -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager --tail=100 || true
393+
201394
echo -e "\n=== Infrastructure Provider Logs ==="
202395
kubectl logs -n capd-system deployment/capd-controller-manager --tail=100 || true
203396
397+
echo -e "\n=== Webhook Services and Endpoints ==="
398+
kubectl get svc,endpoints -A | grep webhook || true
399+
400+
echo -e "\n=== Webhook Certificates ==="
401+
kubectl get certificate,certificaterequest,secret -A | grep -E "(webhook|serving-cert)" || true
402+
403+
echo -e "\n=== Cluster Resources ==="
404+
kubectl get cluster,dockercluster,kubeadmcontrolplane,machine,dockermachine -A -o wide || true
405+
406+
echo -e "\n=== Describe Cluster ==="
407+
kubectl describe cluster ${CLUSTER_NAME} || true
408+
409+
echo -e "\n=== Describe Machines ==="
410+
kubectl describe machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME} || true
411+
412+
echo -e "\n=== Docker Containers ==="
413+
docker ps -a | grep -E "(smoke-test|kind)" || true
414+
415+
echo -e "\n=== Kind Clusters ==="
416+
kind get clusters || true
417+
204418
echo -e "\n=== Describe Failed Pods ==="
205419
kubectl get pods -A | grep -v Running | grep -v Completed | tail -n +2 | while read namespace name ready status restarts age; do
206420
echo "Describing pod $name in namespace $namespace"
@@ -211,4 +425,19 @@ jobs:
211425
- name: Clean up
212426
if: always()
213427
run: |
428+
echo "=== Cleaning up kind clusters ==="
429+
# List all kind clusters before cleanup
430+
echo "Current kind clusters:"
431+
kind get clusters || true
432+
433+
# Delete workload cluster if it exists
434+
echo "Deleting workload cluster: ${CLUSTER_NAME}"
435+
kind delete cluster --name ${CLUSTER_NAME} || true
436+
437+
# Delete management cluster
438+
echo "Deleting management cluster: capi-operator-smoke-test"
214439
kind delete cluster --name capi-operator-smoke-test || true
440+
441+
# Verify all clusters are deleted
442+
echo "Remaining kind clusters:"
443+
kind get clusters || true

0 commit comments

Comments
 (0)