Skip to content

Commit 63bdf77

Browse files
committed
ci: Add workload cluster creation and kubectl verification to smoke test
This ensures that the Cluster API Operator can successfully create and manage a functional Kubernetes cluster on which kubectl commands work properly.

Signed-off-by: kahirokunn <[email protected]>
1 parent c697f7f commit 63bdf77

File tree

1 file changed

+235
-4
lines changed

1 file changed

+235
-4
lines changed

.github/workflows/smoke-test.yaml

Lines changed: 235 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ jobs:
3333
chmod +x kubectl
3434
sudo mv kubectl /usr/local/bin/
3535
36+
- name: Install yq
37+
run: |
38+
wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O yq
39+
chmod +x yq
40+
sudo mv yq /usr/local/bin/
41+
3642
- name: Install Helm
3743
run: |
3844
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
@@ -59,7 +65,21 @@ jobs:
5965
run: |
6066
chmod +x ./hack/ensure-kind.sh
6167
./hack/ensure-kind.sh
62-
kind create cluster --name capi-operator-smoke-test --wait 5m
68+
69+
# Create kind cluster with Docker socket mount for CAPD
70+
cat <<EOF > /tmp/kind-config.yaml
71+
kind: Cluster
72+
apiVersion: kind.x-k8s.io/v1alpha4
73+
nodes:
74+
- role: control-plane
75+
extraMounts:
76+
- hostPath: /var/run/docker.sock
77+
containerPath: /var/run/docker.sock
78+
- hostPath: /var/lib/docker
79+
containerPath: /var/lib/docker
80+
EOF
81+
82+
kind create cluster --name capi-operator-smoke-test --config /tmp/kind-config.yaml --wait 5m
6383
kubectl cluster-info --context kind-capi-operator-smoke-test
6484
6585
- name: Load Docker image to kind
@@ -115,6 +135,12 @@ jobs:
115135
core:
116136
cluster-api:
117137
namespace: capi-system
138+
bootstrap:
139+
kubeadm:
140+
namespace: capi-kubeadm-bootstrap-system
141+
controlPlane:
142+
kubeadm:
143+
namespace: capi-kubeadm-control-plane-system
118144
infrastructure:
119145
docker:
120146
namespace: capd-system
@@ -133,20 +159,39 @@ jobs:
133159
134160
helm install capi-providers "$PROVIDERS_CHART_PACKAGE" \
135161
-f /tmp/providers-values.yaml \
136-
--wait
162+
--wait \
163+
--timeout 3m
137164
138165
- name: Wait for providers to be ready
139166
run: |
140-
echo "Waiting for Core Provider to be ready..."
167+
echo "=== Waiting for Core Provider to be ready ==="
141168
kubectl wait --for=condition=Ready --timeout=300s -n capi-system coreprovider/cluster-api || true
142169
143-
echo "Waiting for Infrastructure Provider to be ready..."
170+
# Additional check for CAPD provider to ensure Docker socket is accessible
171+
echo -e "\n=== Checking Docker socket access in CAPD pod ==="
172+
kubectl exec -n capd-system deployment/capd-controller-manager -- ls -la /var/run/docker.sock || echo "Docker socket not mounted"
173+
174+
echo -e "\n=== Waiting for Bootstrap Provider to be ready ==="
175+
kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-bootstrap-system bootstrapprovider/kubeadm || true
176+
177+
echo -e "\n=== Waiting for Control Plane Provider to be ready ==="
178+
kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-control-plane-system controlplaneprovider/kubeadm || true
179+
180+
echo -e "\n=== Waiting for Infrastructure Provider to be ready ==="
144181
kubectl wait --for=condition=Ready --timeout=300s -n capd-system infrastructureprovider/docker || true
145182
146183
# Additional wait for deployments
184+
echo -e "\n=== Waiting for provider deployments ==="
147185
kubectl wait --for=condition=Available --timeout=300s -n capi-system deployment/capi-controller-manager || true
186+
kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager || true
187+
kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager || true
148188
kubectl wait --for=condition=Available --timeout=300s -n capd-system deployment/capd-controller-manager || true
149189
190+
# Wait for webhooks to be ready
191+
echo -e "\n=== Waiting for webhook services ==="
192+
kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-bootstrap-system service/capi-kubeadm-bootstrap-webhook-service || true
193+
kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-control-plane-system service/capi-kubeadm-control-plane-webhook-service || true
194+
150195
- name: Verify installation
151196
run: |
152197
echo "=== Cluster API Operator Status ==="
@@ -156,13 +201,27 @@ jobs:
156201
kubectl get coreprovider -A -o wide
157202
kubectl describe coreprovider -n capi-system cluster-api || true
158203
204+
echo -e "\n=== Bootstrap Provider Status ==="
205+
kubectl get bootstrapprovider -A -o wide
206+
kubectl describe bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm || true
207+
208+
echo -e "\n=== Control Plane Provider Status ==="
209+
kubectl get controlplaneprovider -A -o wide
210+
kubectl describe controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm || true
211+
159212
echo -e "\n=== Infrastructure Provider Status ==="
160213
kubectl get infrastructureprovider -A -o wide
161214
kubectl describe infrastructureprovider -n capd-system docker || true
162215
163216
echo -e "\n=== All Pods ==="
164217
kubectl get pods -A | grep -E "(capi-|capd-)"
165218
219+
echo -e "\n=== Webhook Services ==="
220+
kubectl get svc -A | grep webhook
221+
222+
echo -e "\n=== Webhook Certificates ==="
223+
kubectl get certificate,certificaterequest -A | grep -E "(capi-|capd-)"
224+
166225
echo -e "\n=== CRDs ==="
167226
kubectl get crds | grep -E "(cluster.x-k8s.io|operator.cluster.x-k8s.io)"
168227
@@ -176,6 +235,22 @@ jobs:
176235
exit 1
177236
fi
178237
238+
# Check if bootstrap provider is ready
239+
BOOTSTRAP_READY=$(kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
240+
if [ "$BOOTSTRAP_READY" != "True" ]; then
241+
echo "Bootstrap provider is not ready"
242+
kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o yaml
243+
exit 1
244+
fi
245+
246+
# Check if control plane provider is ready
247+
CONTROLPLANE_READY=$(kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
248+
if [ "$CONTROLPLANE_READY" != "True" ]; then
249+
echo "Control plane provider is not ready"
250+
kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o yaml
251+
exit 1
252+
fi
253+
179254
# Check if infrastructure provider is ready
180255
INFRA_READY=$(kubectl get infrastructureprovider -n capd-system docker -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
181256
if [ "$INFRA_READY" != "True" ]; then
@@ -186,6 +261,120 @@ jobs:
186261
187262
echo "All providers are ready!"
188263
264+
# Additional webhook readiness check
265+
echo -e "\n=== Checking webhook endpoints ==="
266+
kubectl get endpoints -A | grep webhook
267+
268+
- name: Download cluster manifest
269+
run: |
270+
echo "=== Downloading cluster manifest ==="
271+
# --fail makes curl exit non-zero on HTTP errors (404/5xx) instead of saving the error page as the manifest.
curl --fail -L https://raw.githubusercontent.com/kubernetes-sigs/cluster-api/refs/heads/main/test/infrastructure/docker/examples/simple-cluster.yaml -o simple-cluster.yaml
272+
273+
# Show the manifest for debugging
274+
echo "=== Cluster manifest ==="
275+
cat simple-cluster.yaml
276+
277+
# Extract cluster name from the manifest using yq
278+
CLUSTER_NAME=$(yq eval 'select(.kind == "Cluster") | .metadata.name' simple-cluster.yaml)
279+
280+
# Ensure cluster name was extracted successfully
281+
if [ -z "$CLUSTER_NAME" ]; then
282+
echo "ERROR: Failed to extract cluster name from simple-cluster.yaml"
283+
echo "Please check the manifest structure"
284+
exit 1
285+
fi
286+
287+
echo "Detected cluster name: $CLUSTER_NAME"
288+
# Quote the redirect target so the path in GITHUB_ENV is never word-split or globbed.
echo "CLUSTER_NAME=$CLUSTER_NAME" >> "$GITHUB_ENV"
289+
290+
- name: Create workload cluster
291+
run: |
292+
echo "=== Pre-creation diagnostics ==="
293+
echo "Checking webhook services..."
294+
kubectl get svc -A | grep webhook
295+
296+
echo -e "\nChecking webhook endpoints..."
297+
kubectl get endpoints -A | grep webhook
298+
299+
echo -e "\nChecking webhook certificates..."
300+
kubectl get secret -A | grep webhook-service-cert
301+
302+
echo -e "\n=== Creating workload cluster ==="
303+
kubectl apply -f simple-cluster.yaml
304+
305+
echo -e "\n=== Cluster resources created ==="
306+
kubectl get cluster,dockercluster,kubeadmcontrolplane,machinedeployment -A
307+
308+
- name: Wait for cluster to be ready
309+
run: |
310+
echo "=== Waiting for cluster to be provisioned ==="
311+
kubectl wait --for=condition=Ready --timeout=600s cluster/${CLUSTER_NAME}
312+
313+
echo "=== Waiting for control plane to be initialized ==="
314+
kubectl wait --for=condition=Ready --timeout=600s kubeadmcontrolplane -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
315+
316+
echo "=== Waiting for first control plane node ==="
317+
kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 --timeout=600s kubeadmcontrolplane -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
318+
319+
echo "=== Cluster status ==="
320+
kubectl get cluster ${CLUSTER_NAME} -o wide
321+
kubectl get machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
322+
323+
- name: Get workload cluster kubeconfig
324+
run: |
325+
echo "=== Getting workload cluster kubeconfig ==="
326+
# Get kubeconfig from the cluster
327+
# With pipefail, a failing `kubectl get secret` is no longer masked by base64's exit status,
# so the pipeline cannot report success while writing an empty kubeconfig.
set -o pipefail
kubectl get secret "${CLUSTER_NAME}-kubeconfig" -o jsonpath='{.data.value}' | base64 -d > "${CLUSTER_NAME}.kubeconfig"
328+
329+
echo "=== Testing kubeconfig ==="
330+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig cluster-info || echo "Cluster may not be ready yet"
331+
332+
- name: Verify kubectl commands work on workload cluster
333+
run: |
334+
echo "=== Testing kubectl get po on workload cluster ==="
335+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get po -A
336+
337+
echo -e "\n=== Testing kubectl get nodes ==="
338+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get nodes
339+
340+
echo -e "\n=== Waiting for system pods to be ready ==="
341+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l k8s-app=kube-proxy
342+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-apiserver
343+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-controller-manager
344+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-scheduler
345+
346+
- name: Deploy and test sample application
347+
run: |
348+
echo "=== Deploying nginx test application ==="
349+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig create deployment nginx --image=nginx:alpine --replicas=2
350+
351+
echo "=== Waiting for deployment to be ready ==="
352+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Available --timeout=120s deployment/nginx
353+
354+
echo "=== Verifying pods are running ==="
355+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get po -l app=nginx
356+
357+
echo "=== Creating a service ==="
358+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig expose deployment nginx --port=80 --type=ClusterIP
359+
360+
echo "=== Verifying service ==="
361+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get svc nginx
362+
363+
- name: Verify cluster functionality
364+
run: |
365+
echo "=== Final cluster verification ==="
366+
echo "Cluster nodes:"
367+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get nodes -o wide
368+
369+
echo -e "\nAll pods:"
370+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get po -A
371+
372+
echo -e "\nAll services:"
373+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get svc -A
374+
375+
echo -e "\nCluster info:"
376+
kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig cluster-info
377+
189378
- name: Collect debug information on failure
190379
if: failure()
191380
run: |
@@ -198,9 +387,36 @@ jobs:
198387
echo -e "\n=== Core Provider Logs ==="
199388
kubectl logs -n capi-system deployment/capi-controller-manager --tail=100 || true
200389
390+
echo -e "\n=== Bootstrap Provider Logs ==="
391+
kubectl logs -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager --tail=100 || true
392+
393+
echo -e "\n=== Control Plane Provider Logs ==="
394+
kubectl logs -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager --tail=100 || true
395+
201396
echo -e "\n=== Infrastructure Provider Logs ==="
202397
kubectl logs -n capd-system deployment/capd-controller-manager --tail=100 || true
203398
399+
echo -e "\n=== Webhook Services and Endpoints ==="
400+
kubectl get svc,endpoints -A | grep webhook || true
401+
402+
echo -e "\n=== Webhook Certificates ==="
403+
kubectl get certificate,certificaterequest,secret -A | grep -E "(webhook|serving-cert)" || true
404+
405+
echo -e "\n=== Cluster Resources ==="
406+
kubectl get cluster,dockercluster,kubeadmcontrolplane,machine,dockermachine -A -o wide || true
407+
408+
echo -e "\n=== Describe Cluster ==="
409+
kubectl describe cluster ${CLUSTER_NAME} || true
410+
411+
echo -e "\n=== Describe Machines ==="
412+
kubectl describe machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME} || true
413+
414+
echo -e "\n=== Docker Containers ==="
415+
docker ps -a | grep -E "(smoke-test|kind)" || true
416+
417+
echo -e "\n=== Kind Clusters ==="
418+
kind get clusters || true
419+
204420
echo -e "\n=== Describe Failed Pods ==="
205421
kubectl get pods -A | grep -v Running | grep -v Completed | tail -n +2 | while read namespace name ready status restarts age; do
206422
echo "Describing pod $name in namespace $namespace"
@@ -211,4 +427,19 @@ jobs:
211427
- name: Clean up
212428
if: always()
213429
run: |
430+
echo "=== Cleaning up kind clusters ==="
431+
# List all kind clusters before cleanup
432+
echo "Current kind clusters:"
433+
kind get clusters || true
434+
435+
# Delete workload cluster if it exists
436+
echo "Deleting workload cluster: ${CLUSTER_NAME}"
437+
# CLUSTER_NAME is only exported once the manifest step succeeds; skip the delete when it is
# empty so `kind delete cluster --name` is never invoked without an argument.
if [ -n "${CLUSTER_NAME:-}" ]; then kind delete cluster --name "${CLUSTER_NAME}" || true; fi
438+
439+
# Delete management cluster
440+
echo "Deleting management cluster: capi-operator-smoke-test"
214441
kind delete cluster --name capi-operator-smoke-test || true
442+
443+
# Verify all clusters are deleted
444+
echo "Remaining kind clusters:"
445+
kind get clusters || true

0 commit comments

Comments
 (0)