3333 chmod +x kubectl
3434 sudo mv kubectl /usr/local/bin/
3535
36+ - name : Install yq  # yq is used by the "Download cluster manifest" step to read the Cluster name
37+ run : |
38+ wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O yq  # fetches whatever the latest release is (unpinned)
39+ chmod +x yq
40+ sudo mv yq /usr/local/bin/
41+
3642 - name : Install Helm
3743 run : |
3844 curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
5965 run : |
6066 chmod +x ./hack/ensure-kind.sh
6167 ./hack/ensure-kind.sh
62- kind create cluster --name capi-operator-smoke-test --wait 5m
68+
69+ # Create kind cluster with Docker socket mount for CAPD
70+ cat <<EOF > /tmp/kind-config.yaml
71+ kind: Cluster
72+ apiVersion: kind.x-k8s.io/v1alpha4
73+ nodes:
74+ - role: control-plane
75+ extraMounts:
76+ - hostPath: /var/run/docker.sock
77+ containerPath: /var/run/docker.sock
78+ EOF
79+
80+ kind create cluster --name capi-operator-smoke-test --config /tmp/kind-config.yaml --wait 5m
6381 kubectl cluster-info --context kind-capi-operator-smoke-test
6482
6583 - name : Load Docker image to kind
@@ -115,6 +133,12 @@ jobs:
115133 core:
116134 cluster-api:
117135 namespace: capi-system
136+ bootstrap:
137+ kubeadm:
138+ namespace: capi-kubeadm-bootstrap-system
139+ controlPlane:
140+ kubeadm:
141+ namespace: capi-kubeadm-control-plane-system
118142 infrastructure:
119143 docker:
120144 namespace: capd-system
@@ -133,20 +157,39 @@ jobs:
133157
134158 helm install capi-providers "$PROVIDERS_CHART_PACKAGE" \
135159 -f /tmp/providers-values.yaml \
136- --wait
160+ --wait \
161+ --timeout 3m
137162
138163 - name : Wait for providers to be ready  # best-effort waits (|| true); the verification step performs the hard readiness checks
139164 run : |
140- echo "Waiting for Core Provider to be ready... "
165+ echo "=== Waiting for Core Provider to be ready === "
141166 kubectl wait --for=condition=Ready --timeout=300s -n capi-system coreprovider/cluster-api || true
142167
143- echo "Waiting for Infrastructure Provider to be ready..."
168+ # Additional check for CAPD provider to ensure Docker socket is accessible
169+ echo -e "\n=== Checking Docker socket access in CAPD pod ==="
170+ kubectl exec -n capd-system deployment/capd-controller-manager -- ls -la /var/run/docker.sock || echo "Docker socket not mounted"
171+
172+ echo -e "\n=== Waiting for Bootstrap Provider to be ready ==="
173+ kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-bootstrap-system bootstrapprovider/kubeadm || true
174+
175+ echo -e "\n=== Waiting for Control Plane Provider to be ready ==="
176+ kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-control-plane-system controlplaneprovider/kubeadm || true
177+
178+ echo -e "\n=== Waiting for Infrastructure Provider to be ready ==="
144179 kubectl wait --for=condition=Ready --timeout=300s -n capd-system infrastructureprovider/docker || true
145180
146181 # Additional wait for deployments
182+ echo -e "\n=== Waiting for provider deployments ==="
147183 kubectl wait --for=condition=Available --timeout=300s -n capi-system deployment/capi-controller-manager || true
184+ kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager || true
185+ kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager || true
148186 kubectl wait --for=condition=Available --timeout=300s -n capd-system deployment/capd-controller-manager || true
149187
188+ # Wait for webhooks to be ready. kubectl wait cannot match a jsonpath that resolves to an object (such as .status.loadBalancer), so wait for a ready endpoint on each service's EndpointSlice instead.
189+ echo -e "\n=== Waiting for webhook services ==="
190+ kubectl wait --for=jsonpath='{.endpoints[0].conditions.ready}'=true --timeout=300s -n capi-kubeadm-bootstrap-system endpointslices -l kubernetes.io/service-name=capi-kubeadm-bootstrap-webhook-service || true
191+ kubectl wait --for=jsonpath='{.endpoints[0].conditions.ready}'=true --timeout=300s -n capi-kubeadm-control-plane-system endpointslices -l kubernetes.io/service-name=capi-kubeadm-control-plane-webhook-service || true
192+
150193 - name : Verify installation
151194 run : |
152195 echo "=== Cluster API Operator Status ==="
@@ -156,13 +199,27 @@ jobs:
156199 kubectl get coreprovider -A -o wide
157200 kubectl describe coreprovider -n capi-system cluster-api || true
158201
202+ echo -e "\n=== Bootstrap Provider Status ==="
203+ kubectl get bootstrapprovider -A -o wide
204+ kubectl describe bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm || true
205+
206+ echo -e "\n=== Control Plane Provider Status ==="
207+ kubectl get controlplaneprovider -A -o wide
208+ kubectl describe controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm || true
209+
159210 echo -e "\n=== Infrastructure Provider Status ==="
160211 kubectl get infrastructureprovider -A -o wide
161212 kubectl describe infrastructureprovider -n capd-system docker || true
162213
163214 echo -e "\n=== All Pods ==="
164215 kubectl get pods -A | grep -E "(capi-|capd-)"
165216
217+ echo -e "\n=== Webhook Services ==="
218+ kubectl get svc -A | grep webhook
219+
220+ echo -e "\n=== Webhook Certificates ==="
221+ kubectl get certificate,certificaterequest -A | grep -E "(capi-|capd-)"
222+
166223 echo -e "\n=== CRDs ==="
167224 kubectl get crds | grep -E "(cluster.x-k8s.io|operator.cluster.x-k8s.io)"
168225
@@ -176,6 +233,22 @@ jobs:
176233 exit 1
177234 fi
178235
236+ # Check if bootstrap provider is ready
237+ BOOTSTRAP_READY=$(kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
238+ if [ "$BOOTSTRAP_READY" != "True" ]; then
239+ echo "Bootstrap provider is not ready"
240+ kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o yaml
241+ exit 1
242+ fi
243+
244+ # Check if control plane provider is ready
245+ CONTROLPLANE_READY=$(kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
246+ if [ "$CONTROLPLANE_READY" != "True" ]; then
247+ echo "Control plane provider is not ready"
248+ kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o yaml
249+ exit 1
250+ fi
251+
179252 # Check if infrastructure provider is ready
180253 INFRA_READY=$(kubectl get infrastructureprovider -n capd-system docker -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
181254 if [ "$INFRA_READY" != "True" ]; then
@@ -186,6 +259,120 @@ jobs:
186259
187260 echo "All providers are ready!"
188261
262+ # Additional webhook readiness check
263+ echo -e "\n=== Checking webhook endpoints ==="
264+ kubectl get endpoints -A | grep webhook
265+
266+ - name : Download cluster manifest  # fetches the example manifest and exports CLUSTER_NAME for later steps
267+ run : |
268+ echo "=== Downloading cluster manifest ==="
269+ curl -fL https://raw.githubusercontent.com/kubernetes-sigs/cluster-api/refs/heads/main/test/infrastructure/docker/examples/simple-cluster.yaml -o simple-cluster.yaml  # -f: fail on HTTP errors instead of saving the error page as the manifest
270+
271+ # Show the manifest for debugging
272+ echo "=== Cluster manifest ==="
273+ cat simple-cluster.yaml
274+
275+ # Extract cluster name from the manifest using yq
276+ CLUSTER_NAME=$(yq eval 'select(.kind == "Cluster") | .metadata.name' simple-cluster.yaml)
277+
278+ # Ensure cluster name was extracted successfully
279+ if [ -z "$CLUSTER_NAME" ]; then
280+ echo "ERROR: Failed to extract cluster name from simple-cluster.yaml"
281+ echo "Please check the manifest structure"
282+ exit 1
283+ fi
284+
285+ echo "Detected cluster name: $CLUSTER_NAME"
286+ echo "CLUSTER_NAME=$CLUSTER_NAME" >> "$GITHUB_ENV"  # makes CLUSTER_NAME available to subsequent steps
287+
288+ - name : Create workload cluster  # prints webhook diagnostics, then applies the downloaded manifest
289+ run : |
290+ echo "=== Pre-creation diagnostics ==="
291+ echo "Checking webhook services..."
292+ kubectl get svc -A | grep webhook || true  # diagnostics only: a grep with no match must not abort the step
293+
294+ echo -e "\nChecking webhook endpoints..."
295+ kubectl get endpoints -A | grep webhook || true
296+
297+ echo -e "\nChecking webhook certificates..."
298+ kubectl get secret -A | grep webhook-service-cert || true
299+
300+ echo -e "\n=== Creating workload cluster ==="
301+ kubectl apply -f simple-cluster.yaml
302+
303+ echo -e "\n=== Cluster resources created ==="
304+ kubectl get cluster,dockercluster,kubeadmcontrolplane,machinedeployment -A
305+
306+ - name : Wait for cluster to be ready  # blocks (up to 600s per wait) until the Cluster and its control plane report ready
307+ run : |
308+ echo "=== Waiting for cluster to be provisioned ==="
309+ kubectl wait --for=condition=Ready --timeout=600s "cluster/${CLUSTER_NAME}"
310+
311+ echo "=== Waiting for control plane to be initialized ==="
312+ kubectl wait --for=condition=Ready --timeout=600s kubeadmcontrolplane -l "cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}"
313+
314+ echo "=== Waiting for first control plane node ==="
315+ kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 --timeout=600s kubeadmcontrolplane -l "cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}"
316+
317+ echo "=== Cluster status ==="
318+ kubectl get cluster "${CLUSTER_NAME}" -o wide
319+ kubectl get machines -l "cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}"
320+
321+ - name : Get workload cluster kubeconfig  # writes ${CLUSTER_NAME}.kubeconfig into the working directory for the following steps
322+ run : |
323+ echo "=== Getting workload cluster kubeconfig ==="
324+ # Get kubeconfig from the cluster: decode the base64 'value' key of the <cluster>-kubeconfig secret
325+ kubectl get secret "${CLUSTER_NAME}-kubeconfig" -o jsonpath='{.data.value}' | base64 -d > "${CLUSTER_NAME}.kubeconfig"
326+
327+ echo "=== Testing kubeconfig ==="
328+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" cluster-info || echo "Cluster may not be ready yet"
329+
330+ - name : Verify kubectl commands work on workload cluster  # lists pods/nodes, then waits for the core kube-system pods
331+ run : |
332+ echo "=== Testing kubectl get po on workload cluster ==="
333+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" get po -A
334+
335+ echo -e "\n=== Testing kubectl get nodes ==="
336+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" get nodes
337+
338+ echo -e "\n=== Waiting for system pods to be ready ==="
339+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" wait --for=condition=Ready --timeout=300s pods -n kube-system -l k8s-app=kube-proxy
340+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-apiserver
341+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-controller-manager
342+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-scheduler
343+
344+ - name : Deploy and test sample application  # creates a 2-replica nginx deployment plus a ClusterIP service on the workload cluster
345+ run : |
346+ echo "=== Deploying nginx test application ==="
347+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" create deployment nginx --image=nginx:alpine --replicas=2
348+
349+ echo "=== Waiting for deployment to be ready ==="
350+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" wait --for=condition=Available --timeout=120s deployment/nginx
351+
352+ echo "=== Verifying pods are running ==="
353+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" get po -l app=nginx
354+
355+ echo "=== Creating a service ==="
356+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" expose deployment nginx --port=80 --type=ClusterIP
357+
358+ echo "=== Verifying service ==="
359+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" get svc nginx
360+
361+ - name : Verify cluster functionality  # final read-only dump of nodes, pods, services and cluster-info from the workload cluster
362+ run : |
363+ echo "=== Final cluster verification ==="
364+ echo "Cluster nodes:"
365+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" get nodes -o wide
366+
367+ echo -e "\nAll pods:"
368+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" get po -A
369+
370+ echo -e "\nAll services:"
371+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" get svc -A
372+
373+ echo -e "\nCluster info:"
374+ kubectl --kubeconfig="${CLUSTER_NAME}.kubeconfig" cluster-info
375+
189376 - name : Collect debug information on failure
190377 if : failure()
191378 run : |
@@ -198,9 +385,36 @@ jobs:
198385 echo -e "\n=== Core Provider Logs ==="
199386 kubectl logs -n capi-system deployment/capi-controller-manager --tail=100 || true
200387
388+ echo -e "\n=== Bootstrap Provider Logs ==="
389+ kubectl logs -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager --tail=100 || true
390+
391+ echo -e "\n=== Control Plane Provider Logs ==="
392+ kubectl logs -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager --tail=100 || true
393+
201394 echo -e "\n=== Infrastructure Provider Logs ==="
202395 kubectl logs -n capd-system deployment/capd-controller-manager --tail=100 || true
203396
397+ echo -e "\n=== Webhook Services and Endpoints ==="
398+ kubectl get svc,endpoints -A | grep webhook || true
399+
400+ echo -e "\n=== Webhook Certificates ==="
401+ kubectl get certificate,certificaterequest,secret -A | grep -E "(webhook|serving-cert)" || true
402+
403+ echo -e "\n=== Cluster Resources ==="
404+ kubectl get cluster,dockercluster,kubeadmcontrolplane,machine,dockermachine -A -o wide || true
405+
406+ echo -e "\n=== Describe Cluster ==="
407+ kubectl describe cluster ${CLUSTER_NAME} || true
408+
409+ echo -e "\n=== Describe Machines ==="
410+ kubectl describe machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME} || true
411+
412+ echo -e "\n=== Docker Containers ==="
413+ docker ps -a | grep -E "(smoke-test|kind)" || true
414+
415+ echo -e "\n=== Kind Clusters ==="
416+ kind get clusters || true
417+
204418 echo -e "\n=== Describe Failed Pods ==="
205419 kubectl get pods -A | grep -v Running | grep -v Completed | tail -n +2 | while read namespace name ready status restarts age; do
206420 echo "Describing pod $name in namespace $namespace"
@@ -211,4 +425,19 @@ jobs:
211425 - name : Clean up  # always runs; every command is best-effort so cleanup never fails the job
212426 if : always()
213427 run : |
428+ echo "=== Cleaning up kind clusters ==="
429+ # List all kind clusters before cleanup
430+ echo "Current kind clusters:"
431+ kind get clusters || true
432+
433+ # Delete workload cluster if it exists (CLUSTER_NAME is unset when the job failed before the manifest step)
434+ echo "Deleting workload cluster: ${CLUSTER_NAME:-<not set>}"
435+ [ -z "${CLUSTER_NAME:-}" ] || kind delete cluster --name "${CLUSTER_NAME}" || true
436+
437+ # Delete management cluster
438+ echo "Deleting management cluster: capi-operator-smoke-test"
214439 kind delete cluster --name capi-operator-smoke-test || true
440+
441+ # Verify all clusters are deleted
442+ echo "Remaining kind clusters:"
443+ kind get clusters || true
0 commit comments