3333 chmod +x kubectl
3434 sudo mv kubectl /usr/local/bin/
3535
- name: Install yq
  run: |
    # Fetch the latest published yq binary for linux/amd64, mark it
    # executable and move it onto the PATH.
    yq_url=https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
    wget --output-document=yq "$yq_url"
    chmod +x yq
    sudo mv yq /usr/local/bin/
41+
3642 - name : Install Helm
3743 run : |
3844 curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
5965 run : |
6066 chmod +x ./hack/ensure-kind.sh
6167 ./hack/ensure-kind.sh
62- kind create cluster --name capi-operator-smoke-test --wait 5m
68+
69+ # Create kind cluster with Docker socket mount for CAPD
70+ cat <<EOF > /tmp/kind-config.yaml
71+ kind: Cluster
72+ apiVersion: kind.x-k8s.io/v1alpha4
73+ nodes:
74+ - role: control-plane
75+ extraMounts:
76+ - hostPath: /var/run/docker.sock
77+ containerPath: /var/run/docker.sock
78+ - hostPath: /var/lib/docker
79+ containerPath: /var/lib/docker
80+ EOF
81+
82+ kind create cluster --name capi-operator-smoke-test --config /tmp/kind-config.yaml --wait 5m
6383 kubectl cluster-info --context kind-capi-operator-smoke-test
6484
6585 - name : Load Docker image to kind
@@ -115,6 +135,12 @@ jobs:
115135 core:
116136 cluster-api:
117137 namespace: capi-system
138+ bootstrap:
139+ kubeadm:
140+ namespace: capi-kubeadm-bootstrap-system
141+ controlPlane:
142+ kubeadm:
143+ namespace: capi-kubeadm-control-plane-system
118144 infrastructure:
119145 docker:
120146 namespace: capd-system
@@ -133,20 +159,39 @@ jobs:
133159
134160 helm install capi-providers "$PROVIDERS_CHART_PACKAGE" \
135161 -f /tmp/providers-values.yaml \
136- --wait
162+ --wait \
163+ --timeout 3m
137164
- name: Wait for providers to be ready
  run: |
    # Every wait in this step is best-effort (`|| true`) so that all
    # providers are given their turn and the log shows the whole picture.
    # The hard Ready checks that fail the job live in "Verify installation".

    # Poll until a Service is backed by at least one ready endpoint address.
    # Arguments: $1 - namespace, $2 - service name,
    #            $3 - timeout in seconds (default 300)
    # Returns:   0 once an address is published, 1 on timeout.
    wait_for_endpoints() {
      local ns=$1 svc=$2 timeout=${3:-300}
      local deadline=$((SECONDS + timeout)) addrs
      while (( SECONDS < deadline )); do
        addrs=$(kubectl get endpoints -n "$ns" "$svc" \
          -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)
        if [[ -n "$addrs" ]]; then
          echo "$ns/$svc is backed by: $addrs"
          return 0
        fi
        sleep 5
      done
      echo "Timed out waiting for endpoints of $ns/$svc"
      return 1
    }

    echo "=== Waiting for Core Provider to be ready ==="
    kubectl wait --for=condition=Ready --timeout=300s -n capi-system coreprovider/cluster-api || true

    echo -e "\n=== Waiting for Bootstrap Provider to be ready ==="
    kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-bootstrap-system bootstrapprovider/kubeadm || true

    echo -e "\n=== Waiting for Control Plane Provider to be ready ==="
    kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-control-plane-system controlplaneprovider/kubeadm || true

    echo -e "\n=== Waiting for Infrastructure Provider to be ready ==="
    kubectl wait --for=condition=Ready --timeout=300s -n capd-system infrastructureprovider/docker || true

    # Additional wait for deployments
    echo -e "\n=== Waiting for provider deployments ==="
    kubectl wait --for=condition=Available --timeout=300s -n capi-system deployment/capi-controller-manager || true
    kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager || true
    kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager || true
    kubectl wait --for=condition=Available --timeout=300s -n capd-system deployment/capd-controller-manager || true

    # Check Docker socket access only after the CAPD deployment has been
    # waited on; exec-ing into it any earlier races with its rollout.
    # NOTE(review): this relies on `ls` existing in the CAPD image - confirm;
    # if it does not, the fallback message is printed even when the socket
    # is mounted.
    echo -e "\n=== Checking Docker socket access in CAPD pod ==="
    kubectl exec -n capd-system deployment/capd-controller-manager -- ls -la /var/run/docker.sock || echo "Docker socket not mounted"

    # Wait for the webhook Services to have ready endpoints. Polling the
    # endpoints shows that a pod is actually serving behind each Service.
    echo -e "\n=== Waiting for webhook services ==="
    wait_for_endpoints capi-kubeadm-bootstrap-system capi-kubeadm-bootstrap-webhook-service || true
    wait_for_endpoints capi-kubeadm-control-plane-system capi-kubeadm-control-plane-webhook-service || true
194+
150195 - name : Verify installation
151196 run : |
152197 echo "=== Cluster API Operator Status ==="
@@ -156,13 +201,27 @@ jobs:
156201 kubectl get coreprovider -A -o wide
157202 kubectl describe coreprovider -n capi-system cluster-api || true
158203
204+ echo -e "\n=== Bootstrap Provider Status ==="
205+ kubectl get bootstrapprovider -A -o wide
206+ kubectl describe bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm || true
207+
208+ echo -e "\n=== Control Plane Provider Status ==="
209+ kubectl get controlplaneprovider -A -o wide
210+ kubectl describe controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm || true
211+
159212 echo -e "\n=== Infrastructure Provider Status ==="
160213 kubectl get infrastructureprovider -A -o wide
161214 kubectl describe infrastructureprovider -n capd-system docker || true
162215
163216 echo -e "\n=== All Pods ==="
164217 kubectl get pods -A | grep -E "(capi-|capd-)"
165218
219+ echo -e "\n=== Webhook Services ==="
220+ kubectl get svc -A | grep webhook
221+
222+ echo -e "\n=== Webhook Certificates ==="
223+ kubectl get certificate,certificaterequest -A | grep -E "(capi-|capd-)"
224+
166225 echo -e "\n=== CRDs ==="
167226 kubectl get crds | grep -E "(cluster.x-k8s.io|operator.cluster.x-k8s.io)"
168227
@@ -176,6 +235,22 @@ jobs:
176235 exit 1
177236 fi
178237
238+ # Check if bootstrap provider is ready
239+ BOOTSTRAP_READY=$(kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
240+ if [ "$BOOTSTRAP_READY" != "True" ]; then
241+ echo "Bootstrap provider is not ready"
242+ kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o yaml
243+ exit 1
244+ fi
245+
246+ # Check if control plane provider is ready
247+ CONTROLPLANE_READY=$(kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
248+ if [ "$CONTROLPLANE_READY" != "True" ]; then
249+ echo "Control plane provider is not ready"
250+ kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o yaml
251+ exit 1
252+ fi
253+
179254 # Check if infrastructure provider is ready
180255 INFRA_READY=$(kubectl get infrastructureprovider -n capd-system docker -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
181256 if [ "$INFRA_READY" != "True" ]; then
@@ -186,6 +261,120 @@ jobs:
186261
187262 echo "All providers are ready!"
188263
264+ # Additional webhook readiness check
265+ echo -e "\n=== Checking webhook endpoints ==="
266+ kubectl get endpoints -A | grep webhook
267+
- name: Download cluster manifest
  run: |
    # Downloads the CAPD example cluster manifest, prints it, and exports
    # the name of its Cluster object as CLUSTER_NAME for later steps.
    # NOTE(review): the URL tracks the main branch, so the manifest can
    # change under this workflow - consider pinning to a release tag.
    manifest_url=https://raw.githubusercontent.com/kubernetes-sigs/cluster-api/refs/heads/main/test/infrastructure/docker/examples/simple-cluster.yaml

    echo "=== Downloading cluster manifest ==="
    # --fail makes an HTTP error fail the step instead of silently saving
    # the error page as simple-cluster.yaml; --retry rides out transient
    # network hiccups.
    curl --fail --location --retry 3 "$manifest_url" -o simple-cluster.yaml

    # Show the manifest for debugging
    echo "=== Cluster manifest ==="
    cat simple-cluster.yaml

    # Extract cluster name from the manifest using yq
    CLUSTER_NAME=$(yq eval 'select(.kind == "Cluster") | .metadata.name' simple-cluster.yaml)

    # Ensure cluster name was extracted successfully. yq prints the literal
    # string "null" when the selected document has no metadata.name.
    if [ -z "$CLUSTER_NAME" ] || [ "$CLUSTER_NAME" = "null" ]; then
      echo "ERROR: Failed to extract cluster name from simple-cluster.yaml"
      echo "Please check the manifest structure"
      exit 1
    fi

    echo "Detected cluster name: $CLUSTER_NAME"
    # Appending to the GITHUB_ENV file exposes the variable to later steps.
    echo "CLUSTER_NAME=$CLUSTER_NAME" >> "$GITHUB_ENV"
289+
- name: Create workload cluster
  run: |
    # Prints webhook diagnostics, then applies simple-cluster.yaml to the
    # management cluster and lists the resulting Cluster API objects.
    #
    # The diagnostic greps are informational only. grep exits 1 when nothing
    # matches, and the step's shell aborts on the first failing command, so
    # each one is guarded to keep a missing match from cancelling the apply.
    echo "=== Pre-creation diagnostics ==="
    echo "Checking webhook services..."
    kubectl get svc -A | grep webhook || echo "No webhook services found"

    echo -e "\nChecking webhook endpoints..."
    kubectl get endpoints -A | grep webhook || echo "No webhook endpoints found"

    echo -e "\nChecking webhook certificates..."
    kubectl get secret -A | grep webhook-service-cert || echo "No webhook-service-cert secrets found"

    echo -e "\n=== Creating workload cluster ==="
    kubectl apply -f simple-cluster.yaml

    echo -e "\n=== Cluster resources created ==="
    kubectl get cluster,dockercluster,kubeadmcontrolplane,machinedeployment -A
307+
- name: Wait for cluster to be ready
  run: |
    # Blocks until the workload cluster and its kubeadm control plane report
    # Ready, then prints the cluster and its machines.
    # CLUSTER_NAME comes from the environment; fail fast with a clear message
    # if it is missing instead of letting kubectl choke on "cluster/".
    : "${CLUSTER_NAME:?CLUSTER_NAME is not set}"
    selector="cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}"

    echo "=== Waiting for cluster to be provisioned ==="
    kubectl wait --for=condition=Ready --timeout=600s "cluster/${CLUSTER_NAME}"

    echo "=== Waiting for control plane to be initialized ==="
    kubectl wait --for=condition=Ready --timeout=600s kubeadmcontrolplane -l "$selector"

    echo "=== Waiting for first control plane node ==="
    kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 --timeout=600s kubeadmcontrolplane -l "$selector"

    echo "=== Cluster status ==="
    kubectl get cluster "${CLUSTER_NAME}" -o wide
    kubectl get machines -l "$selector"
322+
- name: Get workload cluster kubeconfig
  run: |
    # Writes the workload cluster's kubeconfig, decoded from the
    # <cluster>-kubeconfig secret, to <cluster>.kubeconfig in the working
    # directory, then does a best-effort connectivity probe.
    : "${CLUSTER_NAME:?CLUSTER_NAME is not set}"
    kubeconfig="${CLUSTER_NAME}.kubeconfig"

    # A pipeline's status is that of its last command unless pipefail is on,
    # so without it a failed `kubectl get` would be masked by base64
    # succeeding on empty input and the step would pass with an empty file.
    set -o pipefail

    echo "=== Getting workload cluster kubeconfig ==="
    # Get kubeconfig from the cluster
    kubectl get secret "${CLUSTER_NAME}-kubeconfig" -o jsonpath='{.data.value}' | base64 -d > "$kubeconfig"

    if [ ! -s "$kubeconfig" ]; then
      echo "ERROR: kubeconfig extracted from secret ${CLUSTER_NAME}-kubeconfig is empty"
      exit 1
    fi

    echo "=== Testing kubeconfig ==="
    # Deliberately non-fatal: the API server may still be coming up.
    kubectl --kubeconfig="$kubeconfig" cluster-info || echo "Cluster may not be ready yet"
331+
- name: Verify kubectl commands work on workload cluster
  run: |
    # kubectl bound to the workload cluster's kubeconfig file.
    workload_kubectl() {
      kubectl "--kubeconfig=${CLUSTER_NAME}.kubeconfig" "$@"
    }

    echo "=== Testing kubectl get po on workload cluster ==="
    workload_kubectl get po -A

    echo -e "\n=== Testing kubectl get nodes ==="
    workload_kubectl get nodes

    echo -e "\n=== Waiting for system pods to be ready ==="
    # One wait per core component, in the same order: kube-proxy, API server,
    # controller manager, scheduler.
    for pod_selector in \
      k8s-app=kube-proxy \
      component=kube-apiserver \
      component=kube-controller-manager \
      component=kube-scheduler; do
      workload_kubectl wait --for=condition=Ready --timeout=300s pods -n kube-system -l "$pod_selector"
    done
345+
- name: Deploy and test sample application
  run: |
    # Command prefix that targets the workload cluster; every call below
    # expands it and appends its own sub-command.
    wl=(kubectl "--kubeconfig=${CLUSTER_NAME}.kubeconfig")

    # Roll out a two-replica nginx deployment and wait for it.
    echo "=== Deploying nginx test application ==="
    "${wl[@]}" create deployment nginx --image=nginx:alpine --replicas=2

    echo "=== Waiting for deployment to be ready ==="
    "${wl[@]}" wait --for=condition=Available --timeout=120s deployment/nginx

    echo "=== Verifying pods are running ==="
    "${wl[@]}" get po -l app=nginx

    # Front the deployment with a ClusterIP service and show it.
    echo "=== Creating a service ==="
    "${wl[@]}" expose deployment nginx --port=80 --type=ClusterIP

    echo "=== Verifying service ==="
    "${wl[@]}" get svc nginx
362+
- name: Verify cluster functionality
  run: |
    # Print a heading, then run kubectl against the workload cluster.
    # Arguments: $1 - heading (backslash escapes are interpreted),
    #            remaining - kubectl arguments
    report() {
      local heading=$1
      shift
      echo -e "$heading"
      kubectl "--kubeconfig=${CLUSTER_NAME}.kubeconfig" "$@"
    }

    echo "=== Final cluster verification ==="
    report "Cluster nodes:" get nodes -o wide
    report "\nAll pods:" get po -A
    report "\nAll services:" get svc -A
    report "\nCluster info:" cluster-info
377+
189378 - name : Collect debug information on failure
190379 if : failure()
191380 run : |
@@ -198,9 +387,36 @@ jobs:
198387 echo -e "\n=== Core Provider Logs ==="
199388 kubectl logs -n capi-system deployment/capi-controller-manager --tail=100 || true
200389
390+ echo -e "\n=== Bootstrap Provider Logs ==="
391+ kubectl logs -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager --tail=100 || true
392+
393+ echo -e "\n=== Control Plane Provider Logs ==="
394+ kubectl logs -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager --tail=100 || true
395+
201396 echo -e "\n=== Infrastructure Provider Logs ==="
202397 kubectl logs -n capd-system deployment/capd-controller-manager --tail=100 || true
203398
399+ echo -e "\n=== Webhook Services and Endpoints ==="
400+ kubectl get svc,endpoints -A | grep webhook || true
401+
402+ echo -e "\n=== Webhook Certificates ==="
403+ kubectl get certificate,certificaterequest,secret -A | grep -E "(webhook|serving-cert)" || true
404+
405+ echo -e "\n=== Cluster Resources ==="
406+ kubectl get cluster,dockercluster,kubeadmcontrolplane,machine,dockermachine -A -o wide || true
407+
408+ echo -e "\n=== Describe Cluster ==="
409+ kubectl describe cluster ${CLUSTER_NAME} || true
410+
411+ echo -e "\n=== Describe Machines ==="
412+ kubectl describe machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME} || true
413+
414+ echo -e "\n=== Docker Containers ==="
415+ docker ps -a | grep -E "(smoke-test|kind)" || true
416+
417+ echo -e "\n=== Kind Clusters ==="
418+ kind get clusters || true
419+
204420 echo -e "\n=== Describe Failed Pods ==="
205421 kubectl get pods -A | grep -v Running | grep -v Completed | tail -n +2 | while read namespace name ready status restarts age; do
206422 echo "Describing pod $name in namespace $namespace"
@@ -211,4 +427,19 @@ jobs:
- name: Clean up
  if: always()
  run: |
    # Best-effort teardown of the workload and management clusters. Every
    # kind call is allowed to fail so that cleanup itself never fails the job.
    echo "=== Cleaning up kind clusters ==="
    # List all kind clusters before cleanup
    echo "Current kind clusters:"
    kind get clusters || true

    # Delete workload cluster if it exists. This step runs even when earlier
    # steps failed, so CLUSTER_NAME may never have been exported; skip the
    # delete then rather than invoking `kind delete cluster --name` with no
    # argument.
    if [ -n "${CLUSTER_NAME:-}" ]; then
      echo "Deleting workload cluster: ${CLUSTER_NAME}"
      kind delete cluster --name "${CLUSTER_NAME}" || true
    else
      echo "CLUSTER_NAME is not set; no workload cluster to delete"
    fi

    # Delete management cluster
    echo "Deleting management cluster: capi-operator-smoke-test"
    kind delete cluster --name capi-operator-smoke-test || true

    # Verify all clusters are deleted
    echo "Remaining kind clusters:"
    kind get clusters || true
0 commit comments