@@ -33,6 +33,12 @@
           chmod +x kubectl
           sudo mv kubectl /usr/local/bin/
 
+      - name: Install yq
+        run: |
+          wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O yq
+          chmod +x yq
+          sudo mv yq /usr/local/bin/
+
       - name: Install Helm
         run: |
           curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
@@ -59,7 +65,21 @@
         run: |
           chmod +x ./hack/ensure-kind.sh
           ./hack/ensure-kind.sh
-          kind create cluster --name capi-operator-smoke-test --wait 5m
+
+          # Create kind cluster with Docker socket mount for CAPD
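+          # CAPD creates workload-cluster "machines" as sibling containers on the
+          # host Docker daemon, so the management cluster needs the host's socket.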
+          cat <<EOF > /tmp/kind-config.yaml
+          kind: Cluster
+          apiVersion: kind.x-k8s.io/v1alpha4
+          nodes:
+          - role: control-plane
+            extraMounts:
+            - hostPath: /var/run/docker.sock
+              containerPath: /var/run/docker.sock
+            - hostPath: /var/lib/docker
+              containerPath: /var/lib/docker
+          EOF
+
+          kind create cluster --name capi-operator-smoke-test --config /tmp/kind-config.yaml --wait 5m
           kubectl cluster-info --context kind-capi-operator-smoke-test
 
       - name: Load Docker image to kind
@@ -115,9 +135,26 @@ jobs:
           core:
             cluster-api:
              namespace: capi-system
+          bootstrap:
+            kubeadm:
+              namespace: capi-kubeadm-bootstrap-system
+          controlPlane:
+            kubeadm:
+              namespace: capi-kubeadm-control-plane-system
           infrastructure:
             docker:
               namespace: capd-system
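+          # ClusterTopology (ClusterClass), ClusterResourceSet and MachinePool are
+          # feature-gated CAPI features and must be enabled per provider manager.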
+          manager:
+            featureGates:
+              core:
+                ClusterTopology: true
+                ClusterResourceSet: true
+                MachinePool: true
+              kubeadm:
+                ClusterTopology: true
+                MachinePool: true
+              docker:
+                ClusterTopology: true
           EOF
 
           # Use exact providers chart filename based on HELM_CHART_TAG
@@ -137,16 +174,30 @@ jobs:
 
       - name: Wait for providers to be ready
         run: |
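+          # Readiness is re-checked in the "Verify installation" step, so transient
+          # wait failures are tolerated here with '|| true'.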
-          echo "Waiting for Core Provider to be ready..."
+          echo "=== Waiting for Core Provider to be ready ==="
           kubectl wait --for=condition=Ready --timeout=300s -n capi-system coreprovider/cluster-api || true
 
-          echo "Waiting for Infrastructure Provider to be ready..."
+          echo -e "\n=== Waiting for Bootstrap Provider to be ready ==="
+          kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-bootstrap-system bootstrapprovider/kubeadm || true
+
+          echo -e "\n=== Waiting for Control Plane Provider to be ready ==="
+          kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-control-plane-system controlplaneprovider/kubeadm || true
+
+          echo -e "\n=== Waiting for Infrastructure Provider to be ready ==="
           kubectl wait --for=condition=Ready --timeout=300s -n capd-system infrastructureprovider/docker || true
 
           # Additional wait for deployments
+          echo -e "\n=== Waiting for provider deployments ==="
           kubectl wait --for=condition=Available --timeout=300s -n capi-system deployment/capi-controller-manager || true
+          kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager || true
+          kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager || true
           kubectl wait --for=condition=Available --timeout=300s -n capd-system deployment/capd-controller-manager || true
 
+          # Wait for webhooks to be ready
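+          # Note: these webhook Services are ClusterIP, so .status.loadBalancer is
+          # normally an empty object; the wait is effectively an existence check.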
+          echo -e "\n=== Waiting for webhook services ==="
+          kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-bootstrap-system service/capi-kubeadm-bootstrap-webhook-service || true
+          kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-control-plane-system service/capi-kubeadm-control-plane-webhook-service || true
+
       - name: Verify installation
         run: |
           echo "=== Cluster API Operator Status ==="
@@ -156,13 +207,27 @@ jobs:
           kubectl get coreprovider -A -o wide
           kubectl describe coreprovider -n capi-system cluster-api || true
 
+          echo -e "\n=== Bootstrap Provider Status ==="
+          kubectl get bootstrapprovider -A -o wide
+          kubectl describe bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm || true
+
+          echo -e "\n=== Control Plane Provider Status ==="
+          kubectl get controlplaneprovider -A -o wide
+          kubectl describe controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm || true
+
           echo -e "\n=== Infrastructure Provider Status ==="
           kubectl get infrastructureprovider -A -o wide
           kubectl describe infrastructureprovider -n capd-system docker || true
 
           echo -e "\n=== All Pods ==="
           kubectl get pods -A | grep -E "(capi-|capd-)"
 
+          echo -e "\n=== Webhook Services ==="
+          kubectl get svc -A | grep webhook
+
+          echo -e "\n=== Webhook Certificates ==="
+          kubectl get certificate,certificaterequest -A | grep -E "(capi-|capd-)"
+
           echo -e "\n=== CRDs ==="
           kubectl get crds | grep -E "(cluster.x-k8s.io|operator.cluster.x-k8s.io)"
 
@@ -176,6 +241,22 @@ jobs:
             exit 1
           fi
 
+          # Check if bootstrap provider is ready
+          BOOTSTRAP_READY=$(kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
+          if [ "$BOOTSTRAP_READY" != "True" ]; then
+            echo "Bootstrap provider is not ready"
+            kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o yaml
+            exit 1
+          fi
+
+          # Check if control plane provider is ready
+          CONTROLPLANE_READY=$(kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
+          if [ "$CONTROLPLANE_READY" != "True" ]; then
+            echo "Control plane provider is not ready"
+            kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o yaml
+            exit 1
+          fi
+
           # Check if infrastructure provider is ready
           INFRA_READY=$(kubectl get infrastructureprovider -n capd-system docker -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
           if [ "$INFRA_READY" != "True" ]; then
@@ -186,6 +267,140 @@ jobs:
 
           echo "All providers are ready!"
 
+          # Additional webhook readiness check
+          echo -e "\n=== Checking webhook endpoints ==="
+          kubectl get endpoints -A | grep webhook
+
+      - name: Download cluster manifest
+        run: |
+          echo "=== Downloading cluster manifest ==="
+          curl -L https://raw.githubusercontent.com/kubernetes-sigs/cluster-api/refs/heads/main/test/infrastructure/docker/examples/simple-cluster.yaml -o simple-cluster.yaml
+
+          # Show the manifest for debugging
+          echo "=== Cluster manifest ==="
+          cat simple-cluster.yaml
+
+          # Extract cluster name from the manifest using yq
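+          # yq evaluates every document in the multi-document manifest and prints
+          # the metadata.name of the one whose kind is Cluster.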
+          CLUSTER_NAME=$(yq eval 'select(.kind == "Cluster") | .metadata.name' simple-cluster.yaml)
+
+          # Ensure cluster name was extracted successfully
+          if [ -z "$CLUSTER_NAME" ]; then
+            echo "ERROR: Failed to extract cluster name from simple-cluster.yaml"
+            echo "Please check the manifest structure"
+            exit 1
+          fi
+
+          echo "Detected cluster name: $CLUSTER_NAME"
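+          # Persist the name for later steps via the GitHub Actions environment file.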
+          echo "CLUSTER_NAME=$CLUSTER_NAME" >> $GITHUB_ENV
+
+      - name: Create workload cluster
+        run: |
+          echo "=== Pre-creation diagnostics ==="
+          echo "Checking webhook services..."
+          kubectl get svc -A | grep webhook
+
+          echo -e "\nChecking webhook endpoints..."
+          kubectl get endpoints -A | grep webhook
+
+          echo -e "\nChecking webhook certificates..."
+          kubectl get secret -A | grep webhook-service-cert
+
+          echo -e "\n=== Analyzing cluster manifest for CNI configuration ==="
+          echo "Checking for CNI-related settings in simple-cluster.yaml:"
+          grep -i "cni\|calico\|flannel\|weave\|cilium" simple-cluster.yaml || echo "No CNI configuration found in manifest"
+
+          echo -e "\n=== Checking KubeadmControlPlane configuration ==="
+          yq eval 'select(.kind == "KubeadmControlPlane") | .spec' simple-cluster.yaml || echo "Could not extract KubeadmControlPlane spec"
+
+          echo -e "\n=== Creating workload cluster ==="
+          kubectl apply -f simple-cluster.yaml
+
+          echo -e "\n=== Cluster resources created ==="
+          kubectl get cluster,dockercluster,kubeadmcontrolplane,machinedeployment -A
+
+      - name: Get workload cluster kubeconfig
+        run: |
+          echo "=== Getting workload cluster kubeconfig ==="
+          # Get kubeconfig from the cluster
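+          # CAPI stores the workload cluster's admin kubeconfig in the
+          # "<cluster>-kubeconfig" Secret (key "value") once the control plane is up.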
+          kubectl get secret ${CLUSTER_NAME}-kubeconfig -o jsonpath='{.data.value}' | base64 -d > ${CLUSTER_NAME}.kubeconfig
+
+          echo "=== Testing kubeconfig ==="
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig cluster-info || echo "Cluster API endpoint may not be ready yet"
+
+      - name: Install CNI plugin (Calico) using Helm
+        run: |
+          echo "=== Installing Calico CNI plugin using Helm ==="
+
+          # Add Calico Helm repository
+          helm repo add projectcalico https://docs.tigera.io/calico/charts --kubeconfig=${CLUSTER_NAME}.kubeconfig
+          helm repo update --kubeconfig=${CLUSTER_NAME}.kubeconfig
+
+          # Install Calico using Helm with values from CAPI Azure provider
+          helm install calico projectcalico/tigera-operator \
+            --kubeconfig=${CLUSTER_NAME}.kubeconfig \
+            -f https://raw.githubusercontent.com/kubernetes-sigs/cluster-api-provider-azure/main/templates/addons/calico/values.yaml \
+            --namespace tigera-operator \
+            --create-namespace \
+            --wait \
+            --timeout 5m
+
+          echo "=== Waiting for Calico to be ready ==="
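+          # The tigera-operator pod starts first and then rolls out the Calico
+          # components into the calico-system namespace.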
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n tigera-operator -l app.kubernetes.io/name=tigera-operator
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n calico-system --all
+
+          echo "=== Calico installation complete ==="
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get pods -n tigera-operator
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get pods -n calico-system
+
+      - name: Wait for nodes to be ready
+        run: |
+          echo "=== Waiting for control plane node to be ready ==="
+          # Wait for the node to become ready after CNI installation
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s nodes --all
+
+          echo "=== Checking node status ==="
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get nodes -o wide
+
+          echo "=== Waiting for control plane replicas ==="
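+          # Assumes the example manifest defines a single control-plane replica,
+          # hence the readyReplicas=1 check against the management cluster.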
+          kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 --timeout=300s kubeadmcontrolplane -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
+
+          echo "=== Final cluster status ==="
+          kubectl get cluster ${CLUSTER_NAME} -o wide
+          kubectl get machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
+
+      - name: Verify kubectl commands work on workload cluster
+        run: |
+          echo "=== Testing kubectl get po on workload cluster ==="
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get po -A
+
+          echo -e "\n=== Testing kubectl get nodes ==="
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get nodes
+
+          echo -e "\n=== Verifying CNI is working ==="
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get pods -n calico-system
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get pods -n tigera-operator
+
+          echo -e "\n=== Waiting for system pods to be ready ==="
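+          # kubeadm's static control-plane pods carry "component=" labels, while
+          # kube-proxy is labeled "k8s-app=kube-proxy".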
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l k8s-app=kube-proxy
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-apiserver
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-controller-manager
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-scheduler
+
+      - name: Verify cluster functionality
+        run: |
+          echo "=== Final cluster verification ==="
+          echo "Cluster nodes:"
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get nodes -o wide
+
+          echo -e "\nAll pods:"
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get po -A
+
+          echo -e "\nAll services:"
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get svc -A
+
+          echo -e "\nCluster info:"
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig cluster-info
+
       - name: Collect debug information on failure
         if: failure()
         run: |
@@ -198,17 +413,96 @@ jobs:
           echo -e "\n=== Core Provider Logs ==="
           kubectl logs -n capi-system deployment/capi-controller-manager --tail=100 || true
 
+          echo -e "\n=== Bootstrap Provider Logs ==="
+          kubectl logs -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager --tail=100 || true
+
+          echo -e "\n=== Control Plane Provider Logs ==="
+          kubectl logs -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager --tail=100 || true
+
           echo -e "\n=== Infrastructure Provider Logs ==="
           kubectl logs -n capd-system deployment/capd-controller-manager --tail=100 || true
 
+          echo -e "\n=== Webhook Services and Endpoints ==="
+          kubectl get svc,endpoints -A | grep webhook || true
+
+          echo -e "\n=== Webhook Certificates ==="
+          kubectl get certificate,certificaterequest,secret -A | grep -E "(webhook|serving-cert)" || true
+
+          echo -e "\n=== Cluster Resources ==="
+          kubectl get cluster,dockercluster,kubeadmcontrolplane,machine,dockermachine -A -o wide || true
+
+          echo -e "\n=== Describe Cluster ==="
+          kubectl describe cluster ${CLUSTER_NAME} || true
+
+          echo -e "\n=== Describe Machines ==="
+          kubectl describe machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME} || true
+
+          echo -e "\n=== Docker Containers ==="
+          docker ps -a | grep -E "(smoke-test|kind)" || true
+
+          echo -e "\n=== Kind Clusters ==="
+          kind get clusters || true
+
           echo -e "\n=== Describe Failed Pods ==="
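+          # Skip the header row (tail -n +2) and describe every pod that is neither
+          # Running nor Completed.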
           kubectl get pods -A | grep -v Running | grep -v Completed | tail -n +2 | while read namespace name ready status restarts age; do
             echo "Describing pod $name in namespace $namespace"
             kubectl describe pod -n $namespace $name
             echo "---"
           done
 
+          echo -e "\n=== CNI Diagnostics ==="
+          echo "Checking Calico installation status..."
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get pods -n tigera-operator -o wide || true
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get pods -n calico-system -o wide || true
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig get pods -n calico-apiserver -o wide || true
+
+          echo -e "\n=== Calico logs ==="
+          kubectl --kubeconfig=${CLUSTER_NAME}.kubeconfig logs -n tigera-operator -l app.kubernetes.io/name=tigera-operator --tail=50 || true
+
+          echo -e "\n=== Node CNI status ==="
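+          # CAPD containers are named after their CAPI Machines; the grep below
+          # assumes the control-plane machine names contain "<cluster>-controlplane".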
+          CONTROL_PLANE_CONTAINER=$(docker ps -a | grep ${CLUSTER_NAME}-controlplane | awk '{print $1}' | head -1)
+          if [ -n "$CONTROL_PLANE_CONTAINER" ]; then
+            echo "Control plane container: $CONTROL_PLANE_CONTAINER"
+
+            echo "=== Checking CNI binaries ==="
+            docker exec $CONTROL_PLANE_CONTAINER ls -la /opt/cni/bin/ || echo "CNI binaries directory not found"
+
+            echo -e "\n=== Checking CNI configuration ==="
+            docker exec $CONTROL_PLANE_CONTAINER ls -la /etc/cni/net.d/ || echo "CNI config directory not found"
+            docker exec $CONTROL_PLANE_CONTAINER cat /etc/cni/net.d/* 2>/dev/null || echo "No CNI config files found"
+
+            echo -e "\n=== Checking kubelet configuration ==="
+            docker exec $CONTROL_PLANE_CONTAINER cat /var/lib/kubelet/kubeadm-flags.env || true
+            docker exec $CONTROL_PLANE_CONTAINER ps aux | grep kubelet || true
+
+            echo -e "\n=== Node status inside container ==="
+            docker exec $CONTROL_PLANE_CONTAINER kubectl --kubeconfig=/etc/kubernetes/admin.conf get nodes -o wide || true
+            docker exec $CONTROL_PLANE_CONTAINER kubectl --kubeconfig=/etc/kubernetes/admin.conf describe nodes || true
+          fi
+
+          echo -e "\n=== CAPD Provider Configuration ==="
+          kubectl get dockercluster ${CLUSTER_NAME} -o yaml || true
+          kubectl get dockermachinetemplate -A -o yaml || true
+
+          echo -e "\n=== Helm releases ==="
+          helm list --all-namespaces --kubeconfig=${CLUSTER_NAME}.kubeconfig || true
+
       - name: Clean up
         if: always()
         run: |
+          echo "=== Cleaning up kind clusters ==="
+          # List all kind clusters before cleanup
+          echo "Current kind clusters:"
+          kind get clusters || true
+
+          # Delete workload cluster if it exists
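+          # CAPD node containers carry kind's cluster labels, so "kind delete
+          # cluster" also removes the CAPD-created workload cluster.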
+          echo "Deleting workload cluster: ${CLUSTER_NAME}"
+          kind delete cluster --name ${CLUSTER_NAME} || true
+
+          # Delete management cluster
+          echo "Deleting management cluster: capi-operator-smoke-test"
           kind delete cluster --name capi-operator-smoke-test || true
+
+          # Verify all clusters are deleted
+          echo "Remaining kind clusters:"
+          kind get clusters || true