⚠️ Split Helm chart into operator and providers charts with optional dependency #31
Workflow file for this run
name: Smoke Test
on:
  pull_request:
    branches:
      - main
      - 'release-*'
  push:
    branches:
      - main
  workflow_dispatch:
permissions:
  contents: read
jobs:
  smoke-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version-file: 'go.mod'
      - name: Install kubectl
        run: |
          curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          chmod +x kubectl
          sudo mv kubectl /usr/local/bin/
      - name: Install yq
        run: |
          wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O yq
          chmod +x yq
          sudo mv yq /usr/local/bin/
      - name: Install Helm
        run: |
          curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
      - name: Build Docker image
        run: |
          # Build the operator image with a specific tag for smoke test
          CONTROLLER_IMG=cluster-api-operator TAG=smoke-test make docker-build
          echo "Built image: cluster-api-operator-amd64:smoke-test"
          # Tag the image for easier reference
          docker tag cluster-api-operator-amd64:smoke-test cluster-api-operator:smoke-test
      - name: Build charts
        run: |
          make release-chart
          # Extract HELM_CHART_TAG from Makefile
          HELM_CHART_TAG=$(make -s -f Makefile -p | grep '^HELM_CHART_TAG :=' | cut -d' ' -f3)
          echo "HELM_CHART_TAG=$HELM_CHART_TAG" >> $GITHUB_ENV
          echo "Detected HELM_CHART_TAG: $HELM_CHART_TAG"
      - name: Create kind cluster
        run: |
          chmod +x ./hack/ensure-kind.sh
          ./hack/ensure-kind.sh
          # Create kind cluster with Docker socket mount for CAPD
          cat <<EOF > /tmp/kind-config.yaml
          kind: Cluster
          apiVersion: kind.x-k8s.io/v1alpha4
          networking:
            ipFamily: ipv4
          nodes:
          - role: control-plane
            extraMounts:
            - hostPath: /var/run/docker.sock
              containerPath: /var/run/docker.sock
          containerdConfigPatches:
          - |-
            [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
              endpoint = ["https://mirror.gcr.io", "https://registry-1.docker.io"]
          EOF
          kind create cluster --name capi-operator-smoke-test --config /tmp/kind-config.yaml --wait 5m
          kubectl cluster-info --context kind-capi-operator-smoke-test
      - name: Load Docker image to kind
        run: |
          # Load the built image into kind cluster
          kind load docker-image cluster-api-operator:smoke-test --name capi-operator-smoke-test
          echo "Loaded image cluster-api-operator:smoke-test into kind cluster"
      - name: Add Helm repositories
        run: |
          helm repo add jetstack https://charts.jetstack.io
          helm repo update
      - name: Install cert-manager
        run: |
          helm install cert-manager jetstack/cert-manager \
            --namespace cert-manager \
            --create-namespace \
            --set installCRDs=true \
            --wait \
            --timeout 5m
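      # The Helm release is split into two packages: the operator chart
      # (cluster-api-operator-<tag>.tgz, installed here) and a providers chart
      # (cluster-api-operator-providers-<tag>.tgz, installed in a later step).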
      - name: Install Cluster API Operator
        run: |
          # Use exact chart filename based on HELM_CHART_TAG
          CHART_PACKAGE="out/package/cluster-api-operator-${HELM_CHART_TAG}.tgz"
          echo "Using chart package: $CHART_PACKAGE"
          # Verify the file exists
          if [ ! -f "$CHART_PACKAGE" ]; then
            echo "Error: Chart package not found: $CHART_PACKAGE"
            ls -la out/package/
            exit 1
          fi
          helm install capi-operator "$CHART_PACKAGE" \
            --create-namespace \
            -n capi-operator-system \
            --set image.manager.repository=cluster-api-operator \
            --set image.manager.tag=smoke-test \
            --set image.manager.pullPolicy=IfNotPresent \
            --wait \
            --timeout 90s
      - name: Wait for CAPI Operator to be ready
        run: |
          kubectl wait --for=condition=Available --timeout=300s -n capi-operator-system deployment/capi-operator-cluster-api-operator
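      # The providers chart is expected to contain only the operator.cluster.x-k8s.io
      # provider resources (CoreProvider, BootstrapProvider, ControlPlaneProvider,
      # InfrastructureProvider); the operator installed above reconciles them into the
      # provider deployments that are waited on below.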
      - name: Deploy providers using cluster-api-operator-providers chart
        run: |
          # Create values file for providers
          cat <<EOF > /tmp/providers-values.yaml
          core:
            cluster-api:
              namespace: capi-system
          bootstrap:
            kubeadm:
              namespace: capi-kubeadm-bootstrap-system
          controlPlane:
            kubeadm:
              namespace: capi-kubeadm-control-plane-system
          infrastructure:
            docker:
              namespace: capd-system
          manager:
            featureGates:
              core:
                ClusterTopology: true
                ClusterResourceSet: true
                MachinePool: true
              kubeadm:
                ClusterTopology: true
                MachinePool: true
              docker:
                ClusterTopology: true
          EOF
          # Use exact providers chart filename based on HELM_CHART_TAG
          PROVIDERS_CHART_PACKAGE="out/package/cluster-api-operator-providers-${HELM_CHART_TAG}.tgz"
          echo "Using providers chart package: $PROVIDERS_CHART_PACKAGE"
          # Verify the file exists
          if [ ! -f "$PROVIDERS_CHART_PACKAGE" ]; then
            echo "Error: Providers chart package not found: $PROVIDERS_CHART_PACKAGE"
            ls -la out/package/
            exit 1
          fi
          helm install capi-providers "$PROVIDERS_CHART_PACKAGE" \
            -f /tmp/providers-values.yaml \
            --wait
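      # The waits below are guarded with `|| true` so a resource that has not been
      # created yet does not fail the job prematurely; the hard readiness gate is the
      # "Check provider health" step further down.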
      - name: Wait for providers to be ready
        run: |
          echo "=== Waiting for Core Provider to be ready ==="
          kubectl wait --for=condition=Ready --timeout=300s -n capi-system coreprovider/cluster-api || true
          echo -e "\n=== Waiting for Bootstrap Provider to be ready ==="
          kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-bootstrap-system bootstrapprovider/kubeadm || true
          echo -e "\n=== Waiting for Control Plane Provider to be ready ==="
          kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-control-plane-system controlplaneprovider/kubeadm || true
          echo -e "\n=== Waiting for Infrastructure Provider to be ready ==="
          kubectl wait --for=condition=Ready --timeout=300s -n capd-system infrastructureprovider/docker || true
          # Additional wait for deployments
          echo -e "\n=== Waiting for provider deployments ==="
          kubectl wait --for=condition=Available --timeout=300s -n capi-system deployment/capi-controller-manager || true
          kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager || true
          kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager || true
          kubectl wait --for=condition=Available --timeout=300s -n capd-system deployment/capd-controller-manager || true
          # Wait for webhooks to be ready
          echo -e "\n=== Waiting for webhook services ==="
          kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-bootstrap-system service/capi-kubeadm-bootstrap-webhook-service || true
          kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-control-plane-system service/capi-kubeadm-control-plane-webhook-service || true
      - name: Verify installation
        run: |
          echo "=== Cluster API Operator Status ==="
          kubectl get pods -n capi-operator-system
          echo -e "\n=== Core Provider Status ==="
          kubectl get coreprovider -A -o wide
          kubectl describe coreprovider -n capi-system cluster-api || true
          echo -e "\n=== Bootstrap Provider Status ==="
          kubectl get bootstrapprovider -A -o wide
          kubectl describe bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm || true
          echo -e "\n=== Control Plane Provider Status ==="
          kubectl get controlplaneprovider -A -o wide
          kubectl describe controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm || true
          echo -e "\n=== Infrastructure Provider Status ==="
          kubectl get infrastructureprovider -A -o wide
          kubectl describe infrastructureprovider -n capd-system docker || true
          echo -e "\n=== All Pods ==="
          kubectl get pods -A | grep -E "(capi-|capd-)"
          echo -e "\n=== Webhook Services ==="
          kubectl get svc -A | grep webhook
          echo -e "\n=== Webhook Certificates ==="
          kubectl get certificate,certificaterequest -A | grep -E "(capi-|capd-)"
          echo -e "\n=== CRDs ==="
          kubectl get crds | grep -E "(cluster.x-k8s.io|operator.cluster.x-k8s.io)"
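      # Each check below reads the provider CR's Ready condition via jsonpath and fails
      # the job (dumping the full CR for debugging) if it is not "True".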
      - name: Check provider health
        run: |
          # Check if core provider is ready
          CORE_READY=$(kubectl get coreprovider -n capi-system cluster-api -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
          if [ "$CORE_READY" != "True" ]; then
            echo "Core provider is not ready"
            kubectl get coreprovider -n capi-system cluster-api -o yaml
            exit 1
          fi
          # Check if bootstrap provider is ready
          BOOTSTRAP_READY=$(kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
          if [ "$BOOTSTRAP_READY" != "True" ]; then
            echo "Bootstrap provider is not ready"
            kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o yaml
            exit 1
          fi
          # Check if control plane provider is ready
          CONTROLPLANE_READY=$(kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
          if [ "$CONTROLPLANE_READY" != "True" ]; then
            echo "Control plane provider is not ready"
            kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o yaml
            exit 1
          fi
          # Check if infrastructure provider is ready
          INFRA_READY=$(kubectl get infrastructureprovider -n capd-system docker -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
          if [ "$INFRA_READY" != "True" ]; then
            echo "Infrastructure provider is not ready"
            kubectl get infrastructureprovider -n capd-system docker -o yaml
            exit 1
          fi
          echo "All providers are ready!"
          # Additional webhook readiness check
          echo -e "\n=== Checking webhook endpoints ==="
          kubectl get endpoints -A | grep webhook
      - name: Install clusterctl
        run: |
          # Install clusterctl
          curl -L https://github.com/kubernetes-sigs/cluster-api/releases/latest/download/clusterctl-linux-amd64 -o clusterctl
          chmod +x clusterctl
          sudo mv clusterctl /usr/local/bin/
          # Verify installation
          clusterctl version
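      # `clusterctl generate cluster` with --infrastructure docker renders a CAPD-based
      # workload cluster (one control plane node, two workers here); applying the
      # manifest to the management cluster hands provisioning off to the providers
      # installed above.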
      - name: Create workload cluster
        run: |
          echo "=== Generating workload cluster manifest ==="
          CLUSTER_NAME="capi-quickstart"
          echo "CLUSTER_NAME=$CLUSTER_NAME" >> $GITHUB_ENV
          clusterctl generate cluster $CLUSTER_NAME \
            --infrastructure docker \
            --flavor development \
            --kubernetes-version v1.33.0 \
            --control-plane-machine-count=1 \
            --worker-machine-count=2 \
            > capi-quickstart.yaml
          kubectl apply -f capi-quickstart.yaml
      - name: Get workload cluster kubeconfig
        run: |
          echo "=== Getting workload cluster kubeconfig ==="
          CLUSTER_NAMESPACE=default
          # Wait for kubeconfig secret to be available
          echo "Waiting for kubeconfig secret..."
          timeout 300s bash -c "
            while true; do
              if kubectl get secret ${CLUSTER_NAME}-kubeconfig -n ${CLUSTER_NAMESPACE} &>/dev/null; then
                echo 'Kubeconfig secret found'
                break
              fi
              echo -n '.'
              sleep 2
            done
          "
          # Use clusterctl to get the proper admin kubeconfig instead of extracting from secret directly
          echo "=== Using clusterctl to get admin kubeconfig ==="
          clusterctl get kubeconfig ${CLUSTER_NAME} --namespace ${CLUSTER_NAMESPACE} > ${CLUSTER_NAME}.kubeconfig
          # Verify kubeconfig file exists and has content
          if [ ! -s "${CLUSTER_NAME}.kubeconfig" ]; then
            echo "ERROR: kubeconfig file is empty or does not exist"
            exit 1
          fi
          echo "=== Kubeconfig content (first 10 lines) ==="
          head -n 10 ${CLUSTER_NAME}.kubeconfig
          # Export kubeconfig for subsequent steps
          export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
          echo "KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig" >> $GITHUB_ENV
          echo "=== Testing kubeconfig with admin privileges ==="
          # Wait for API server to be reachable
          for i in {1..30}; do
            if kubectl cluster-info &>/dev/null; then
              echo "Cluster API is reachable"
              kubectl cluster-info
              # Verify we have admin privileges by checking if we can list nodes
              if kubectl get nodes &>/dev/null; then
                echo "Admin privileges confirmed - can list nodes"
                break
              else
                echo "Waiting for admin privileges... (attempt $i/30)"
              fi
            else
              echo "Waiting for cluster API to be reachable... (attempt $i/30)"
            fi
            sleep 10
          done
          # Final verification of admin access
          kubectl auth can-i "*" "*" --all-namespaces
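      # The workload cluster nodes report NotReady until a CNI is installed, which is
      # why node readiness is only checked after this step.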
      - name: Install CNI plugin (Calico) using Helm
        run: |
          echo "=== Installing Calico CNI plugin using Helm ==="
          # Ensure KUBECONFIG is set
          echo "Using KUBECONFIG: $KUBECONFIG"
          # Add Calico Helm repository
          helm repo add projectcalico https://docs.tigera.io/calico/charts
          helm repo update
          # Apply the upstream Calico manifest (the Helm chart added above is not used here)
          kubectl apply -f https://raw.githubusercontent.com/projectcalico/calico/v3.26.1/manifests/calico.yaml
          echo "=== Waiting for Calico to be ready ==="
          kubectl wait --for=condition=Ready --timeout=300s pods -n tigera-operator -l app.kubernetes.io/name=tigera-operator || true
          # Wait for calico-system namespace to be created
          for i in {1..30}; do
            if kubectl get namespace calico-system &>/dev/null; then
              echo "calico-system namespace exists"
              break
            else
              echo "Waiting for calico-system namespace... (attempt $i/30)"
              sleep 10
            fi
          done
          kubectl wait --for=condition=Ready --timeout=300s pods -n calico-system --all || true
          echo "=== Calico installation complete ==="
          kubectl get pods -n tigera-operator || echo "tigera-operator namespace may not exist yet"
          kubectl get pods -n calico-system || echo "calico-system namespace may not exist yet"
      - name: Wait for nodes to be ready
        run: |
          echo "=== Waiting for control plane node to be ready ==="
          # Wait for the node to become ready after CNI installation
          kubectl wait --for=condition=Ready --timeout=300s nodes --all
          echo "=== Checking node status ==="
          kubectl get nodes -o wide
          echo "=== Waiting for control plane replicas ==="
          # Switch back to management cluster context for this check
          unset KUBECONFIG
          kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 --timeout=300s kubeadmcontrolplane -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
          echo "=== Final cluster status ==="
          kubectl get cluster ${CLUSTER_NAME} -o wide
          kubectl get machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
      - name: Verify kubectl commands work on workload cluster
        run: |
          # Ensure we're using the workload cluster kubeconfig
          export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
          echo "=== Testing kubectl get po on workload cluster ==="
          kubectl get po -A
          echo -e "\n=== Testing kubectl get nodes ==="
          kubectl get nodes
          echo -e "\n=== Verifying CNI is working ==="
          kubectl get pods -n calico-system || echo "calico-system namespace may not exist"
          kubectl get pods -n tigera-operator || echo "tigera-operator namespace may not exist"
          echo -e "\n=== Waiting for system pods to be ready ==="
          kubectl wait --for=condition=Ready --timeout=300s pods -n kube-system -l k8s-app=kube-proxy
          kubectl wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-apiserver
          kubectl wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-controller-manager
          kubectl wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-scheduler
      - name: Verify cluster functionality
        run: |
          # Ensure we're using the workload cluster kubeconfig
          export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
          echo "=== Final cluster verification ==="
          echo "Cluster nodes:"
          kubectl get nodes -o wide
          echo -e "\nAll pods:"
          kubectl get po -A
          echo -e "\nAll services:"
          kubectl get svc -A
          echo -e "\nCluster info:"
          kubectl cluster-info
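      # The two debug-collection steps below run only when the job has failed
      # (if: failure()) and guard most commands with fallbacks so that missing
      # resources do not hide the original error.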
      - name: Collect debug information on failure
        if: failure()
        run: |
          echo "=== Events ==="
          kubectl get events -A --sort-by='.lastTimestamp' | tail -50
          echo -e "\n=== CAPI Operator Logs ==="
          kubectl logs -n capi-operator-system deployment/capi-operator-cluster-api-operator --tail=100 || true
          echo -e "\n=== Core Provider Logs ==="
          kubectl logs -n capi-system deployment/capi-controller-manager --tail=100 || true
          echo -e "\n=== Bootstrap Provider Logs ==="
          kubectl logs -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager --tail=100 || true
          echo -e "\n=== Control Plane Provider Logs ==="
          kubectl logs -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager --tail=100 || true
          echo -e "\n=== Infrastructure Provider Logs ==="
          kubectl logs -n capd-system deployment/capd-controller-manager --tail=100 || true
          echo -e "\n=== Webhook Services and Endpoints ==="
          kubectl get svc,endpoints -A | grep webhook || true
          echo -e "\n=== Webhook Certificates ==="
          kubectl get certificate,certificaterequest,secret -A | grep -E "(webhook|serving-cert)" || true
          echo -e "\n=== Cluster Resources ==="
          kubectl get cluster,dockercluster,kubeadmcontrolplane,machine,dockermachine -A -o wide || true
          echo -e "\n=== Describe Cluster ==="
          kubectl describe cluster ${CLUSTER_NAME} || true
          echo -e "\n=== Describe Machines ==="
          kubectl describe machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME} || true
          echo -e "\n=== Docker Containers ==="
          docker ps -a | grep -E "(smoke-test|kind)" || true
          echo -e "\n=== Kind Clusters ==="
          kind get clusters || true
          echo -e "\n=== Describe Failed Pods ==="
          kubectl get pods -A | grep -v Running | grep -v Completed | tail -n +2 | while read namespace name ready status restarts age; do
            echo "Describing pod $name in namespace $namespace"
            kubectl describe pod -n $namespace $name
            echo "---"
          done
          echo -e "\n=== CNI Diagnostics ==="
          echo "Checking Calico installation status..."
          if [ -f "${CLUSTER_NAME}.kubeconfig" ]; then
            export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
            kubectl get pods -n tigera-operator -o wide || true
            kubectl get pods -n calico-system -o wide || true
            kubectl get pods -n calico-apiserver -o wide || true
            echo -e "\n=== Calico logs ==="
            kubectl logs -n tigera-operator -l app.kubernetes.io/name=tigera-operator --tail=50 || true
          else
            echo "Workload cluster kubeconfig not found"
          fi
          echo -e "\n=== Node CNI status ==="
          CONTROL_PLANE_CONTAINER=$(docker ps -a | grep ${CLUSTER_NAME}-controlplane | awk '{print $1}' | head -1)
          if [ ! -z "$CONTROL_PLANE_CONTAINER" ]; then
            echo "Control plane container: $CONTROL_PLANE_CONTAINER"
            echo "=== Checking CNI binaries ==="
            docker exec $CONTROL_PLANE_CONTAINER ls -la /opt/cni/bin/ || echo "CNI binaries directory not found"
            echo -e "\n=== Checking CNI configuration ==="
            docker exec $CONTROL_PLANE_CONTAINER ls -la /etc/cni/net.d/ || echo "CNI config directory not found"
            docker exec $CONTROL_PLANE_CONTAINER cat /etc/cni/net.d/* 2>/dev/null || echo "No CNI config files found"
            echo -e "\n=== Checking kubelet configuration ==="
            docker exec $CONTROL_PLANE_CONTAINER cat /var/lib/kubelet/kubeadm-flags.env || true
            docker exec $CONTROL_PLANE_CONTAINER ps aux | grep kubelet || true
            echo -e "\n=== Node status inside container ==="
            docker exec $CONTROL_PLANE_CONTAINER kubectl --kubeconfig=/etc/kubernetes/admin.conf get nodes -o wide || true
            docker exec $CONTROL_PLANE_CONTAINER kubectl --kubeconfig=/etc/kubernetes/admin.conf describe nodes || true
          fi
          echo -e "\n=== CAPD Provider Configuration ==="
          # Switch back to management cluster context
          unset KUBECONFIG
          kubectl get dockercluster ${CLUSTER_NAME} -o yaml || true
          kubectl get dockermachinetemplate -A -o yaml || true
          echo -e "\n=== Helm releases ==="
          if [ -f "${CLUSTER_NAME}.kubeconfig" ]; then
            export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
            helm list --all-namespaces || true
          else
            echo "Workload cluster kubeconfig not found"
          fi
      - name: Collect workload cluster debug information on failure
        if: failure()
        run: |
          echo "=== Workload Cluster Debug Information ==="
          # Check if workload cluster kubeconfig exists
          if [ -f "${CLUSTER_NAME}.kubeconfig" ]; then
            export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
            echo "=== Workload cluster status ==="
            kubectl cluster-info || echo "Failed to get cluster info"
            echo -e "\n=== All namespaces in workload cluster ==="
            kubectl get namespaces || echo "Failed to get namespaces"
            echo -e "\n=== All pods in workload cluster ==="
            kubectl get pods -A -o wide || echo "Failed to get pods"
            echo -e "\n=== Pod descriptions for non-running pods ==="
            kubectl get pods -A --field-selector=status.phase!=Running,status.phase!=Succeeded -o json | \
              jq -r '.items[] | "\(.metadata.namespace)/\(.metadata.name)"' | \
              while read pod; do
                echo "Describing pod: $pod"
                kubectl describe pod -n $(echo $pod | cut -d'/' -f1) $(echo $pod | cut -d'/' -f2)
                echo "---"
              done || echo "No non-running pods or failed to describe"
            echo -e "\n=== Events in workload cluster (last 100) ==="
            kubectl get events -A --sort-by='.lastTimestamp' | tail -100 || echo "Failed to get events"
            echo -e "\n=== Node conditions ==="
            kubectl get nodes -o json | jq -r '.items[] | .metadata.name as $name | .status.conditions[] | "\($name): \(.type)=\(.status) (\(.reason))"' || echo "Failed to get node conditions"
            echo -e "\n=== System pod logs ==="
            # kube-apiserver logs
            kubectl logs -n kube-system -l component=kube-apiserver --tail=50 || echo "No kube-apiserver logs"
            # kube-controller-manager logs
            kubectl logs -n kube-system -l component=kube-controller-manager --tail=50 || echo "No kube-controller-manager logs"
            # kube-scheduler logs
            kubectl logs -n kube-system -l component=kube-scheduler --tail=50 || echo "No kube-scheduler logs"
          else
            echo "Workload cluster kubeconfig not found at ${CLUSTER_NAME}.kubeconfig"
          fi
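      # Cleanup runs regardless of the job outcome (if: always()) so the kind clusters
      # created above are always deleted.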
      - name: Clean up
        if: always()
        run: |
          echo "=== Cleaning up kind clusters ==="
          # List all kind clusters before cleanup
          echo "Current kind clusters:"
          kind get clusters || true
          # Delete workload cluster if it exists
          echo "Deleting workload cluster: ${CLUSTER_NAME}"
          kind delete cluster --name ${CLUSTER_NAME} || true
          # Delete management cluster
          echo "Deleting management cluster: capi-operator-smoke-test"
          kind delete cluster --name capi-operator-smoke-test || true
          # Verify all clusters are deleted
          echo "Remaining kind clusters:"
          kind get clusters || true