⚠️ Split Helm chart into operator and providers charts with optional dependency #31

Workflow file for this run

name: Smoke Test
on:
pull_request:
branches:
- main
- 'release-*'
push:
branches:
- main
workflow_dispatch:
permissions:
contents: read
jobs:
smoke-test:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
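# fetch-depth: 0 pulls the full git history; presumably the Makefile derives version/tag information from git describe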
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
- name: Install kubectl
run: |
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
chmod +x kubectl
sudo mv kubectl /usr/local/bin/
- name: Install yq
run: |
wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O yq
chmod +x yq
sudo mv yq /usr/local/bin/
- name: Install Helm
run: |
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
- name: Build Docker image
run: |
# Build the operator image with a specific tag for smoke test
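# The docker-build target appears to name images <CONTROLLER_IMG>-<arch>:<TAG>, hence the -amd64 suffix referenced below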
CONTROLLER_IMG=cluster-api-operator TAG=smoke-test make docker-build
echo "Built image: cluster-api-operator-amd64:smoke-test"
# Tag the image for easier reference
docker tag cluster-api-operator-amd64:smoke-test cluster-api-operator:smoke-test
- name: Build charts
run: |
make release-chart
# Extract HELM_CHART_TAG from Makefile
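# `make -p` prints make's internal variable database, so the chart version defined in the Makefile can be read here without hard-coding it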
HELM_CHART_TAG=$(make -s -f Makefile -p | grep '^HELM_CHART_TAG :=' | cut -d' ' -f3)
echo "HELM_CHART_TAG=$HELM_CHART_TAG" >> $GITHUB_ENV
echo "Detected HELM_CHART_TAG: $HELM_CHART_TAG"
- name: Create kind cluster
run: |
chmod +x ./hack/ensure-kind.sh
./hack/ensure-kind.sh
# Create kind cluster with Docker socket mount for CAPD
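# CAPD manages workload-cluster machines as sibling Docker containers, so the kind node needs the host Docker socket; the registry mirror patch presumably reduces Docker Hub rate-limit failures on shared CI runners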
cat <<EOF > /tmp/kind-config.yaml
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
networking:
ipFamily: ipv4
nodes:
- role: control-plane
extraMounts:
- hostPath: /var/run/docker.sock
containerPath: /var/run/docker.sock
containerdConfigPatches:
- |-
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
endpoint = ["https://mirror.gcr.io", "https://registry-1.docker.io"]
EOF
kind create cluster --name capi-operator-smoke-test --config /tmp/kind-config.yaml --wait 5m
kubectl cluster-info --context kind-capi-operator-smoke-test
- name: Load Docker image to kind
run: |
# Load the built image into kind cluster
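# The smoke-test tag exists only in the runner's local Docker daemon, so it is side-loaded into the kind node's containerd store; with pullPolicy=IfNotPresent no registry pull is attempted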
kind load docker-image cluster-api-operator:smoke-test --name capi-operator-smoke-test
echo "Loaded image cluster-api-operator:smoke-test into kind cluster"
- name: Add Helm repositories
run: |
helm repo add jetstack https://charts.jetstack.io
helm repo update
- name: Install cert-manager
run: |
helm install cert-manager jetstack/cert-manager \
--namespace cert-manager \
--create-namespace \
--set installCRDs=true \
--wait \
--timeout 5m
- name: Install Cluster API Operator
run: |
# Use exact chart filename based on HELM_CHART_TAG
CHART_PACKAGE="out/package/cluster-api-operator-${HELM_CHART_TAG}.tgz"
echo "Using chart package: $CHART_PACKAGE"
# Verify the file exists
if [ ! -f "$CHART_PACKAGE" ]; then
echo "Error: Chart package not found: $CHART_PACKAGE"
ls -la out/package/
exit 1
fi
helm install capi-operator "$CHART_PACKAGE" \
--create-namespace \
-n capi-operator-system \
--set image.manager.repository=cluster-api-operator \
--set image.manager.tag=smoke-test \
--set image.manager.pullPolicy=IfNotPresent \
--wait \
--timeout 90s
- name: Wait for CAPI Operator to be ready
run: |
kubectl wait --for=condition=Available --timeout=300s -n capi-operator-system deployment/capi-operator-cluster-api-operator
- name: Deploy providers using cluster-api-operator-providers chart
run: |
# Create values file for providers
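# Each provider section maps a provider name to its target namespace; the manager.featureGates block is presumably passed through to the generated provider CRs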
cat <<EOF > /tmp/providers-values.yaml
core:
cluster-api:
namespace: capi-system
bootstrap:
kubeadm:
namespace: capi-kubeadm-bootstrap-system
controlPlane:
kubeadm:
namespace: capi-kubeadm-control-plane-system
infrastructure:
docker:
namespace: capd-system
manager:
featureGates:
core:
ClusterTopology: true
ClusterResourceSet: true
MachinePool: true
kubeadm:
ClusterTopology: true
MachinePool: true
docker:
ClusterTopology: true
EOF
# Use exact providers chart filename based on HELM_CHART_TAG
PROVIDERS_CHART_PACKAGE="out/package/cluster-api-operator-providers-${HELM_CHART_TAG}.tgz"
echo "Using providers chart package: $PROVIDERS_CHART_PACKAGE"
# Verify the file exists
if [ ! -f "$PROVIDERS_CHART_PACKAGE" ]; then
echo "Error: Providers chart package not found: $PROVIDERS_CHART_PACKAGE"
ls -la out/package/
exit 1
fi
helm install capi-providers "$PROVIDERS_CHART_PACKAGE" \
-f /tmp/providers-values.yaml \
--wait
- name: Wait for providers to be ready
run: |
echo "=== Waiting for Core Provider to be ready ==="
kubectl wait --for=condition=Ready --timeout=300s -n capi-system coreprovider/cluster-api || true
echo -e "\n=== Waiting for Bootstrap Provider to be ready ==="
kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-bootstrap-system bootstrapprovider/kubeadm || true
echo -e "\n=== Waiting for Control Plane Provider to be ready ==="
kubectl wait --for=condition=Ready --timeout=300s -n capi-kubeadm-control-plane-system controlplaneprovider/kubeadm || true
echo -e "\n=== Waiting for Infrastructure Provider to be ready ==="
kubectl wait --for=condition=Ready --timeout=300s -n capd-system infrastructureprovider/docker || true
# Additional wait for deployments
echo -e "\n=== Waiting for provider deployments ==="
kubectl wait --for=condition=Available --timeout=300s -n capi-system deployment/capi-controller-manager || true
kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager || true
kubectl wait --for=condition=Available --timeout=300s -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager || true
kubectl wait --for=condition=Available --timeout=300s -n capd-system deployment/capd-controller-manager || true
# Wait for webhooks to be ready
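# Provider admission webhooks must be serving before workload cluster manifests are applied, otherwise the later kubectl apply can fail with webhook connection errors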
echo -e "\n=== Waiting for webhook services ==="
kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-bootstrap-system service/capi-kubeadm-bootstrap-webhook-service || true
kubectl wait --for=jsonpath='{.status.loadBalancer}' --timeout=300s -n capi-kubeadm-control-plane-system service/capi-kubeadm-control-plane-webhook-service || true
- name: Verify installation
run: |
echo "=== Cluster API Operator Status ==="
kubectl get pods -n capi-operator-system
echo -e "\n=== Core Provider Status ==="
kubectl get coreprovider -A -o wide
kubectl describe coreprovider -n capi-system cluster-api || true
echo -e "\n=== Bootstrap Provider Status ==="
kubectl get bootstrapprovider -A -o wide
kubectl describe bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm || true
echo -e "\n=== Control Plane Provider Status ==="
kubectl get controlplaneprovider -A -o wide
kubectl describe controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm || true
echo -e "\n=== Infrastructure Provider Status ==="
kubectl get infrastructureprovider -A -o wide
kubectl describe infrastructureprovider -n capd-system docker || true
echo -e "\n=== All Pods ==="
kubectl get pods -A | grep -E "(capi-|capd-)"
echo -e "\n=== Webhook Services ==="
kubectl get svc -A | grep webhook
echo -e "\n=== Webhook Certificates ==="
kubectl get certificate,certificaterequest -A | grep -E "(capi-|capd-)"
echo -e "\n=== CRDs ==="
kubectl get crds | grep -E "(cluster.x-k8s.io|operator.cluster.x-k8s.io)"
- name: Check provider health
run: |
# Check if core provider is ready
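# The jsonpath filter selects the status of the condition whose type is "Ready"; the same pattern is repeated for the remaining providers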
CORE_READY=$(kubectl get coreprovider -n capi-system cluster-api -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
if [ "$CORE_READY" != "True" ]; then
echo "Core provider is not ready"
kubectl get coreprovider -n capi-system cluster-api -o yaml
exit 1
fi
# Check if bootstrap provider is ready
BOOTSTRAP_READY=$(kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
if [ "$BOOTSTRAP_READY" != "True" ]; then
echo "Bootstrap provider is not ready"
kubectl get bootstrapprovider -n capi-kubeadm-bootstrap-system kubeadm -o yaml
exit 1
fi
# Check if control plane provider is ready
CONTROLPLANE_READY=$(kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
if [ "$CONTROLPLANE_READY" != "True" ]; then
echo "Control plane provider is not ready"
kubectl get controlplaneprovider -n capi-kubeadm-control-plane-system kubeadm -o yaml
exit 1
fi
# Check if infrastructure provider is ready
INFRA_READY=$(kubectl get infrastructureprovider -n capd-system docker -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
if [ "$INFRA_READY" != "True" ]; then
echo "Infrastructure provider is not ready"
kubectl get infrastructureprovider -n capd-system docker -o yaml
exit 1
fi
echo "All providers are ready!"
# Additional webhook readiness check
echo -e "\n=== Checking webhook endpoints ==="
kubectl get endpoints -A | grep webhook
- name: Install clusterctl
run: |
# Install clusterctl
curl -L https://github.com/kubernetes-sigs/cluster-api/releases/latest/download/clusterctl-linux-amd64 -o clusterctl
chmod +x clusterctl
sudo mv clusterctl /usr/local/bin/
# Verify installation
clusterctl version
- name: Create workload cluster
run: |
echo "=== Generating workload cluster manifest ==="
CLUSTER_NAME="capi-quickstart"
echo "CLUSTER_NAME=$CLUSTER_NAME" >> $GITHUB_ENV
clusterctl generate cluster $CLUSTER_NAME \
--infrastructure docker \
--flavor development \
--kubernetes-version v1.33.0 \
--control-plane-machine-count=1 \
--worker-machine-count=2 \
> capi-quickstart.yaml
kubectl apply -f capi-quickstart.yaml
- name: Get workload cluster kubeconfig
run: |
echo "=== Getting workload cluster kubeconfig ==="
CLUSTER_NAMESPACE=default
# Wait for kubeconfig secret to be available
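# Cluster API writes the workload cluster's kubeconfig to a Secret named <cluster-name>-kubeconfig in the cluster's namespace once the control plane initializes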
echo "Waiting for kubeconfig secret..."
timeout 300s bash -c "
while true; do
if kubectl get secret ${CLUSTER_NAME}-kubeconfig -n ${CLUSTER_NAMESPACE} &>/dev/null; then
echo 'Kubeconfig secret found'
break
fi
echo -n '.'
sleep 2
done
"
# Use clusterctl to get the proper admin kubeconfig instead of extracting from secret directly
echo "=== Using clusterctl to get admin kubeconfig ==="
clusterctl get kubeconfig ${CLUSTER_NAME} --namespace ${CLUSTER_NAMESPACE} > ${CLUSTER_NAME}.kubeconfig
# Verify kubeconfig file exists and has content
if [ ! -s "${CLUSTER_NAME}.kubeconfig" ]; then
echo "ERROR: kubeconfig file is empty or does not exist"
exit 1
fi
echo "=== Kubeconfig content (first 10 lines) ==="
head -n 10 ${CLUSTER_NAME}.kubeconfig
# Export kubeconfig for subsequent steps
export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
echo "KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig" >> $GITHUB_ENV
echo "=== Testing kubeconfig with admin privileges ==="
# Wait for API server to be reachable
for i in {1..30}; do
if kubectl cluster-info &>/dev/null; then
echo "Cluster API is reachable"
kubectl cluster-info
# Verify we have admin privileges by checking if we can list nodes
if kubectl get nodes &>/dev/null; then
echo "Admin privileges confirmed - can list nodes"
break
else
echo "Waiting for admin privileges... (attempt $i/30)"
fi
else
echo "Waiting for cluster API to be reachable... (attempt $i/30)"
fi
sleep 10
done
# Final verification of admin access
kubectl auth can-i "*" "*" --all-namespaces
- name: Install CNI plugin (Calico) using Helm
run: |
echo "=== Installing Calico CNI plugin using Helm ==="
# Ensure KUBECONFIG is set
echo "Using KUBECONFIG: $KUBECONFIG"
# Add Calico Helm repository
helm repo add projectcalico https://docs.tigera.io/calico/charts
helm repo update
# Install Calico via the projectcalico/tigera-operator Helm chart so the tigera-operator and calico-system namespaces waited on below are created
helm install calico projectcalico/tigera-operator \
--version v3.26.1 \
--namespace tigera-operator \
--create-namespace \
--wait \
--timeout 5m
echo "=== Waiting for Calico to be ready ==="
kubectl wait --for=condition=Ready --timeout=300s pods -n tigera-operator -l app.kubernetes.io/name=tigera-operator || true
# Wait for calico-system namespace to be created
for i in {1..30}; do
if kubectl get namespace calico-system &>/dev/null; then
echo "calico-system namespace exists"
break
else
echo "Waiting for calico-system namespace... (attempt $i/30)"
sleep 10
fi
done
kubectl wait --for=condition=Ready --timeout=300s pods -n calico-system --all || true
echo "=== Calico installation complete ==="
kubectl get pods -n tigera-operator
kubectl get pods -n calico-system || echo "calico-system namespace may not exist yet"
- name: Wait for nodes to be ready
run: |
echo "=== Waiting for control plane node to be ready ==="
# Wait for the node to become ready after CNI installation
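# Nodes report NotReady until a CNI plugin is installed, because the kubelet's runtime network status stays not-ready without one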
kubectl wait --for=condition=Ready --timeout=300s nodes --all
echo "=== Checking node status ==="
kubectl get nodes -o wide
echo "=== Waiting for control plane replicas ==="
# Switch back to management cluster context for this check
unset KUBECONFIG
kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 --timeout=300s kubeadmcontrolplane -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
echo "=== Final cluster status ==="
kubectl get cluster ${CLUSTER_NAME} -o wide
kubectl get machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME}
- name: Verify kubectl commands work on workload cluster
run: |
# Ensure we're using the workload cluster kubeconfig
export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
echo "=== Testing kubectl get po on workload cluster ==="
kubectl get po -A
echo -e "\n=== Testing kubectl get nodes ==="
kubectl get nodes
echo -e "\n=== Verifying CNI is working ==="
kubectl get pods -n calico-system || echo "calico-system namespace may not exist"
kubectl get pods -n tigera-operator
echo -e "\n=== Waiting for system pods to be ready ==="
kubectl wait --for=condition=Ready --timeout=300s pods -n kube-system -l k8s-app=kube-proxy
kubectl wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-apiserver
kubectl wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-controller-manager
kubectl wait --for=condition=Ready --timeout=300s pods -n kube-system -l component=kube-scheduler
- name: Verify cluster functionality
run: |
# Ensure we're using the workload cluster kubeconfig
export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
echo "=== Final cluster verification ==="
echo "Cluster nodes:"
kubectl get nodes -o wide
echo -e "\nAll pods:"
kubectl get po -A
echo -e "\nAll services:"
kubectl get svc -A
echo -e "\nCluster info:"
kubectl cluster-info
- name: Collect debug information on failure
if: failure()
run: |
echo "=== Events ==="
kubectl get events -A --sort-by='.lastTimestamp' | tail -50
echo -e "\n=== CAPI Operator Logs ==="
kubectl logs -n capi-operator-system deployment/capi-operator-cluster-api-operator --tail=100 || true
echo -e "\n=== Core Provider Logs ==="
kubectl logs -n capi-system deployment/capi-controller-manager --tail=100 || true
echo -e "\n=== Bootstrap Provider Logs ==="
kubectl logs -n capi-kubeadm-bootstrap-system deployment/capi-kubeadm-bootstrap-controller-manager --tail=100 || true
echo -e "\n=== Control Plane Provider Logs ==="
kubectl logs -n capi-kubeadm-control-plane-system deployment/capi-kubeadm-control-plane-controller-manager --tail=100 || true
echo -e "\n=== Infrastructure Provider Logs ==="
kubectl logs -n capd-system deployment/capd-controller-manager --tail=100 || true
echo -e "\n=== Webhook Services and Endpoints ==="
kubectl get svc,endpoints -A | grep webhook || true
echo -e "\n=== Webhook Certificates ==="
kubectl get certificate,certificaterequest,secret -A | grep -E "(webhook|serving-cert)" || true
echo -e "\n=== Cluster Resources ==="
kubectl get cluster,dockercluster,kubeadmcontrolplane,machine,dockermachine -A -o wide || true
echo -e "\n=== Describe Cluster ==="
kubectl describe cluster ${CLUSTER_NAME} || true
echo -e "\n=== Describe Machines ==="
kubectl describe machines -l cluster.x-k8s.io/cluster-name=${CLUSTER_NAME} || true
echo -e "\n=== Docker Containers ==="
docker ps -a | grep -E "(smoke-test|kind)" || true
echo -e "\n=== Kind Clusters ==="
kind get clusters || true
echo -e "\n=== Describe Failed Pods ==="
kubectl get pods -A | grep -v Running | grep -v Completed | tail -n +2 | while read namespace name ready status restarts age; do
echo "Describing pod $name in namespace $namespace"
kubectl describe pod -n $namespace $name
echo "---"
done
echo -e "\n=== CNI Diagnostics ==="
echo "Checking Calico installation status..."
if [ -f "${CLUSTER_NAME}.kubeconfig" ]; then
export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
kubectl get pods -n tigera-operator -o wide || true
kubectl get pods -n calico-system -o wide || true
kubectl get pods -n calico-apiserver -o wide || true
echo -e "\n=== Calico logs ==="
kubectl logs -n tigera-operator -l app.kubernetes.io/name=tigera-operator --tail=50 || true
else
echo "Workload cluster kubeconfig not found"
fi
echo -e "\n=== Node CNI status ==="
CONTROL_PLANE_CONTAINER=$(docker ps -a | grep ${CLUSTER_NAME}-control-plane | awk '{print $1}' | head -1)
if [ ! -z "$CONTROL_PLANE_CONTAINER" ]; then
echo "Control plane container: $CONTROL_PLANE_CONTAINER"
echo "=== Checking CNI binaries ==="
docker exec $CONTROL_PLANE_CONTAINER ls -la /opt/cni/bin/ || echo "CNI binaries directory not found"
echo -e "\n=== Checking CNI configuration ==="
docker exec $CONTROL_PLANE_CONTAINER ls -la /etc/cni/net.d/ || echo "CNI config directory not found"
docker exec $CONTROL_PLANE_CONTAINER cat /etc/cni/net.d/* 2>/dev/null || echo "No CNI config files found"
echo -e "\n=== Checking kubelet configuration ==="
docker exec $CONTROL_PLANE_CONTAINER cat /var/lib/kubelet/kubeadm-flags.env || true
docker exec $CONTROL_PLANE_CONTAINER ps aux | grep kubelet || true
echo -e "\n=== Node status inside container ==="
docker exec $CONTROL_PLANE_CONTAINER kubectl --kubeconfig=/etc/kubernetes/admin.conf get nodes -o wide || true
docker exec $CONTROL_PLANE_CONTAINER kubectl --kubeconfig=/etc/kubernetes/admin.conf describe nodes || true
fi
echo -e "\n=== CAPD Provider Configuration ==="
# Switch back to management cluster context
unset KUBECONFIG
kubectl get dockercluster ${CLUSTER_NAME} -o yaml || true
kubectl get dockermachinetemplate -A -o yaml || true
echo -e "\n=== Helm releases ==="
if [ -f "${CLUSTER_NAME}.kubeconfig" ]; then
export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
helm list --all-namespaces || true
else
echo "Workload cluster kubeconfig not found"
fi
- name: Collect workload cluster debug information on failure
if: failure()
run: |
echo "=== Workload Cluster Debug Information ==="
# Check if workload cluster kubeconfig exists
if [ -f "${CLUSTER_NAME}.kubeconfig" ]; then
export KUBECONFIG=$(pwd)/${CLUSTER_NAME}.kubeconfig
echo "=== Workload cluster status ==="
kubectl cluster-info || echo "Failed to get cluster info"
echo -e "\n=== All namespaces in workload cluster ==="
kubectl get namespaces || echo "Failed to get namespaces"
echo -e "\n=== All pods in workload cluster ==="
kubectl get pods -A -o wide || echo "Failed to get pods"
echo -e "\n=== Pod descriptions for non-running pods ==="
kubectl get pods -A --field-selector=status.phase!=Running,status.phase!=Succeeded -o json | \
jq -r '.items[] | "\(.metadata.namespace)/\(.metadata.name)"' | \
while read pod; do
echo "Describing pod: $pod"
kubectl describe pod -n $(echo $pod | cut -d'/' -f1) $(echo $pod | cut -d'/' -f2)
echo "---"
done || echo "No non-running pods or failed to describe"
echo -e "\n=== Events in workload cluster (last 100) ==="
kubectl get events -A --sort-by='.lastTimestamp' | tail -100 || echo "Failed to get events"
echo -e "\n=== Node conditions ==="
kubectl get nodes -o json | jq -r '.items[] | .metadata.name as $name | .status.conditions[] | "\($name): \(.type)=\(.status) (\(.reason))"' || echo "Failed to get node conditions"
echo -e "\n=== System pod logs ==="
# kube-apiserver logs
kubectl logs -n kube-system -l component=kube-apiserver --tail=50 || echo "No kube-apiserver logs"
# kube-controller-manager logs
kubectl logs -n kube-system -l component=kube-controller-manager --tail=50 || echo "No kube-controller-manager logs"
# kube-scheduler logs
kubectl logs -n kube-system -l component=kube-scheduler --tail=50 || echo "No kube-scheduler logs"
else
echo "Workload cluster kubeconfig not found at ${CLUSTER_NAME}.kubeconfig"
fi
- name: Clean up
if: always()
run: |
echo "=== Cleaning up kind clusters ==="
# List all kind clusters before cleanup
echo "Current kind clusters:"
kind get clusters || true
# Delete workload cluster if it exists
echo "Deleting workload cluster: ${CLUSTER_NAME}"
kind delete cluster --name ${CLUSTER_NAME} || true
# Delete management cluster
echo "Deleting management cluster: capi-operator-smoke-test"
kind delete cluster --name capi-operator-smoke-test || true
# Verify all clusters are deleted
echo "Remaining kind clusters:"
kind get clusters || true