From 780315f94c360bf5783085bfb8c091fa3ab009fa Mon Sep 17 00:00:00 2001
From: Scotte Zinn
Date: Wed, 19 Feb 2025 16:25:34 -0500
Subject: [PATCH] Futzing with the staging bringup script

---
 .../bootstrap/apps/resources/prepare.sh | 108 ++++++++----------
 scripts/common.sh                       |  91 +++++++++++++++
 2 files changed, 137 insertions(+), 62 deletions(-)
 create mode 100755 scripts/common.sh

diff --git a/kubernetes/staging/bootstrap/apps/resources/prepare.sh b/kubernetes/staging/bootstrap/apps/resources/prepare.sh
index 11196a3726..c5320dde17 100755
--- a/kubernetes/staging/bootstrap/apps/resources/prepare.sh
+++ b/kubernetes/staging/bootstrap/apps/resources/prepare.sh
@@ -2,50 +2,31 @@
 set -euo pipefail
 
-# Set default values for the 'gum log' command
-readonly LOG_ARGS=("log" "--time=rfc3339" "--formatter=text" "--structured" "--level")
-
-# Verify required CLI tools are installed
-function check_dependencies() {
-    local deps=("gum" "jq" "kubectl" "kustomize" "op" "talosctl" "yq")
-    local missing=()
-
-    for dep in "${deps[@]}"; do
-        if ! command -v "${dep}" &>/dev/null; then
-            missing+=("${dep}")
-        fi
-    done
-
-    if [ ${#missing[@]} -ne 0 ]; then
-        if ! command -v gum &>/dev/null; then
-            printf "%s \033[1;95m%s\033[0m Missing required dependencies \033[0;30mdependencies=\033[0m\"%s\"\n" \
-                "$(date --iso-8601=seconds)" "FATAL" "${missing[*]}"
-            exit 1
-        fi
-        gum "${LOG_ARGS[@]}" fatal "Missing required dependencies" dependencies "${missing[*]}"
-    fi
-}
+# shellcheck disable=SC2155
+export ROOT_DIR="$(git rev-parse --show-toplevel)"
+
+# shellcheck disable=SC1091
+source "${ROOT_DIR}/scripts/common.sh"
 
 # Talos requires the nodes to be 'Ready=False' before applying resources
 function wait_for_nodes() {
-    gum "${LOG_ARGS[@]}" debug "Waiting for nodes to be available"
+    log debug "Waiting for nodes to be available"
 
     # Skip waiting if all nodes are 'Ready=True'
-    if kubectl --context "${CLUSTER_CONTEXT}" wait nodes --for=condition=Ready=True --all --timeout=10s &>/dev/null; then
-        gum "${LOG_ARGS[@]}" info "Nodes are available and ready, skipping wait for nodes"
+    if kubectl_cmd wait nodes --for=condition=Ready=True --all --timeout=10s &>/dev/null; then
+        log info "Nodes are available and ready, skipping wait for nodes"
         return
     fi
 
     # Wait for all nodes to be 'Ready=False'
-    until kubectl --context "${CLUSTER_CONTEXT}" wait nodes --for=condition=Ready=False --all --timeout=10s &>/dev/null; do
-        gum "${LOG_ARGS[@]}" info "Nodes are not available, waiting for nodes to be available"
+    until kubectl_cmd wait nodes --for=condition=Ready=False --all --timeout=10s &>/dev/null; do
+        log info "Nodes are not available, waiting for nodes to be available"
         sleep 10
     done
 }
 
 # Applications in the helmfile require Prometheus custom resources (e.g. servicemonitors)
 function apply_prometheus_crds() {
-    gum "${LOG_ARGS[@]}" debug "Applying Prometheus CRDs"
+    log debug "Applying Prometheus CRDs"
 
     # renovate: datasource=github-releases depName=prometheus-operator/prometheus-operator
     local -r version=v0.80.0
@@ -53,36 +34,36 @@ function apply_prometheus_crds() {
     # Fetch resources using kustomize build
     if ! resources=$(kustomize build "https://github.com/prometheus-operator/prometheus-operator/?ref=${version}" 2>/dev/null) || [[ -z "${resources}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "Failed to fetch Prometheus CRDs, check the version or the repository URL"
+        log fatal "Failed to fetch Prometheus CRDs, check the version or the repository URL"
     fi
 
     # Extract only CustomResourceDefinitions
     if ! crds=$(echo "${resources}" | yq '. | select(.kind == "CustomResourceDefinition")' 2>/dev/null) || [[ -z "${crds}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "No CustomResourceDefinitions found in the fetched resources"
+        log fatal "No CustomResourceDefinitions found in the fetched resources"
     fi
 
     # Check if the CRDs are up-to-date
-    if echo "${crds}" | kubectl --context "${CLUSTER_CONTEXT}" diff --filename - &>/dev/null; then
-        gum "${LOG_ARGS[@]}" info "Prometheus CRDs are up-to-date"
+    if echo "${crds}" | kubectl_cmd diff --filename - &>/dev/null; then
+        log info "Prometheus CRDs are up-to-date"
         return
     fi
 
     # Apply the CRDs
-    if echo "${crds}" | kubectl --context "${CLUSTER_CONTEXT}" apply --server-side --filename - &>/dev/null; then
-        gum "${LOG_ARGS[@]}" info "Prometheus CRDs applied successfully"
+    if echo "${crds}" | kubectl_cmd apply --server-side --filename - &>/dev/null; then
+        log info "Prometheus CRDs applied successfully"
     else
-        gum "${LOG_ARGS[@]}" fatal "Failed to apply Prometheus CRDs"
+        log fatal "Failed to apply Prometheus CRDs"
     fi
 }
 
 # The application namespaces are created before applying the resources
 function apply_namespaces() {
-    gum "${LOG_ARGS[@]}" debug "Applying namespaces"
+    log debug "Applying namespaces"
 
     local -r apps_dir="${KUBERNETES_DIR}/apps"
 
     if [[ ! -d "${apps_dir}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "Directory does not exist" directory "${apps_dir}"
+        log fatal "Directory does not exist" directory "${apps_dir}"
     fi
 
     for app in "${apps_dir}"/*/; do
@@ -90,89 +71,92 @@
         # Check if the namespace resources are up-to-date
         if kubectl --context "${CLUSTER_CONTEXT}" get namespace "${namespace}" &>/dev/null; then
-            gum "${LOG_ARGS[@]}" info "Namespace resource is up-to-date" resource "${namespace}"
+            log info "Namespace resource is up-to-date" resource "${namespace}"
             continue
         fi
 
         # Apply the namespace resources
-        if kubectl --context "${CLUSTER_CONTEXT}" create namespace "${namespace}" --dry-run=client --output=yaml \
-            | kubectl --context "${CLUSTER_CONTEXT}" apply --server-side --filename - &>/dev/null;
+        if kubectl_cmd create namespace "${namespace}" --dry-run=client --output=yaml \
+            | kubectl_cmd apply --server-side --filename - &>/dev/null;
         then
-            gum "${LOG_ARGS[@]}" info "Namespace resource applied" resource "${namespace}"
+            log info "Namespace resource applied" resource "${namespace}"
         else
-            gum "${LOG_ARGS[@]}" fatal "Failed to apply namespace resource" resource "${namespace}"
+            log fatal "Failed to apply namespace resource" resource "${namespace}"
         fi
     done
 }
 
 # Secrets to be applied before the helmfile charts are installed
 function apply_secrets() {
-    gum "${LOG_ARGS[@]}" debug "Applying secrets"
+    log debug "Applying secrets"
 
     local -r secrets_file="${KUBERNETES_DIR}/bootstrap/apps/resources/secrets.yaml.tpl"
     local resources
 
     if [[ ! -f "${secrets_file}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "File does not exist" file "${secrets_file}"
+        log fatal "File does not exist" file "${secrets_file}"
     fi
 
     # Inject secrets into the template
     if ! resources=$(op inject --in-file "${secrets_file}" 2>/dev/null) || [[ -z "${resources}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "Failed to inject secrets" file "${secrets_file}"
+        log fatal "Failed to inject secrets" file "${secrets_file}"
     fi
 
     # Check if the secret resources are up-to-date
-    if echo "${resources}" | kubectl --context "${CLUSTER_CONTEXT}" diff --filename - &>/dev/null; then
-        gum "${LOG_ARGS[@]}" info "Secret resources are up-to-date"
+    if echo "${resources}" | kubectl_cmd diff --filename - &>/dev/null; then
+        log info "Secret resources are up-to-date"
         return
     fi
 
     # Apply secret resources
-    if echo "${resources}" | kubectl --context "${CLUSTER_CONTEXT}" apply --server-side --filename - &>/dev/null; then
-        gum "${LOG_ARGS[@]}" info "Secret resources applied"
+    if echo "${resources}" | kubectl_cmd apply --server-side --filename - &>/dev/null; then
+        log info "Secret resources applied"
     else
-        gum "${LOG_ARGS[@]}" fatal "Failed to apply secret resources"
+        log fatal "Failed to apply secret resources"
    fi
 }
 
 # Disks in use by rook-ceph must be wiped before Rook is installed
 function wipe_rook_disks() {
-    gum "${LOG_ARGS[@]}" debug "Wiping Rook disks"
+    log debug "Wiping Rook disks"
 
     if [[ -z "${ROOK_DISK:-}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "Environment variable not set" env_var ROOK_DISK
+        log fatal "Environment variable not set" env_var ROOK_DISK
     fi
 
     # Skip disk wipe if Rook is detected running in the cluster
     if kubectl --context "${CLUSTER_CONTEXT}" --namespace rook-ceph get kustomization rook-ceph &>/dev/null; then
-        gum "${LOG_ARGS[@]}" warn "Rook is detected running in the cluster, skipping disk wipe"
+        log warn "Rook is detected running in the cluster, skipping disk wipe"
         return
     fi
 
     # Wipe disks on each node that match the ROOK_DISK environment variable
-    for node in $(talosctl --context "${CLUSTER_CONTEXT}" config info --output json | jq --raw-output '.nodes | .[]'); do
+    for node in $(talosctl_cmd config info --output json | jq --raw-output '.nodes | .[]'); do
         disk=$(
-            talosctl --context "${CLUSTER_CONTEXT}" --nodes "${node}" get disks --output json \
+            talosctl_cmd --nodes "${node}" get disks --output json \
                 | jq --raw-output 'select(.spec.model == env.ROOK_DISK) | .metadata.id' \
                 | xargs
         )
 
         if [[ -n "${disk}" ]]; then
-            gum "${LOG_ARGS[@]}" debug "Discovered Talos node and disk" node "${node}" disk "${disk}"
+            log debug "Discovered Talos node and disk" node "${node}" disk "${disk}"
 
-            if talosctl --context "${CLUSTER_CONTEXT}" --nodes "${node}" wipe disk "${disk}" &>/dev/null; then
-                gum "${LOG_ARGS[@]}" info "Disk wiped" node "${node}" disk "${disk}"
+            if talosctl_cmd --nodes "${node}" wipe disk "${disk}" &>/dev/null; then
+                log info "Disk wiped" node "${node}" disk "${disk}"
             else
-                gum "${LOG_ARGS[@]}" fatal "Failed to wipe disk" node "${node}" disk "${disk}"
+                log fatal "Failed to wipe disk" node "${node}" disk "${disk}"
             fi
         else
-            gum "${LOG_ARGS[@]}" warn "No disks found" node "${node}" model "${ROOK_DISK:-}"
+            log warn "No disks found" node "${node}" model "${ROOK_DISK:-}"
         fi
     done
 }
 
 function main() {
-    check_dependencies
+    # Verifications before bootstrapping the cluster
+    check_env CLUSTER_CONTEXT
+    check_cli helmfile jq kubectl kustomize minijinja-cli op talosctl yq
+
     wait_for_nodes
     apply_prometheus_crds
     apply_namespaces
diff --git a/scripts/common.sh b/scripts/common.sh
new file mode 100755
index 0000000000..8c02d33a64
--- /dev/null
+++ b/scripts/common.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Log messages with different levels
+function log() {
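+    # Usage: log <level> <message> [field ...]
+    # Fields of the form key=value are rendered as dimmed key="value" pairs
+    # after the message; any other trailing argument is appended verbatim.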
local level="${1:-info}" + shift + + local -A colors=( + [info]="\033[1m\033[38;5;87m" # Cyan + [warn]="\033[1m\033[38;5;192m" # Yellow + [error]="\033[1m\033[38;5;198m" # Red + [debug]="\033[1m\033[38;5;63m" # Blue + [fatal]="\033[1m\033[38;5;92m" # Purple + ) + + if [[ ! ${colors[$level]} ]]; then + level="info" + fi + + local color="${colors[$level]}" + local msg="$1" + shift + + local data= + if [[ $# -gt 0 ]]; then + for item in "$@"; do + if [[ "${item}" == *=* ]]; then + data+="\033[1m\033[38;5;236m${item%%=*}=\033[0m\"${item#*=}\" " + else + data+="${item} " + fi + done + fi + + printf "%s %b%s%b %s %b\n" "$(date --iso-8601=seconds)" \ + "${color}" "${level^^}" "\033[0m" "${msg}" "${data}" + + if [[ "$level" == "fatal" ]]; then + exit 1 + fi +} + +# Check if required environment variables are set +function check_env() { + local envs=("${@}") + local missing=() + + for env in "${envs[@]}"; do + if [[ -z "${!env-}" ]]; then + missing+=("${env}") + fi + done + + if [ ${#missing[@]} -ne 0 ]; then + log fatal "Missing required env variables" "envs=${missing[*]}" + fi + + log debug "Env variables are set" "envs=${envs[*]}" +} + +# Check if required CLI tools are installed +function check_cli() { + local deps=("${@}") + local missing=() + + for dep in "${deps[@]}"; do + if ! command -v "${dep}" &>/dev/null; then + missing+=("${dep}") + fi + done + + if [ ${#missing[@]} -ne 0 ]; then + log fatal "Missing required deps" "deps=${missing[*]}" + fi + + log debug "Deps are installed" "deps=${deps[*]}" +} + +# Execute talosctl with the cluster context +function talosctl_cmd() { + # shellcheck disable=SC2068 + talosctl --context "${CLUSTER_CONTEXT}" $@ +} + +# Execute kubectl with the cluster context +function kubectl_cmd() { + # shellcheck disable=SC2068 + kubectl --context "${CLUSTER_CONTEXT}" $@ +}