From 780315f94c360bf5783085bfb8c091fa3ab009fa Mon Sep 17 00:00:00 2001
From: Scotte Zinn
Date: Wed, 19 Feb 2025 16:25:34 -0500
Subject: [PATCH] Futzing with the staging bringup script

---
 .../bootstrap/apps/resources/prepare.sh | 108 ++++++++----------
 scripts/common.sh                       |  91 +++++++++++++++
 2 files changed, 137 insertions(+), 62 deletions(-)
 create mode 100755 scripts/common.sh

diff --git a/kubernetes/staging/bootstrap/apps/resources/prepare.sh b/kubernetes/staging/bootstrap/apps/resources/prepare.sh
index 11196a3726..c5320dde17 100755
--- a/kubernetes/staging/bootstrap/apps/resources/prepare.sh
+++ b/kubernetes/staging/bootstrap/apps/resources/prepare.sh
@@ -2,50 +2,31 @@
 set -euo pipefail
 
-# Set default values for the 'gum log' command
-readonly LOG_ARGS=("log" "--time=rfc3339" "--formatter=text" "--structured" "--level")
-
-# Verify required CLI tools are installed
-function check_dependencies() {
-    local deps=("gum" "jq" "kubectl" "kustomize" "op" "talosctl" "yq")
-    local missing=()
-
-    for dep in "${deps[@]}"; do
-        if ! command -v "${dep}" &>/dev/null; then
-            missing+=("${dep}")
-        fi
-    done
-
-    if [ ${#missing[@]} -ne 0 ]; then
-        if ! command -v gum &>/dev/null; then
-            printf "%s \033[1;95m%s\033[0m Missing required dependencies \033[0;30mdependencies=\033[0m\"%s\"\n" \
-                "$(date --iso-8601=seconds)" "FATAL" "${missing[*]}"
-            exit 1
-        fi
-        gum "${LOG_ARGS[@]}" fatal "Missing required dependencies" dependencies "${missing[*]}"
-    fi
-}
+# shellcheck disable=SC2155
+export ROOT_DIR="$(git rev-parse --show-toplevel)"
+
+# shellcheck disable=SC1091
+source "${ROOT_DIR}/scripts/common.sh"
 
 # Talos requires the nodes to be 'Ready=False' before applying resources
 function wait_for_nodes() {
-    gum "${LOG_ARGS[@]}" debug "Waiting for nodes to be available"
+    log debug "Waiting for nodes to be available"
 
     # Skip waiting if all nodes are 'Ready=True'
-    if kubectl --context "${CLUSTER_CONTEXT}" wait nodes --for=condition=Ready=True --all --timeout=10s &>/dev/null; then
-        gum "${LOG_ARGS[@]}" info "Nodes are available and ready, skipping wait for nodes"
+    if kubectl_cmd wait nodes --for=condition=Ready=True --all --timeout=10s &>/dev/null; then
+        log info "Nodes are available and ready, skipping wait for nodes"
         return
     fi
 
     # Wait for all nodes to be 'Ready=False'
-    until kubectl --context "${CLUSTER_CONTEXT}" wait nodes --for=condition=Ready=False --all --timeout=10s &>/dev/null; do
-        gum "${LOG_ARGS[@]}" info "Nodes are not available, waiting for nodes to be available"
+    until kubectl_cmd wait nodes --for=condition=Ready=False --all --timeout=10s &>/dev/null; do
+        log info "Nodes are not available, waiting for nodes to be available"
         sleep 10
     done
 }
 
 # Applications in the helmfile require Prometheus custom resources (e.g. servicemonitors)
 function apply_prometheus_crds() {
-    gum "${LOG_ARGS[@]}" debug "Applying Prometheus CRDs"
+    log debug "Applying Prometheus CRDs"
 
     # renovate: datasource=github-releases depName=prometheus-operator/prometheus-operator
     local -r version=v0.80.0
@@ -53,36 +34,36 @@ function apply_prometheus_crds() {
     # Fetch resources using kustomize build
     if ! resources=$(kustomize build "https://github.com/prometheus-operator/prometheus-operator/?ref=${version}" 2>/dev/null) || [[ -z "${resources}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "Failed to fetch Prometheus CRDs, check the version or the repository URL"
+        log fatal "Failed to fetch Prometheus CRDs, check the version or the repository URL"
     fi
 
     # Extract only CustomResourceDefinitions
     if ! crds=$(echo "${resources}" | yq '. | select(.kind == "CustomResourceDefinition")' 2>/dev/null) || [[ -z "${crds}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "No CustomResourceDefinitions found in the fetched resources"
+        log fatal "No CustomResourceDefinitions found in the fetched resources"
     fi
 
     # Check if the CRDs are up-to-date
-    if echo "${crds}" | kubectl --context "${CLUSTER_CONTEXT}" diff --filename - &>/dev/null; then
-        gum "${LOG_ARGS[@]}" info "Prometheus CRDs are up-to-date"
+    if echo "${crds}" | kubectl_cmd diff --filename - &>/dev/null; then
+        log info "Prometheus CRDs are up-to-date"
         return
     fi
 
     # Apply the CRDs
-    if echo "${crds}" | kubectl --context "${CLUSTER_CONTEXT}" apply --server-side --filename - &>/dev/null; then
-        gum "${LOG_ARGS[@]}" info "Prometheus CRDs applied successfully"
+    if echo "${crds}" | kubectl_cmd apply --server-side --filename - &>/dev/null; then
+        log info "Prometheus CRDs applied successfully"
     else
-        gum "${LOG_ARGS[@]}" fatal "Failed to apply Prometheus CRDs"
+        log fatal "Failed to apply Prometheus CRDs"
     fi
 }
 
 # The application namespaces are created before applying the resources
 function apply_namespaces() {
-    gum "${LOG_ARGS[@]}" debug "Applying namespaces"
+    log debug "Applying namespaces"
 
     local -r apps_dir="${KUBERNETES_DIR}/apps"
 
     if [[ ! -d "${apps_dir}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "Directory does not exist" directory "${apps_dir}"
+        log fatal "Directory does not exist" directory "${apps_dir}"
     fi
 
     for app in "${apps_dir}"/*/; do
@@ -90,89 +71,92 @@
         # Check if the namespace resources are up-to-date
         if kubectl --context "${CLUSTER_CONTEXT}" get namespace "${namespace}" &>/dev/null; then
-            gum "${LOG_ARGS[@]}" info "Namespace resource is up-to-date" resource "${namespace}"
+            log info "Namespace resource is up-to-date" resource "${namespace}"
             continue
         fi
 
         # Apply the namespace resources
-        if kubectl --context "${CLUSTER_CONTEXT}" create namespace "${namespace}" --dry-run=client --output=yaml \
-            | kubectl --context "${CLUSTER_CONTEXT}" apply --server-side --filename - &>/dev/null;
+        if kubectl_cmd create namespace "${namespace}" --dry-run=client --output=yaml \
+            | kubectl_cmd apply --server-side --filename - &>/dev/null;
         then
-            gum "${LOG_ARGS[@]}" info "Namespace resource applied" resource "${namespace}"
+            log info "Namespace resource applied" resource "${namespace}"
         else
-            gum "${LOG_ARGS[@]}" fatal "Failed to apply namespace resource" resource "${namespace}"
+            log fatal "Failed to apply namespace resource" resource "${namespace}"
         fi
     done
 }
 
 # Secrets to be applied before the helmfile charts are installed
 function apply_secrets() {
-    gum "${LOG_ARGS[@]}" debug "Applying secrets"
+    log debug "Applying secrets"
 
     local -r secrets_file="${KUBERNETES_DIR}/bootstrap/apps/resources/secrets.yaml.tpl"
     local resources
 
     if [[ ! -f "${secrets_file}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "File does not exist" file "${secrets_file}"
+        log fatal "File does not exist" file "${secrets_file}"
     fi
 
     # Inject secrets into the template
     if ! resources=$(op inject --in-file "${secrets_file}" 2>/dev/null) || [[ -z "${resources}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "Failed to inject secrets" file "${secrets_file}"
+        log fatal "Failed to inject secrets" file "${secrets_file}"
     fi
 
     # Check if the secret resources are up-to-date
-    if echo "${resources}" | kubectl --context "${CLUSTER_CONTEXT}" diff --filename - &>/dev/null; then
-        gum "${LOG_ARGS[@]}" info "Secret resources are up-to-date"
+    if echo "${resources}" | kubectl_cmd diff --filename - &>/dev/null; then
+        log info "Secret resources are up-to-date"
         return
     fi
 
     # Apply secret resources
-    if echo "${resources}" | kubectl --context "${CLUSTER_CONTEXT}" apply --server-side --filename - &>/dev/null; then
-        gum "${LOG_ARGS[@]}" info "Secret resources applied"
+    if echo "${resources}" | kubectl_cmd apply --server-side --filename - &>/dev/null; then
+        log info "Secret resources applied"
     else
-        gum "${LOG_ARGS[@]}" fatal "Failed to apply secret resources"
+        log fatal "Failed to apply secret resources"
    fi
 }
 
 # Disks in use by rook-ceph must be wiped before Rook is installed
 function wipe_rook_disks() {
-    gum "${LOG_ARGS[@]}" debug "Wiping Rook disks"
+    log debug "Wiping Rook disks"
 
     if [[ -z "${ROOK_DISK:-}" ]]; then
-        gum "${LOG_ARGS[@]}" fatal "Environment variable not set" env_var ROOK_DISK
+        log fatal "Environment variable not set" env_var ROOK_DISK
     fi
 
     # Skip disk wipe if Rook is detected running in the cluster
     if kubectl --context "${CLUSTER_CONTEXT}" --namespace rook-ceph get kustomization rook-ceph &>/dev/null; then
-        gum "${LOG_ARGS[@]}" warn "Rook is detected running in the cluster, skipping disk wipe"
+        log warn "Rook is detected running in the cluster, skipping disk wipe"
         return
     fi
 
     # Wipe disks on each node that match the ROOK_DISK environment variable
-    for node in $(talosctl --context "${CLUSTER_CONTEXT}" config info --output json | jq --raw-output '.nodes | .[]'); do
+    for node in $(talosctl_cmd config info --output json | jq --raw-output '.nodes | .[]'); do
         disk=$(
-            talosctl --context "${CLUSTER_CONTEXT}" --nodes "${node}" get disks --output json \
+            talosctl_cmd --nodes "${node}" get disks --output json \
                 | jq --raw-output 'select(.spec.model == env.ROOK_DISK) | .metadata.id' \
                 | xargs
         )
 
         if [[ -n "${disk}" ]]; then
-            gum "${LOG_ARGS[@]}" debug "Discovered Talos node and disk" node "${node}" disk "${disk}"
+            log debug "Discovered Talos node and disk" node "${node}" disk "${disk}"
 
-            if talosctl --context "${CLUSTER_CONTEXT}" --nodes "${node}" wipe disk "${disk}" &>/dev/null; then
-                gum "${LOG_ARGS[@]}" info "Disk wiped" node "${node}" disk "${disk}"
+            if talosctl_cmd --nodes "${node}" wipe disk "${disk}" &>/dev/null; then
+                log info "Disk wiped" node "${node}" disk "${disk}"
             else
-                gum "${LOG_ARGS[@]}" fatal "Failed to wipe disk" node "${node}" disk "${disk}"
+                log fatal "Failed to wipe disk" node "${node}" disk "${disk}"
             fi
         else
-            gum "${LOG_ARGS[@]}" warn "No disks found" node "${node}" model "${ROOK_DISK:-}"
+            log warn "No disks found" node "${node}" model "${ROOK_DISK:-}"
         fi
     done
 }
 
 function main() {
-    check_dependencies
+    # Verifications before bootstrapping the cluster
+    check_env CLUSTER_CONTEXT
+    check_cli helmfile jq kubectl kustomize minijinja-cli op talosctl yq
+
     wait_for_nodes
     apply_prometheus_crds
     apply_namespaces
diff --git a/scripts/common.sh b/scripts/common.sh
new file mode 100755
index 0000000000..8c02d33a64
--- /dev/null
+++ b/scripts/common.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Log messages with different levels
+function log() {
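+    # Usage: log <level> <message> [field ...]
+    # Fields of the form key=value are rendered as dimmed key="value" pairs
+    # after the message; any other trailing argument is appended verbatim.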
local level="${1:-info}" + shift + + local -A colors=( + [info]="\033[1m\033[38;5;87m" # Cyan + [warn]="\033[1m\033[38;5;192m" # Yellow + [error]="\033[1m\033[38;5;198m" # Red + [debug]="\033[1m\033[38;5;63m" # Blue + [fatal]="\033[1m\033[38;5;92m" # Purple + ) + + if [[ ! ${colors[$level]} ]]; then + level="info" + fi + + local color="${colors[$level]}" + local msg="$1" + shift + + local data= + if [[ $# -gt 0 ]]; then + for item in "$@"; do + if [[ "${item}" == *=* ]]; then + data+="\033[1m\033[38;5;236m${item%%=*}=\033[0m\"${item#*=}\" " + else + data+="${item} " + fi + done + fi + + printf "%s %b%s%b %s %b\n" "$(date --iso-8601=seconds)" \ + "${color}" "${level^^}" "\033[0m" "${msg}" "${data}" + + if [[ "$level" == "fatal" ]]; then + exit 1 + fi +} + +# Check if required environment variables are set +function check_env() { + local envs=("${@}") + local missing=() + + for env in "${envs[@]}"; do + if [[ -z "${!env-}" ]]; then + missing+=("${env}") + fi + done + + if [ ${#missing[@]} -ne 0 ]; then + log fatal "Missing required env variables" "envs=${missing[*]}" + fi + + log debug "Env variables are set" "envs=${envs[*]}" +} + +# Check if required CLI tools are installed +function check_cli() { + local deps=("${@}") + local missing=() + + for dep in "${deps[@]}"; do + if ! command -v "${dep}" &>/dev/null; then + missing+=("${dep}") + fi + done + + if [ ${#missing[@]} -ne 0 ]; then + log fatal "Missing required deps" "deps=${missing[*]}" + fi + + log debug "Deps are installed" "deps=${deps[*]}" +} + +# Execute talosctl with the cluster context +function talosctl_cmd() { + # shellcheck disable=SC2068 + talosctl --context "${CLUSTER_CONTEXT}" $@ +} + +# Execute kubectl with the cluster context +function kubectl_cmd() { + # shellcheck disable=SC2068 + kubectl --context "${CLUSTER_CONTEXT}" $@ +}