diff --git a/defaults.yml b/defaults.yml
index 3620b3bd..3b5d99f6 100644
--- a/defaults.yml
+++ b/defaults.yml
@@ -22,7 +22,7 @@
 gcp_region: europe-north1
 gcp_zone: b
 gcp_type: n1-standard-4
 gcp_disks: "pd-standard:50"
-gke_version: "1.27.2-gke.2100"
+gke_version: "1.28.3"
 #gcp_disks: "pd-standard:20 pd-ssd:30"
 #gcp_project: "px-deploy"
diff --git a/docs/cloud/vsphere/README.md b/docs/cloud/vsphere/README.md
index ad267317..d0328352 100644
--- a/docs/cloud/vsphere/README.md
+++ b/docs/cloud/vsphere/README.md
@@ -112,6 +112,17 @@
 Resolution:
 
 ensure your vsphere_user has StorageProfile.View privilege (non-propagating) on the root vCenter object
 
+error on destroy:
+
+```
+
+Error: Invalid datastore path '/vmfs/volumes/...'
+
+```
+
+Resolution:
+
+re-run the destroy command
 
 ## How does px-deploy vsphere templating work
diff --git a/gcp.go b/gcp.go
index b52ff335..455eecb3 100644
--- a/gcp.go
+++ b/gcp.go
@@ -278,7 +278,7 @@ func gcp_create_variables(config *Config) []string {
         tags := strings.Split(config.Tags, ",")
         for _, val := range tags {
             entry := strings.Split(val, "=")
-            tf_var_tags = append(tf_var_tags, " "+strings.TrimSpace(entry[0])+" = \""+strings.TrimSpace(entry[1])+"\"")
+            tf_var_tags = append(tf_var_tags, " "+strings.ToLower(strings.TrimSpace(entry[0]))+" = \""+strings.TrimSpace(entry[1])+"\"")
         }
     }
     // get PXDUSER env and apply to tf_variables
diff --git a/px-deploy.go b/px-deploy.go
index d1548425..691d46aa 100644
--- a/px-deploy.go
+++ b/px-deploy.go
@@ -106,6 +106,7 @@ type Config struct {
     Azure__Group      string `yaml:"azure__group,omitempty"`
     Vsphere__Userdata string `yaml:"vsphere__userdata,omitempty"`
     Ssh_Pub_Key       string
+    Run_Predelete     bool
 }
 
 type Config_Cluster struct {
@@ -119,6 +120,11 @@ type Deployment_Status_Return struct {
     status string
 }
 
+type Predelete_Status_Return struct {
+    node    string
+    success bool
+}
+
 var Reset = "\033[0m"
 var White = "\033[97m"
 var Red = "\033[31m"
@@ -132,7 +138,7 @@ var wg sync.WaitGroup
 
 func main() {
     var createName, createTemplate, createRegion, createEnv, connectName, kubeconfigName, destroyName, statusName, historyNumber string
-    var destroyAll, destroyClear bool
+    var destroyAll, destroyClear, destroyForce bool
     var flags Config
     os.Chdir("/px-deploy/.px-deploy")
     rootCmd := &cobra.Command{Use: "px-deploy"}
@@ -150,6 +156,10 @@ func main() {
     }
     config := parse_yaml("defaults.yml")
 
+    // should be there by default
+    // we don't put it into defaults.yml as it defines a path within the container
+    config.Gcp_Auth_Json = "/px-deploy/.px-deploy/gcp.json"
+
     if config.Aws_Tags != "" {
         fmt.Printf("Parameter 'aws_tags: %s' is deprecated and will be ignored. Please change to 'tags: %s' in ~/.px-deploy/defaults.yml \n", config.Aws_Tags, config.Aws_Tags)
     }
@@ -198,7 +208,7 @@ func main() {
                     return nil
                 }
                 config := parse_yaml(file)
-                destroy_deployment(config.Name)
+                destroy_deployment(config.Name, destroyForce)
                 return nil
             })
         } else {
@@ -208,7 +218,7 @@ func main() {
             if destroyClear {
                 destroy_clear(destroyName)
             } else {
-                destroy_deployment(destroyName)
+                destroy_deployment(destroyName, destroyForce)
             }
         }
     },
@@ -403,11 +413,13 @@ func main() {
     cmdCreate.Flags().StringVarP(&createRegion, "region", "r", "", "AWS, GCP or Azure region (default "+defaults.Aws_Region+", "+defaults.Gcp_Region+" or "+defaults.Azure_Region+")")
     cmdCreate.Flags().StringVarP(&flags.Cloud, "cloud", "C", "", "aws | gcp | azure | vsphere (default "+defaults.Cloud+")")
     cmdCreate.Flags().StringVarP(&flags.Ssh_Pub_Key, "ssh_pub_key", "", "", "ssh public key which will be added for root access on each node")
+    cmdCreate.Flags().BoolVarP(&flags.Run_Predelete, "predelete", "", false, "run predelete scripts on destruction (true/false)")
     cmdCreate.Flags().StringVarP(&createEnv, "env", "e", "", "Comma-separated list of environment variables to be passed, for example foo=bar,abc=123")
     cmdCreate.Flags().BoolVarP(&flags.DryRun, "dry_run", "d", false, "dry-run, create local files only. Works only on aws / azure")
 
     cmdDestroy.Flags().BoolVarP(&destroyAll, "all", "a", false, "destroy all deployments")
     cmdDestroy.Flags().BoolVarP(&destroyClear, "clear", "c", false, "destroy local deployment files (use with caution!)")
+    cmdDestroy.Flags().BoolVarP(&destroyForce, "force", "f", false, "destroy even if predelete script exec fails")
     cmdDestroy.Flags().StringVarP(&destroyName, "name", "n", "", "name of deployment to be destroyed")
 
     cmdConnect.Flags().StringVarP(&connectName, "name", "n", "", "name of deployment to connect to")
@@ -483,8 +495,6 @@ func validate_config(config *Config) []string {
 
     if _, err := os.Stat("/px-deploy/.px-deploy/gcp.json"); os.IsNotExist(err) {
         errormsg = append(errormsg, "~/.px-deploy/gcp.json not found. refer to readme.md how to create it")
-    } else {
-        config.Gcp_Auth_Json = "/px-deploy/.px-deploy/gcp.json"
     }
 
     if !regexp.MustCompile(`^[a-zA-Z0-9_\-]+$`).MatchString(config.Gcp_Region) {
@@ -1007,7 +1017,7 @@ func destroy_clear(name string) {
     }
 }
 
-func destroy_deployment(name string) {
+func destroy_deployment(name string, destroyForce bool) {
     os.Chdir("/px-deploy/.px-deploy")
     config := parse_yaml("deployments/" + name + ".yml")
     var output []byte
@@ -1046,35 +1056,12 @@ func destroy_deployment(name string) {
         switch config.Platform {
         case "ocp4":
             {
-
-                clusters, _ := strconv.Atoi(config.Clusters)
-                fmt.Println("Running pre-delete scripts on all master nodes. Output is mixed")
-                for i := 1; i <= clusters; i++ {
-                    wg.Add(1)
-                    go run_predelete(&config, fmt.Sprintf("master-%v-1", i), "script")
-                }
-                wg.Wait()
-                fmt.Println("pre-delete scripts done")
-
-                fmt.Println(White + "Destroying OCP4 cluster(s), wait about 5 minutes (per cluster)... Output is mixed" + Reset)
-                for i := 1; i <= clusters; i++ {
-                    wg.Add(1)
-                    go run_predelete(&config, fmt.Sprintf("master-%v-1", i), "platform")
-                }
-                wg.Wait()
-                fmt.Println("OCP4 cluster delete done")
+                prepare_predelete(&config, "script", destroyForce)
+                prepare_predelete(&config, "platform", destroyForce)
             }
         case "eks":
             {
-                clusters, _ := strconv.Atoi(config.Clusters)
-
-                fmt.Println("Running pre-delete scripts on all master nodes. Output will be mixed")
-                for i := 1; i <= clusters; i++ {
-                    wg.Add(1)
-                    go run_predelete(&config, fmt.Sprintf("master-%v-1", i), "script")
-                }
-                wg.Wait()
-                fmt.Println("pre-delete scripts done")
+                prepare_predelete(&config, "script", destroyForce)
 
                 err := aws_delete_nodegroups(&config)
                 if err != nil {
@@ -1087,14 +1074,8 @@ func destroy_deployment(name string) {
             // if there are no px clouddrive volumes
             // terraform will terminate instances
             // otherwise terminate instances to enable volume deletion
-            clusters, _ := strconv.Atoi(config.Clusters)
-            fmt.Println("Running pre-delete scripts on all master nodes. Output will be mixed")
-            for i := 1; i <= clusters; i++ {
-                wg.Add(1)
-                go run_predelete(&config, fmt.Sprintf("master-%v-1", i), "script")
-            }
-            wg.Wait()
-            fmt.Println("pre-delete scripts done")
+
+            prepare_predelete(&config, "script", destroyForce)
 
             if len(aws_volumes) > 0 {
                 fmt.Printf("Waiting for termination of %v instances: (timeout 5min) \n", len(aws_instances))
@@ -1148,15 +1129,7 @@ func destroy_deployment(name string) {
             die("Error: outdated deployment")
         }
 
-        clusters, _ := strconv.Atoi(config.Clusters)
-
-        fmt.Println("Running pre-delete scripts on all master nodes. Output will be mixed")
-        for i := 1; i <= clusters; i++ {
-            wg.Add(1)
-            go run_predelete(&config, fmt.Sprintf("%v-master-%v-1", config.Name, i), "script")
-        }
-        wg.Wait()
-        fmt.Println("pre-delete scripts done")
+        prepare_predelete(&config, "script", destroyForce)
 
         instances, err := gcp_get_instances(config.Name, &config)
         if err != nil {
@@ -1232,15 +1205,7 @@ func destroy_deployment(name string) {
             }
         }
     } else if config.Cloud == "azure" {
-        clusters, _ := strconv.Atoi(config.Clusters)
-
-        fmt.Println("Running pre-delete scripts on all master nodes. Output will be mixed")
-        for i := 1; i <= clusters; i++ {
-            wg.Add(1)
-            go run_predelete(&config, fmt.Sprintf("master-%v-1", i), "script")
-        }
-        wg.Wait()
-        fmt.Println("pre-delete scripts done")
+        prepare_predelete(&config, "script", destroyForce)
 
         tf_error := run_terraform_destroy(&config)
         if tf_error != "" {
@@ -1253,14 +1218,7 @@ func destroy_deployment(name string) {
             die("Error: outdated deployment")
         }
 
-        clusters, _ := strconv.Atoi(config.Clusters)
-        fmt.Println("Running pre-delete scripts on all master nodes. Output will be mixed")
-        for i := 1; i <= clusters; i++ {
-            wg.Add(1)
-            go run_predelete(&config, fmt.Sprintf("%s-master-%v", config.Name, i), "script")
-        }
-        wg.Wait()
-        fmt.Println("pre-delete scripts done")
+        prepare_predelete(&config, "script", destroyForce)
 
         vsphere_prepare_destroy(&config)
 
@@ -1286,8 +1244,15 @@ func destroy_deployment(name string) {
     fmt.Println(White + "Destroyed." + Reset)
 }
 func run_terraform_destroy(config *Config) string {
+    var cmd *exec.Cmd
     fmt.Println(White + "running Terraform PLAN" + Reset)
-    cmd := exec.Command("terraform", "-chdir=/px-deploy/.px-deploy/tf-deployments/"+config.Name, "plan", "-destroy", "-input=false", "-refresh=false", "-parallelism=50", "-out=tfplan", "-var-file", ".tfvars")
+    // vsphere terraform must refresh, otherwise it complains about missing disks
+    // other clouds do not refresh as this saves time at scale
+    if config.Cloud == "vsphere" {
+        cmd = exec.Command("terraform", "-chdir=/px-deploy/.px-deploy/tf-deployments/"+config.Name, "plan", "-destroy", "-input=false", "-refresh=true", "-parallelism=50", "-out=tfplan", "-var-file", ".tfvars")
+    } else {
+        cmd = exec.Command("terraform", "-chdir=/px-deploy/.px-deploy/tf-deployments/"+config.Name, "plan", "-destroy", "-input=false", "-refresh=false", "-parallelism=50", "-out=tfplan", "-var-file", ".tfvars")
+    }
     cmd.Stderr = os.Stderr
     err := cmd.Run()
     if err != nil {
@@ -1373,7 +1338,78 @@ func get_ip(deployment string) string {
     return strings.TrimSuffix(string(output), "\n")
 }
 
-func run_predelete(config *Config, confNode string, confPath string) {
+func prepare_predelete(config *Config, runtype string, destroyForce bool) {
+    // master node naming scheme on clouds:
+    //aws: master-[clusternum]-1
+    //azure: master-[clusternum]-1
+    //gcp: [configname]-master-[clusternum]-1
+    //vsphere: [configname]-master-[clusternum]
+
+    var name_pre, name_post string
+
+    // script predelete only executes if set in config
+    if config.Run_Predelete != true && runtype == "script" {
+        return
+    }
+
+    clusters, _ := strconv.Atoi(config.Clusters)
+    predelete_status := make(chan Predelete_Status_Return, clusters)
+
+    switch config.Cloud {
+    case "aws":
+        {
+            name_pre = "master-"
+            name_post = "-1"
+        }
+    case "azure":
+        {
+            name_pre = "master-"
+            name_post = "-1"
+        }
+    case "gcp":
+        {
+            name_pre = fmt.Sprintf("%v-master-", config.Name)
+            name_post = "-1"
+        }
+    case "vsphere":
+        {
+            name_pre = fmt.Sprintf("%v-master-", config.Name)
+            name_post = ""
+        }
+    }
+
+    if config.Platform == "ocp4" && runtype == "platform" {
+        fmt.Printf("Destroying OCP4 cluster(s), wait about 15 minutes (per cluster)... Output is mixed\n")
+    } else {
+        fmt.Printf("Running pre-delete scripts on all master nodes. Output is mixed\n")
+    }
+
+    wg.Add(clusters)
+    for i := 1; i <= clusters; i++ {
+        go exec_predelete(config, fmt.Sprintf("%v%v%v", name_pre, i, name_post), runtype, predelete_status)
+    }
+    wg.Wait()
+    close(predelete_status)
+
+    for elem := range predelete_status {
+        if elem.success == false {
+            if destroyForce {
+                fmt.Printf("%v %v failed %v predelete. --force parameter set. Continuing deletion%v\n", Red, elem.node, runtype, Reset)
+            } else {
+                fmt.Printf("%v %v failed %v predelete. Canceled deletion process.\nEnsure %v is powered on and can be accessed via ssh, then retry\n(to force deletion use the --force parameter)%v\n", Red, elem.node, runtype, elem.node, Reset)
+                os.Exit(1)
+            }
+        }
+    }
+
+    if config.Platform == "ocp4" && runtype == "platform" {
+        fmt.Printf("OCP4 cluster delete done\n")
+    } else {
+        fmt.Printf("pre-delete %v done\n", runtype)
+    }
+}
+
+func exec_predelete(config *Config, confNode string, confPath string, success chan Predelete_Status_Return) {
     var ip string
 
     defer wg.Done()
@@ -1411,6 +1447,9 @@ func run_predelete(config *Config, confNode string, confPath string) {
     err := cmd.Run()
     if err != nil {
         fmt.Println(Yellow + "Failed to run pre-delete script:" + err.Error() + Reset)
+        success <- Predelete_Status_Return{confNode, false}
+    } else {
+        success <- Predelete_Status_Return{confNode, true}
     }
 }
 
diff --git a/templates/pds-petclinic.yml b/templates/pds-petclinic.yml
index 2f219fc8..29ad9f54 100644
--- a/templates/pds-petclinic.yml
+++ b/templates/pds-petclinic.yml
@@ -3,6 +3,7 @@
 description: Deploy single k8s/px cluster, register at PDS controlplane, deploy Postgres using API, install petclinic demo app
 clusters: 1
 scripts: ["install-px", "pds-petclinic"]
+run_predelete: true
 
 env:
   # PDS_TOKEN: "[your pds user api token]" -> set this as env variable in your defaults.yml
diff --git a/terraform/gcp/gke/gke.tf b/terraform/gcp/gke/gke.tf
index b041380f..c29e5c25 100644
--- a/terraform/gcp/gke/gke.tf
+++ b/terraform/gcp/gke/gke.tf
@@ -13,6 +13,11 @@ variable "gke_nodes" {
   type = number
 }
 
+data "google_container_engine_versions" "gkeversion" {
+  location       = format("%s-%s",var.gcp_region,var.gcp_zone)
+  version_prefix = var.gke_version
+}
+
 resource "google_container_cluster" "gke" {
   for_each = var.gkeclusters
   // do not change naming scheme of cluster as this is referenced in destroy functions
@@ -21,9 +26,12 @@ resource "google_container_cluster" "gke" {
   network            = google_compute_network.vpc.id
   subnetwork         = google_compute_subnetwork.subnet[each.key - 1].id
   initial_node_count = var.gke_nodes
-  node_version       = var.gke_version
-  min_master_version = var.gke_version
-
+  //node_version = data.google_container_engine_versions.gkeversion.release_channel_default_version["STABLE"]
+  //min_master_version = data.google_container_engine_versions.gkeversion.release_channel_default_version["STABLE"]
+  node_version        = data.google_container_engine_versions.gkeversion.latest_node_version
+  min_master_version  = data.google_container_engine_versions.gkeversion.latest_master_version
+  deletion_protection = false
+
   release_channel {
     channel = "UNSPECIFIED"
   }
diff --git a/terraform/gcp/main.tf b/terraform/gcp/main.tf
index 6059dcb2..86d1869a 100644
--- a/terraform/gcp/main.tf
+++ b/terraform/gcp/main.tf
@@ -2,7 +2,7 @@ terraform {
   required_providers {
     google = {
       source  = "hashicorp/google"
-      version = "4.77.0"
+      version = "5.15.0"
     }
     local = {
       source  = "hashicorp/local"
diff --git a/terraform/vsphere/main.tf b/terraform/vsphere/main.tf
index 129458d4..71747867 100644
--- a/terraform/vsphere/main.tf
+++ b/terraform/vsphere/main.tf
@@ -2,7 +2,7 @@ terraform {
   required_providers {
     vsphere = {
       source  = "hashicorp/vsphere"
-      version = "2.5.1"
+      version = "2.6.1"
     }
     local = {
       source  = "hashicorp/local"
@@ -186,4 +186,4 @@ resource "local_file" "metadata" {
 resource "local_file" "nodemap" {
   content  = "%{ for vm in vsphere_virtual_machine.node}${format("\"%s\": \"%s,%s\"\n",vm.name,vm.moid,vm.network_interface[0].mac_address)}%{endfor}"
   filename = "${path.module}/nodemap.txt"
-}
\ No newline at end of file
+}
diff --git a/vagrant/eks-master b/vagrant/eks-master
index 002ff057..c16c82df 100644
--- a/vagrant/eks-master
+++ b/vagrant/eks-master
@@ -38,4 +38,4 @@ if [[ ! -z $AWS_ADD_EKS_IAM_USER ]]; then
   eksctl create iamidentitymapping --cluster px-deploy-$name-$cluster --region=$aws_region \
     --arn arn:aws:iam::$AWS_ACCOUNT_ID:user/$AWS_ADD_EKS_IAM_USER --group eks-console-dashboard-full-access-group \
     --no-duplicate-arns
-fi
\ No newline at end of file
+fi
diff --git a/vagrant/gke-master b/vagrant/gke-master
index 6b54c655..7102b41f 100644
--- a/vagrant/gke-master
+++ b/vagrant/gke-master
@@ -1,6 +1,6 @@
 GKE_CLUSTER_NAME=px-deploy-$name-$cluster
 
-dnf install -y docker google-cloud-sdk-gke-gcloud-auth-plugin
+dnf install -y docker google-cloud-cli-gke-gcloud-auth-plugin
 systemctl enable --now docker
 
 #curl -L https://github.com/containerd/containerd/releases/download/v1.6.15/containerd-1.6.15-linux-amd64.tar.gz | tar Cxzvf /usr/local -
diff --git a/vsphere.go b/vsphere.go
index 9bcf9edb..34446c29 100644
--- a/vsphere.go
+++ b/vsphere.go
@@ -655,6 +655,7 @@ func vsphere_prepare_destroy(config *Config) error {
 
     //import clouddrives into tf state
     vsphere_import_tf_clouddrive(config)
+    os.Rename("/px-deploy/.px-deploy/tf-deployments/"+config.Name+"/import.tf", "/px-deploy/.px-deploy/tf-deployments/"+config.Name+"/import.tf.done")
 
     fmt.Printf("waiting to power off VMs\n")
     for i := 1; i <= clusters; i++ {