Cleanup #2034

Merged (19 commits) on Jul 31, 2024
77 changes: 74 additions & 3 deletions cmd/nodecmd/destroy.go
@@ -24,6 +24,7 @@ import (
var (
    authorizeRemove bool
    authorizeAll bool
    destroyAll bool
)

func newDestroyCmd() *cobra.Command {
@@ -35,12 +36,13 @@ func newDestroyCmd() *cobra.Command {
The node destroy command terminates all running nodes in cloud server and deletes all storage disks.

If there is a static IP address attached, it will be released.`,
        Args: cobrautils.ExactArgs(1),
        Args: cobrautils.MinimumNArgs(0),
        RunE: destroyNodes,
    }
    cmd.Flags().BoolVar(&authorizeAccess, "authorize-access", false, "authorize CLI to release cloud resources")
    cmd.Flags().BoolVar(&authorizeRemove, "authorize-remove", false, "authorize CLI to remove all local files related to cloud nodes")
    cmd.Flags().BoolVarP(&authorizeAll, "authorize-all", "y", false, "authorize all CLI requests")
    cmd.Flags().BoolVar(&destroyAll, "all", false, "destroy all existing clusters created by Avalanche CLI")
    cmd.Flags().StringVar(&awsProfile, "aws-profile", constants.AWSDefaultCredential, "aws profile to use")

    return cmd
@@ -98,7 +100,60 @@ func CallDestroyNode(clusterName string) error {
    return destroyNodes(nil, []string{clusterName})
}
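Taken together, the switch from ExactArgs(1) to MinimumNArgs(0) and the new --all flag mean the command now accepts two forms of invocation (both are spelled out verbatim in the error message added further down in this diff; CLUSTERNAME is a placeholder):

    avalanche node destroy CLUSTERNAME   # destroy a single named cluster
    avalanche node destroy --all         # destroy every cluster created by Avalanche CLI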

// We need to know which cloud service a cluster is using.
// getFirstAvailableNode gets the first node in the cluster that still has its node_config.json.
// Some nodes might have had their node_config.json file deleted as part of the deletion process,
// but if an error occurs during deletion, the node might still be listed as part of the cluster
// in cluster_config.json.
// If no node in the cluster still has its node_config.json file, getFirstAvailableNode returns
// false in its second return value.
func getFirstAvailableNode(nodesToStop []string) (string, bool) {
Contributor: not sure why we even need this

Collaborator (author): in the case that the first node is deleted (meaning it no longer has a node_config.json file) but it is still in the cluster config, we need to get the relevant info, such as the cloud service, from the first available node

    firstAvailableNode := nodesToStop[0]
    noAvailableNodesFound := false
    for index, node := range nodesToStop {
        nodeConfigPath := app.GetNodeConfigPath(node)
        if !utils.FileExists(nodeConfigPath) {
            if index == len(nodesToStop)-1 {
Contributor: can you please add a comment explaining the logic

Collaborator (author): addressed

                noAvailableNodesFound = true
            }
            continue
        }
        firstAvailableNode = node
    }
    return firstAvailableNode, noAvailableNodesFound
}

func Cleanup() error {
Contributor: i think the logic for destroy can be improved by loading all clusters and then filtering that list by the cluster name provided by the user, or using the full list for the --all case

Contributor: to make one code path work in all cases

Collaborator (author): we have different logic for cleanup, which ignores a lot of the errors during destroy

Collaborator (author): IMO the current implementation is good

    var err error
    clustersConfig := models.ClustersConfig{}
    if app.ClustersConfigExists() {
        clustersConfig, err = app.LoadClustersConfig()
        if err != nil {
            return err
        }
    }
    clusterNames := maps.Keys(clustersConfig.Clusters)
    for _, clusterName := range clusterNames {
        if err = CallDestroyNode(clusterName); err != nil {
            // we only return error for invalid cloud credentials
            // silence for other errors
            // TODO: differentiate between AWS and GCP credentials
            if strings.Contains(err.Error(), "invalid cloud credentials") {
                return fmt.Errorf("invalid AWS credentials")
            }
        }
    }
    ux.Logger.PrintToUser("all existing instances created by Avalanche CLI successfully destroyed")
    return nil
}
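For comparison, here is a minimal sketch of the unified flow the reviewer suggests above: load every cluster once, then either narrow the list to the user-supplied name or keep the full list for --all. It is hypothetical and not part of this PR; it reuses only helpers that already appear in this diff (app.ClustersConfigExists, app.LoadClustersConfig, maps.Keys, CallDestroyNode) and assumes slices.Contains from the standard library.

// Hypothetical refactor sketched from the review thread above; the PR itself keeps Cleanup and destroyNodes separate.
func destroySelectedClusters(clusterName string, destroyAll bool) error {
    var err error
    clustersConfig := models.ClustersConfig{}
    if app.ClustersConfigExists() {
        clustersConfig, err = app.LoadClustersConfig()
        if err != nil {
            return err
        }
    }
    // Load all clusters once, then narrow the list unless --all was requested.
    clusterNames := maps.Keys(clustersConfig.Clusters)
    if !destroyAll {
        if !slices.Contains(clusterNames, clusterName) {
            return fmt.Errorf("cluster %q does not exist", clusterName)
        }
        clusterNames = []string{clusterName}
    }
    for _, name := range clusterNames {
        if err := CallDestroyNode(name); err != nil {
            return err
        }
    }
    return nil
}

destroyNodes could then call such a helper with args[0] (or the full list when destroyAll is set), collapsing the two code paths that the author preferred to keep separate here.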

func destroyNodes(_ *cobra.Command, args []string) error {
    if len(args) == 0 {
        if !destroyAll {
Contributor: nit: reverse the logic: if destroyAll -> Cleanup(), else return the error

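For clarity, the reviewer's nit amounts to inverting the condition so the --all case returns first; a hypothetical rewrite of this block, not part of the diff, would be:

        if destroyAll {
            return Cleanup()
        }
        return fmt.Errorf("to destroy all existing clusters created by Avalanche CLI, call avalanche node destroy --all. To destroy a specified cluster, call avalanche node destroy CLUSTERNAME")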
            return fmt.Errorf("to destroy all existing clusters created by Avalanche CLI, call avalanche node destroy --all. To destroy a specified cluster, call avalanche node destroy CLUSTERNAME")
        }
        return Cleanup()
    }
    clusterName := args[0]
    if err := checkCluster(clusterName); err != nil {
        return err
@@ -142,7 +197,11 @@ func destroyNodes(_ *cobra.Command, args []string) error {
    if err != nil {
        return err
    }
    nodeToStopConfig, err := app.LoadClusterNodeConfig(nodesToStop[0])
    firstAvailableNodes, noAvailableNodesFound := getFirstAvailableNode(nodesToStop)
    if noAvailableNodesFound {
        return removeClustersConfigFiles(clusterName)
    }
    nodeToStopConfig, err := app.LoadClusterNodeConfig(firstAvailableNodes)
    if err != nil {
        return err
    }
@@ -165,6 +224,11 @@ func destroyNodes(_ *cobra.Command, args []string) error {
    }
    for _, node := range nodesToStop {
        if !isExternalCluster {
            // if we can't find node config path, that means node already deleted on console
            // but we didn't get to delete the node from cluster config file
            if !utils.FileExists(app.GetNodeConfigPath(node)) {
                continue
            }
            nodeConfig, err := app.LoadClusterNodeConfig(node)
            if err != nil {
                nodeErrors[node] = err
@@ -179,7 +243,7 @@ func destroyNodes(_ *cobra.Command, args []string) error {
                if isExpiredCredentialError(err) {
                    ux.Logger.PrintToUser("")
                    printExpiredCredentialsOutput(awsProfile)
                    return nil
                    return fmt.Errorf("invalid cloud credentials")
                }
                if !errors.Is(err, awsAPI.ErrNodeNotFoundToBeRunning) {
                    nodeErrors[node] = err
@@ -224,13 +288,20 @@ func destroyNodes(_ *cobra.Command, args []string) error {
    }
    if len(nodeErrors) > 0 {
        ux.Logger.PrintToUser("Failed nodes: ")
        invalidCloudCredentials := false
        for node, nodeErr := range nodeErrors {
            if strings.Contains(nodeErr.Error(), constants.ErrReleasingGCPStaticIP) {
                ux.Logger.RedXToUser("Node is destroyed, but failed to release static ip address for node %s due to %s", node, nodeErr)
            } else {
                if strings.Contains(nodeErr.Error(), "AuthFailure") {
                    invalidCloudCredentials = true
                }
                ux.Logger.RedXToUser("Failed to destroy node %s due to %s", node, nodeErr)
            }
        }
        if invalidCloudCredentials {
            return fmt.Errorf("failed to destroy node(s) due to invalid cloud credentials %s", maps.Keys(nodeErrors))
        }
        return fmt.Errorf("failed to destroy node(s) %s", maps.Keys(nodeErrors))
    } else {
        if isExternalCluster {
3 changes: 3 additions & 0 deletions cmd/nodecmd/whitelist.go

@@ -286,6 +286,9 @@ func whitelistSSHPubKey(clusterName string, pubkey string) error {
func getCloudSecurityGroupList(clusterNodes []string) ([]regionSecurityGroup, error) {
    cloudSecurityGroupList := []regionSecurityGroup{}
    for _, node := range clusterNodes {
        if !utils.FileExists(app.GetNodeConfigPath(node)) {
            continue
        }
        nodeConfig, err := app.LoadClusterNodeConfig(node)
        if err != nil {
            ux.Logger.PrintToUser("Failed to parse node %s due to %s", node, err.Error())
3 changes: 3 additions & 0 deletions pkg/cloud/aws/aws.go

@@ -351,6 +351,9 @@ func (c *AwsCloud) DestroyAWSNode(nodeConfig models.NodeConfig, clusterName stri
    isRunning, err := c.checkInstanceIsRunning(nodeConfig.NodeID)
    if err != nil {
        ux.Logger.PrintToUser(fmt.Sprintf("Failed to destroy node %s due to %s", nodeConfig.NodeID, err.Error()))
        if errors.Is(err, ErrNoInstanceState) {
            return nil
        }
        return err
    }
    if !isRunning {