1
+ import os
2
+ import time
3
+ import ibm_cloud_sdk_core
4
+ import ibm_vpc
5
+ import yaml
6
+ from ibm_ray_config .modules .config_builder import ConfigBuilder , spinner
7
+ from ibm_ray_config .modules .utils import Color , color_msg
8
# Name prefix checked by delete_recyclable_ip(): floating IPs of the head node
# whose name starts with it are deleted together with the cluster.
RAY_RECYCLABLE = "ray-recyclable"
# Module-wide state shared by the delete_* helpers. clean_cluster() assigns
# both names (via `global`) before it triggers any deletion.
ibm_vpc_client = None
RESOURCE_GROUP_ID = None
10
+
11
def get_vpc_data(vpc_id):
    """Fetch the description of a VPC through the module-level client.

    Args:
        vpc_id: identifier of the VPC to look up.

    Returns:
        The ``result`` payload of ``get_vpc``, or ``None`` when ``vpc_id`` is
        falsy or the service answers with status code 404.

    Raises:
        ibm_cloud_sdk_core.ApiException: for any status code other than 404.
    """
    if not vpc_id:
        return None

    try:
        return ibm_vpc_client.get_vpc(vpc_id).result
    except ibm_cloud_sdk_core.ApiException as e:
        # Only "not found" is an expected outcome; everything else propagates.
        if e.code != 404:
            raise
        print("VPC doesn't exist.")
        return None
22
+
23
def delete_subnets(vpc_data):
    """Delete every subnet attached to the VPC's default routing table.

    Each subnet is deleted and then polled until the service no longer
    returns it, so the function only returns once all subnets are gone.

    Args:
        vpc_data: VPC description; only
            ``vpc_data['default_routing_table']['id']`` is read.

    Raises:
        ibm_cloud_sdk_core.ApiException: if a request fails for any reason
            other than the polled subnet being reported as not found (404).
        Exception: if a subnet is still retrievable after all polling tries.
    """
    # NOTE(review): `spinner` is imported from config_builder; its behavior is
    # not visible in this file.
    @spinner
    def _poll_subnet_exists(subnet_id):
        # Up to 10 lookups, 10 seconds apart.
        tries = 10
        sleep_interval = 10
        while tries:
            try:
                ibm_vpc_client.get_subnet(subnet_id)
            except ibm_cloud_sdk_core.ApiException as e:
                # Only "not found" proves the deletion completed. Any other
                # failure must not be reported as a successful deletion.
                if e.code != 404:
                    raise
                print(color_msg(f"Deleted subnet id: '{ subnet_id } '", color=Color.PURPLE))
                return True
            tries -= 1
            time.sleep(sleep_interval)
        print(color_msg(f"Internal VPC error: Failed to delete subnet: '{ subnet_id } ' within expected time frame.\n "
                        "Try again later."))
        raise Exception("Failed to delete subnet within expected time frame")

    routing_table_id = vpc_data['default_routing_table']['id']
    subnets_attached_to_routing_table = ibm_vpc_client.list_subnets(
        routing_table_id=routing_table_id).get_result()['subnets']
    subnet_ids = [subnet['id'] for subnet in subnets_attached_to_routing_table]
    for subnet_id in subnet_ids:
        ibm_vpc_client.delete_subnet(subnet_id).get_result()
        _poll_subnet_exists(subnet_id)
45
+
46
+
47
def delete_gateways(vpc_id):
    """Delete every public gateway that belongs to the given VPC.

    Gateways are listed for the module-level ``RESOURCE_GROUP_ID`` and kept
    when ``gateway['vpc']['id']`` equals ``vpc_id``.

    A 404 on deletion is treated as "already gone". A 409 (still in use) is
    retried every 5 seconds with no retry limit.

    Args:
        vpc_id: identifier of the VPC whose gateways are removed.

    Raises:
        ibm_cloud_sdk_core.ApiException: for any status code other than
            404 or 409.
    """
    gateways = ibm_vpc_client.list_public_gateways(
        resource_group_id=RESOURCE_GROUP_ID).get_result()['public_gateways']
    gateways_ids_of_vpc = [gateway['id'] for gateway in gateways if gateway['vpc']['id'] == vpc_id]
    for gateway_id in gateways_ids_of_vpc:
        while True:
            try:
                ibm_vpc_client.delete_public_gateway(gateway_id).get_result()
            except ibm_cloud_sdk_core.ApiException as e:
                if e.code == 404:
                    print("Gateway doesn't exist.")
                    break
                if e.code != 409:
                    # Unexpected failure: retrying would spin forever without
                    # any delay, so surface the error to the caller instead.
                    raise
                print("Gateway still in use. If error persists, exit and try again later.")
                time.sleep(5)
            else:
                print(color_msg(f"Deleted gateway with id: '{ gateway_id } '", Color.PURPLE))
                break
64
+
65
def delete_recyclable_ip(head_node_data):
    """Delete the head node's floating IPs whose name starts with RAY_RECYCLABLE.

    The floating IPs are looked up on the first network interface of the
    instance. A 404 on deletion is treated as "already gone". A 409 (still in
    use) is retried every 5 seconds with no retry limit.

    Args:
        head_node_data: instance description; ``['id']`` and
            ``['network_interfaces'][0]['id']`` are read.

    Raises:
        ibm_cloud_sdk_core.ApiException: for any status code other than
            404 or 409 while deleting an IP.
    """
    nic_id = head_node_data["network_interfaces"][0]["id"]

    # find head node external ip
    floating_ips_data = ibm_vpc_client.list_instance_network_interface_floating_ips(
        head_node_data["id"], nic_id).get_result()
    floating_ips = floating_ips_data.get("floating_ips", [])

    for ip in floating_ips:
        if not ip["name"].startswith(RAY_RECYCLABLE):
            continue
        while True:
            try:
                ibm_vpc_client.delete_floating_ip(ip["id"])
            except ibm_cloud_sdk_core.ApiException as e:
                if e.code == 404:
                    print("IP wasn't found")
                    break
                if e.code != 409:
                    # Unexpected failure: retrying would spin forever without
                    # any delay, so surface the error to the caller instead.
                    raise
                print("IP still in use. If error persists, exit and try again later.")
                # will retry until cloud functions timeout.
                time.sleep(5)
            else:
                print(color_msg(f"Deleted IP address with id: '{ ip ['id' ]} '", Color.PURPLE))
                break
89
+
90
+
91
def delete_instances(vpc_id, cluster_name):
    """Delete all VM instances of a VPC and wait until they are gone.

    If an instance whose name contains ``"<cluster_name>-head"`` is found,
    its recyclable floating IP is deleted first. Deletion is then requested
    for every instance, and only afterwards is each one polled, so the
    deletions overlap.

    Args:
        vpc_id: identifier of the VPC whose instances are removed.
        cluster_name: cluster name used to recognise the head node.

    Raises:
        ibm_cloud_sdk_core.ApiException: if a request fails for any reason
            other than the polled instance being reported as not found (404).
        Exception: if an instance is still retrievable after all polling tries.
    """
    # NOTE(review): `spinner` is imported from config_builder; its behavior is
    # not visible in this file.
    @spinner
    def _poll_instance_exists(instance_id):
        # Up to 10 lookups, 10 seconds apart.
        tries = 10
        sleep_interval = 10
        while tries:
            try:
                ibm_vpc_client.get_instance(instance_id).get_result()
            except ibm_cloud_sdk_core.ApiException as e:
                # Only "not found" proves the deletion completed. Any other
                # failure must not be reported as a successful deletion.
                if e.code != 404:
                    raise
                print(color_msg(f"Deleted VM instance with id: '{ instance_id } '", Color.PURPLE))
                return True
            tries -= 1
            time.sleep(sleep_interval)
        print(color_msg(f"Internal VPC error: Failed to delete VM: '{ instance_id } ' within expected time frame."
                        "\n Try again later."))
        raise Exception("Failed to delete instance within expected time frame.")

    instances = ibm_vpc_client.list_instances(vpc_id=vpc_id).get_result()['instances']
    # delete ip address of head node if it was created by Ray.
    # The marker must not contain whitespace, otherwise it can never be part
    # of an instance name and the head node is silently skipped.
    head_marker = f"{cluster_name}-head"
    head_node_data = next((inst for inst in instances if head_marker in inst['name']), None)
    if head_node_data:
        delete_recyclable_ip(head_node_data)

    instance_ids = [instance['id'] for instance in instances]
    for instance_id in instance_ids:
        ibm_vpc_client.delete_instance(id=instance_id).get_result()
    for instance_id in instance_ids:
        _poll_instance_exists(instance_id)
119
+
120
def delete_unbound_vpc(vpc_id):
    """Delete the VPC itself.

    A 404 is treated as "already gone". A 409 (still in use) is retried every
    5 seconds with no retry limit.

    Args:
        vpc_id: identifier of the VPC to delete.

    Raises:
        ibm_cloud_sdk_core.ApiException: for any status code other than
            404 or 409.
    """
    while True:
        try:
            ibm_vpc_client.delete_vpc(vpc_id).get_result()
        except ibm_cloud_sdk_core.ApiException as e:
            if e.code == 404:
                print("VPC doesn't exist.")
                break
            if e.code != 409:
                # Unexpected failure: retrying would spin forever without any
                # delay, so surface the error to the caller instead.
                raise
            print("VPC still in use.")
            # will retry until cloud functions timeout.
            time.sleep(5)
        else:
            print(color_msg(f"VPC '{ vpc_id } ' and its attached resources were deleted successfully", Color.LIGHTGREEN))
            break
135
+
136
def delete_vpc(vpc_id, cluster_name):
    """Tear down a VPC together with the resources attached to it.

    Looks the VPC up first; when it cannot be found a message is printed and
    nothing else happens. Otherwise instances, subnets, public gateways and
    finally the VPC are deleted, in that order.

    Args:
        vpc_id: identifier of the VPC to delete.
        cluster_name: cluster name, forwarded to ``delete_instances``.
    """
    vpc_data = get_vpc_data(vpc_id)
    if vpc_data:
        banner = f"Deleting vpc: '{ vpc_data ['name' ]} ' with id: '{ vpc_id } '"
        print(color_msg(banner, Color.YELLOW))
        delete_instances(vpc_data['id'], cluster_name)
        delete_subnets(vpc_data)
        delete_gateways(vpc_id)
        delete_unbound_vpc(vpc_id)
    else:
        print(f"Failed to find a VPC with id={ vpc_id } ")
146
+
147
+
148
+
149
def clean_cluster(config_file):
    """Delete the VPC described by a cluster config file, with its resources.

    Reads the YAML config, sets the module-level ``ibm_vpc_client`` and
    ``RESOURCE_GROUP_ID``, points the client at the regional endpoint and
    calls ``delete_vpc``.

    Args:
        config_file: path to the cluster YAML config; ``~`` is expanded.

    Returns:
        ``{"Status": "Success"}`` once ``delete_vpc`` has returned.

    Raises:
        Exception: if the config carries no region.
    """
    global ibm_vpc_client, RESOURCE_GROUP_ID
    with open(os.path.expanduser(config_file)) as f:
        config = yaml.safe_load(f)

    node_config = config['available_node_types'][ConfigBuilder.DEFAULT_NODE_TYPE]['node_config']
    iam_api_key = config['provider']['iam_api_key']
    resource_group_id = node_config['resource_group_id']
    vpc_id = node_config['vpc_id']
    region = config['provider']['region']
    cluster_name = config['cluster_name']
    # Publish the resource group only after every config key was read.
    RESOURCE_GROUP_ID = resource_group_id

    authenticator = ibm_cloud_sdk_core.authenticators.IAMAuthenticator(iam_api_key, url=None)
    ibm_vpc_client = ibm_vpc.VpcV1('2022-06-30', authenticator=authenticator)

    if not region:
        raise Exception("VPC not found in any region")

    # The host name must not contain whitespace, or the endpoint is invalid.
    ibm_vpc_client.set_service_url(f'https://{region}.iaas.cloud.ibm.com/v1')

    delete_vpc(vpc_id=vpc_id, cluster_name=cluster_name)
    return {"Status": "Success"}
0 commit comments