Skip to content

Commit 134cfd7

Browse files
committed
add cleanup functionality and remove input flag
1 parent adab033 commit 134cfd7

File tree

2 files changed

+181
-7
lines changed

2 files changed

+181
-7
lines changed

src/ibm_ray_config/main.py

+15-7
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import yaml
66

77
from ibm_ray_config.modules.utils import color_msg, Color, verify_paths, dump_cluster_folder
8+
from ibm_ray_config.modules.cleanup import clean_cluster
89

910
# currently supporting configuration for ray above IBM Gen2 VPC.
1011
IBM_VPC = 'IBM VPC'
@@ -52,20 +53,27 @@ def validate_api_keys(base_config, modules, iam_api_key, compute_iam_endpoint):
5253

5354
@click.command()
5455
@click.option('--output-folder', '-o', help='Output folder to save configurations')
55-
@click.option('--input-file', '-i', help=f'Template for the new configuration')
56+
# @click.option('--input-file', '-i', help=f'Template for the new configuration')
5657
@click.option('--iam-api-key', '-a', help='IAM_API_KEY')
5758
@click.option('--version', '-v', help=f'Get package version', is_flag=True)
5859
@click.option('--compute-iam-endpoint', help='IAM endpoint url used for compute instead of default https://iam.cloud.ibm.com')
59-
@click.option('--endpoint', help='IBM Cloud API endpoint')
60+
@click.option('--region', '-r', help='IBM Cloud VPC API region')
61+
@click.option('--cleanup', '-c', help='Path to cluster config file to delete')
6062
@click.option('--pr', '-g', help=f'Temporary workaround for ray gen2 only. If specified, use provider setup from PR github', is_flag=True, default=False)
61-
def builder(iam_api_key, output_folder, input_file, version, compute_iam_endpoint, endpoint, pr):
62-
defaults = None # to be replaced by a flag
63+
def builder(iam_api_key, output_folder, version, compute_iam_endpoint, region, cleanup, pr):
64+
defaults = None # to be replaced by a flag
65+
input_file = None # to be replaced by a flag
66+
6367
if version:
6468
print(f"{pkg_resources.get_distribution('ibm-ray-config').project_name} "
6569
f"{pkg_resources.get_distribution('ibm-ray-config').version}")
6670
exit(0)
6771

68-
print(color_msg("\nWelcome to ibm_ray_config export helper\n", color=Color.YELLOW))
72+
if cleanup:
73+
clean_cluster(cleanup)
74+
exit(0)
75+
76+
print(color_msg(f"\nWelcome to ibm_ray_config export helper\n", color=Color.YELLOW))
6977

7078
input_file, output_folder = verify_paths(input_file, output_folder)
7179

@@ -79,8 +87,8 @@ def builder(iam_api_key, output_folder, input_file, version, compute_iam_endpoin
7987
base_config['create_defaults'] = defaults
8088
base_config, modules = validate_api_keys(base_config, modules, iam_api_key, compute_iam_endpoint)
8189

82-
if endpoint:
83-
base_config['provider']['endpoint'] = endpoint
90+
if region:
91+
base_config['provider']['endpoint'] = f'https://{region}.iaas.cloud.ibm.com/v1'
8492

8593
for module in modules:
8694
next_module = module(base_config)

src/ibm_ray_config/modules/cleanup.py

+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import os
2+
import time
3+
import ibm_cloud_sdk_core
4+
import ibm_vpc
5+
import yaml
6+
from ibm_ray_config.modules.config_builder import ConfigBuilder, spinner
7+
from ibm_ray_config.modules.utils import Color, color_msg
8+
# Name prefix of the floating IPs that this module is allowed to delete;
# delete_recyclable_ip() skips any floating IP whose name lacks this prefix.
RAY_RECYCLABLE = "ray-recyclable"

# Shared VPC client used by every helper in this module. It stays None until
# clean_cluster() builds it from the cluster config file.
ibm_vpc_client = None

# Resource group id read by delete_gateways(). clean_cluster() assigns it via
# `global`; defining it here keeps the name bound at import time.
RESOURCE_GROUP_ID = None
10+
11+
def get_vpc_data(vpc_id):
    """Fetch the description of a VPC through the module-level VPC client.

    Args:
        vpc_id: Id of the VPC to look up. A falsy value skips the lookup.

    Returns:
        The result payload of ``ibm_vpc_client.get_vpc``, or ``None`` when
        ``vpc_id`` is falsy or the service answers with HTTP 404.

    Raises:
        ibm_cloud_sdk_core.ApiException: For any API error other than 404.
    """
    if not vpc_id:
        return None

    try:
        # get_result() is how every other call in this module unwraps a response.
        return ibm_vpc_client.get_vpc(vpc_id).get_result()
    except ibm_cloud_sdk_core.ApiException as e:
        if e.code == 404:
            print("VPC doesn't exist.")
            return None
        raise
22+
23+
def delete_subnets(vpc_data):
    """Delete every subnet attached to the VPC's default routing table.

    Each subnet is deleted and then polled until the service no longer
    returns it, so the next subnet is only handled once the previous one is
    confirmed gone.

    Args:
        vpc_data: VPC description; ``vpc_data['default_routing_table']['id']``
            is the only field read.

    Raises:
        Exception: If a subnet is still retrievable after all polling attempts.
        ibm_cloud_sdk_core.ApiException: Propagated from the list/delete calls
            and from polling when the error is anything other than 404.
    """
    @spinner
    def _poll_subnet_exists(subnet_id):
        tries = 10
        sleep_interval = 10
        while tries:
            try:
                ibm_vpc_client.get_subnet(subnet_id).get_result()
            except ibm_cloud_sdk_core.ApiException as e:
                # Only a 404 proves the subnet is gone. Any other failure
                # (auth, server error) must not be reported as a deletion.
                if e.code != 404:
                    raise
                print(color_msg(f"Deleted subnet id: '{subnet_id}'",color=Color.PURPLE))
                return True
            tries -= 1
            time.sleep(sleep_interval)
        print(color_msg(f"Internal VPC error: Failed to delete subnet: '{subnet_id}' within expected time frame.\n"
                        "Try again later."))
        raise Exception("Failed to delete subnet within expected time frame")

    routing_table_id = vpc_data['default_routing_table']['id']
    subnets = ibm_vpc_client.list_subnets(routing_table_id=routing_table_id).get_result()['subnets']
    for subnet in subnets:
        subnet_id = subnet['id']
        ibm_vpc_client.delete_subnet(subnet_id).get_result()
        _poll_subnet_exists(subnet_id)
45+
46+
47+
def delete_gateways(vpc_id):
    """Delete every public gateway that belongs to the given VPC.

    Gateways are listed by the module-level ``RESOURCE_GROUP_ID`` and filtered
    by ``gateway['vpc']['id']``. A gateway that answers 409 (still in use) is
    retried every 5 seconds with no upper bound; the printed message tells the
    user to exit manually if the condition persists.

    Args:
        vpc_id: Id of the VPC whose public gateways are removed.

    Raises:
        ibm_cloud_sdk_core.ApiException: For any API error other than 404 or
            409 while deleting, and for errors from the list call.
    """
    gateways = ibm_vpc_client.list_public_gateways(resource_group_id=RESOURCE_GROUP_ID).get_result()['public_gateways']
    gateway_ids = [gateway['id'] for gateway in gateways if gateway['vpc']['id'] == vpc_id]

    for gateway_id in gateway_ids:
        while True:
            try:
                ibm_vpc_client.delete_public_gateway(gateway_id).get_result()
            except ibm_cloud_sdk_core.ApiException as e:
                if e.code == 404:
                    print("Gateway doesn't exist.")
                    break
                if e.code != 409:
                    # Unexpected failure: retrying would loop forever without
                    # any output, so surface the error to the caller instead.
                    raise
                print("Gateway still in use. If error persists, exit and try again later.")
                time.sleep(5)
            else:
                print(color_msg(f"Deleted gateway with id: '{gateway_id}'",Color.PURPLE))
                break
64+
65+
def delete_recyclable_ip(head_node_data):
    """Delete the head node's floating IPs whose name starts with ``RAY_RECYCLABLE``.

    Only the first network interface of the instance is inspected. Floating
    IPs with any other name are left untouched. A 409 (still in use) answer is
    retried every 5 seconds with no upper bound.

    Args:
        head_node_data: Instance description; ``['id']`` and
            ``['network_interfaces'][0]['id']`` are read.

    Raises:
        ibm_cloud_sdk_core.ApiException: For any API error other than 404 or
            409 while deleting, and for errors from the list call.
    """
    nic_id = head_node_data["network_interfaces"][0]["id"]

    # Find the floating IPs bound to the head node's first interface.
    floating_ips_data = ibm_vpc_client.list_instance_network_interface_floating_ips(
        head_node_data["id"], nic_id).get_result()

    for ip in floating_ips_data.get("floating_ips", []):
        if not ip["name"].startswith(RAY_RECYCLABLE):
            continue
        while True:
            try:
                ibm_vpc_client.delete_floating_ip(ip["id"])
            except ibm_cloud_sdk_core.ApiException as e:
                if e.code == 404:
                    print("IP wasn't found")
                    break
                if e.code != 409:
                    # Unexpected failure: retrying would loop forever without
                    # any output, so surface the error to the caller instead.
                    raise
                print("IP still in use. If error persists, exit and try again later.")
                time.sleep(5)
            else:
                print(color_msg(f"Deleted IP address with id: '{ip['id']}'",Color.PURPLE))
                break
89+
90+
91+
def delete_instances(vpc_id, cluster_name):
    """Delete all VM instances of a VPC and wait until they are gone.

    Every instance returned by ``list_instances(vpc_id=...)`` is deleted, not
    only those of the named cluster. ``cluster_name`` is used solely to locate
    the head node (an instance whose name contains ``"<cluster_name>-head"``)
    so its recyclable floating IP can be removed first. All delete requests
    are issued before polling starts, so the deletions run concurrently on the
    service side.

    Args:
        vpc_id: Id of the VPC whose instances are removed.
        cluster_name: Cluster name used to identify the head node.

    Raises:
        Exception: If an instance is still retrievable after all polling attempts.
        ibm_cloud_sdk_core.ApiException: Propagated from the list/delete calls
            and from polling when the error is anything other than 404.
    """
    @spinner
    def _poll_instance_exists(instance_id):
        tries = 10
        sleep_interval = 10
        while tries:
            try:
                ibm_vpc_client.get_instance(instance_id).get_result()
            except ibm_cloud_sdk_core.ApiException as e:
                # Only a 404 proves the instance is gone. Any other failure
                # (auth, server error) must not be reported as a deletion.
                if e.code != 404:
                    raise
                print(color_msg(f"Deleted VM instance with id: '{instance_id}'",Color.PURPLE))
                return True
            tries -= 1
            time.sleep(sleep_interval)
        print(color_msg(f"Internal VPC error: Failed to delete VM: '{instance_id}' within expected time frame."
                        "\nTry again later."))
        raise Exception("Failed to delete instance within expected time frame.")

    instances = ibm_vpc_client.list_instances(vpc_id=vpc_id).get_result()['instances']

    # Delete the IP address of the head node if it was created by Ray.
    head_node_data = next((inst for inst in instances if f"{cluster_name}-head" in inst['name']), None)
    if head_node_data:
        delete_recyclable_ip(head_node_data)

    instance_ids = [instance['id'] for instance in instances]
    for instance_id in instance_ids:
        ibm_vpc_client.delete_instance(id=instance_id).get_result()
    for instance_id in instance_ids:
        _poll_instance_exists(instance_id)
119+
120+
def delete_unbound_vpc(vpc_id):
    """Delete the VPC itself, retrying while the service reports it in use.

    A 409 answer is retried every 5 seconds with no upper bound. A 404 is
    reported and treated as done.

    Args:
        vpc_id: Id of the VPC to delete.

    Raises:
        ibm_cloud_sdk_core.ApiException: For any API error other than 404 or 409.
    """
    while True:
        try:
            ibm_vpc_client.delete_vpc(vpc_id).get_result()
        except ibm_cloud_sdk_core.ApiException as e:
            if e.code == 404:
                print("VPC doesn't exist.")
                return
            if e.code != 409:
                # Unexpected failure: retrying would loop forever without
                # any output, so surface the error to the caller instead.
                raise
            print("VPC still in use.")
            time.sleep(5)
        else:
            print(color_msg(f"VPC '{vpc_id}' and its attached resources were deleted successfully",Color.LIGHTGREEN))
            return
135+
136+
def delete_vpc(vpc_id, cluster_name):
    """Tear down a VPC and the resources inside it.

    Looks the VPC up first. When it cannot be found, a message is printed and
    nothing else happens. Otherwise instances, subnets, public gateways and
    finally the VPC are removed, in that order.

    Args:
        vpc_id: Id of the VPC to remove.
        cluster_name: Cluster name, forwarded to ``delete_instances``.
    """
    vpc_data = get_vpc_data(vpc_id)
    if vpc_data:
        banner = f"Deleting vpc: '{vpc_data['name']}' with id: '{vpc_id}'"
        print(color_msg(banner, Color.YELLOW))
        delete_instances(vpc_data['id'], cluster_name)
        delete_subnets(vpc_data)
        delete_gateways(vpc_id)
        delete_unbound_vpc(vpc_id)
    else:
        print(f"Failed to find a VPC with id={vpc_id}")
146+
147+
148+
149+
def clean_cluster(config_file):
    """Delete the VPC described by a cluster config file, with its resources.

    Reads the YAML config, initialises the module-level ``ibm_vpc_client`` and
    ``RESOURCE_GROUP_ID`` from it, points the client at the regional VPC
    endpoint and delegates the teardown to ``delete_vpc``.

    Args:
        config_file: Path to the cluster config YAML; ``~`` is expanded.

    Returns:
        ``{"Status": "Success"}`` once ``delete_vpc`` returns.

    Raises:
        Exception: If ``provider.region`` is missing or empty in the config.
        KeyError: If another required config key is missing.
    """
    global ibm_vpc_client, RESOURCE_GROUP_ID

    with open(os.path.expanduser(config_file)) as f:
        config = yaml.safe_load(f)

    provider = config['provider']
    node_config = config['available_node_types'][ConfigBuilder.DEFAULT_NODE_TYPE]['node_config']

    # Read every value up front so a malformed config fails before any
    # module-level state is touched or any client is created.
    iam_api_key = provider['iam_api_key']
    resource_group_id = node_config['resource_group_id']
    vpc_id = node_config['vpc_id']
    cluster_name = config['cluster_name']

    # .get() keeps the explicit region check reachable when the key is absent,
    # instead of failing earlier with a bare KeyError.
    region = provider.get('region')
    if not region:
        raise Exception("VPC not found in any region")

    RESOURCE_GROUP_ID = resource_group_id
    authenticator = ibm_cloud_sdk_core.authenticators.IAMAuthenticator(iam_api_key, url=None)
    ibm_vpc_client = ibm_vpc.VpcV1('2022-06-30', authenticator=authenticator)
    ibm_vpc_client.set_service_url(f'https://{region}.iaas.cloud.ibm.com/v1')

    delete_vpc(vpc_id=vpc_id, cluster_name=cluster_name)
    return {"Status": "Success"}

0 commit comments

Comments
 (0)