Skip to content

Commit 117ffe0

Browse files
committed
produce scripts for cluster interaction
1 parent 3ff15ce commit 117ffe0

File tree

1 file changed

+76
-28
lines changed

1 file changed

+76
-28
lines changed

src/ibm_ray_config/modules/utils.py

Lines changed: 76 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import yaml
88
import time
99
from enum import Enum
10-
import uuid
10+
from pathlib import Path
1111
import inquirer
1212
from inquirer import errors
1313
from ibm_platform_services import IamIdentityV1
@@ -144,19 +144,19 @@ def validate_not_empty(answers, current):
144144
raise errors.ValidationError('', reason=f"Key name can't be empty")
145145
return True
146146

147-
def validate_cluster_name(answers, current):
147+
def validate_name(answers, current):
148148
"""
149149
returns True if the cluster name meets both ray and IBM VPC VSI requirements.
150150
since ray's cluster pattern of "^([a-zA-Z0-9_]+)$" is contained within the IBM's VSI pattern
151151
of "^([a-z]|[a-z][-a-z0-9]*[a-z0-9])$" this validation assures compliance with the latter pattern.
152152
"""
153153
vsi_pattern = "^([a-z]|[a-z][-a-z0-9]*[a-z0-9])$"
154154
if not current:
155-
raise errors.ValidationError('', reason=f"Key name can't be empty")
155+
raise errors.ValidationError('', reason=f"Name can't be empty")
156156
pattern = re.compile(vsi_pattern)
157157
res = pattern.match(current)
158158
if not res or len(res.group())!=len(current):
159-
raise errors.ValidationError('', reason=f"Cluster name doesn't adhere to pattern: {vsi_pattern}")
159+
raise errors.ValidationError('', reason=f"Name must adhere to pattern: {vsi_pattern}")
160160
return True
161161

162162
def validate_exists(answers, current):
@@ -281,18 +281,18 @@ def verify_paths(input_path, output_path, verify_config=False):
281281
""":returns a valid input and output path files, in accordance with provided paths.
282282
if a given path is invalid, and user is unable to rectify, a default path will be chosen in its stead. """
283283

284-
def _is_valid_input_path(path):
284+
def _is_valid_input_file(path):
285285
if not os.path.isfile(path):
286286
print(color_msg(f"\nError - Path: '{path}' doesn't point to a file. ", color=Color.RED))
287287
return False
288288
return True
289289

290-
def _is_valid_output_path(path):
290+
def _is_valid_output_dir(path):
291291
""":returns path if it points to an existing directory; otherwise prints an error and returns None"""
292292

293293
if os.path.isdir(path):
294294
return path
295-
else:
295+
else: # returns None
296296
print(color_msg(f"{path} doesn't lead to an existing directory", color=Color.RED))
297297

298298
def _prompt_user(path, default_config_file, verify_func, request, default_msg):
@@ -307,10 +307,10 @@ def _prompt_user(path, default_config_file, verify_func, request, default_msg):
307307
path = free_dialog(request)['answer']
308308

309309
if not verify_config:
310-
input_path = _prompt_user(input_path, '', _is_valid_input_path,
310+
input_path = _prompt_user(input_path, '', _is_valid_input_file,
311311
"Provide a path to your existing config file, or leave blank to configure from template",
312312
'Using default input file\n')
313-
output_path = _prompt_user(output_path, os.getcwd(), _is_valid_output_path,
313+
output_path = _prompt_user(output_path, os.getcwd(), _is_valid_output_dir,
314314
"Provide a custom path for your config file, or leave blank for default output location",
315315
'Using default output path\n')
316316
return input_path, output_path
@@ -357,32 +357,80 @@ def get_profile_resources(instance_profile):
357357

358358
return cpu_num, memory_num, gpu_num
359359

360+
def write_script(script_name:str, path:str, content:list, run_from_cluster_dir = True):
361+
"""creates a script named 'script_name' at 'path' folder
362+
with 'content' as script commands. also add +x permission"""
363+
364+
USE_BASH = ['#!/bin/bash\n']
365+
CD_TO_CLUSTER_DIR = ['SCRIPTS_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )\n',
366+
'CLUSTER_DIR=$(dirname "$SCRIPTS_DIR")\n',
367+
'cd $CLUSTER_DIR\n']
368+
RETURN_TO_ORIGINAL_DIR = ['\ncd --']
369+
if run_from_cluster_dir:
370+
CMD = CD_TO_CLUSTER_DIR+content+RETURN_TO_ORIGINAL_DIR
371+
else:
372+
CMD = content
373+
file_path = os.path.join(path, script_name)
374+
with open(file_path, 'w') as script:
375+
script.writelines(USE_BASH+CMD)
376+
# add execute permissions to file's current permissions
377+
current_permissions = os.stat(file_path).st_mode
378+
new_permissions = current_permissions | 0o111
379+
os.chmod(file_path, new_permissions)
360380

361381
def dump_cluster_folder(config, output_folder):
382+
383+
# create output_folder, cluster_folder and scripts_folder if they don't exist
362384
if not os.path.isdir(output_folder):
363385
os.mkdir(output_folder)
364-
cluster_folder = os.path.join(output_folder, f"ray-cluster-{config['cluster_name']}-{str(uuid.uuid4())[:5]}")
386+
cluster_folder = os.path.join(output_folder, f"{config['cluster_name']}")
387+
scripts_folder = os.path.join(cluster_folder, f"scripts")
388+
os.makedirs(cluster_folder, exist_ok=True) # directory already exists
389+
os.makedirs(scripts_folder, exist_ok=True) # directory already exists
390+
391+
cluster_file = "config.yaml"
392+
cluster_file_path = os.path.join(cluster_folder, cluster_file)
365393

366-
# dump cluster config_file
367-
cluster_file = f"cluster-{config['cluster_name']}-config.yaml" # extracting: {cluster_name}-{uuid}
368-
cluster_file_path = os.path.join(cluster_folder,cluster_file)
369-
os.mkdir(cluster_folder)
394+
# get source path of ssh keys and extract their name
395+
original_private_key_path = os.path.expanduser(config['auth']['ssh_private_key'])
396+
original_public_key_path = original_private_key_path+'.pub'
397+
private_key_name = original_private_key_path.rsplit('/',1)[-1]
398+
399+
# update ssh key path to output folder
400+
new_private_key_path = os.path.join(cluster_folder, private_key_name)
401+
new_public_key_path = new_private_key_path+'.pub'
402+
config['auth']['ssh_private_key'] = Path(new_private_key_path).name
403+
404+
# dump config to the cluster's config file (cluster_file_path)
370405
with open(cluster_file_path, 'w') as file:
371406
yaml.dump(config, file, default_flow_style=False)
372-
private_key = os.path.expanduser(config['auth']['ssh_private_key'])
373-
public_key = private_key + '.pub'
374-
# copy private ssh file
375-
shutil.copyfile(private_key, os.path.join(cluster_folder,private_key.rsplit('/',1)[-1]))
376-
# copy public ssh file
377-
shutil.copyfile(public_key, os.path.join(cluster_folder,public_key.rsplit('/',1)[-1]))
378-
379-
# create script file
380-
with open(os.path.join(cluster_folder,'script.sh'), 'w') as script:
381-
script.writelines([
382-
"#!/bin/bash",
383-
f"\nray up -y {cluster_file_path}",
384-
f"\nray dashboard --port 8265 --remote-port 8265 {cluster_file_path}"]
385-
)
407+
408+
# move keys if generated on this run (located in /tmp), else copy from original location
409+
copy_or_move_file = shutil.move if 'tmp' in original_private_key_path else shutil.copyfile
410+
copy_or_move_file(original_private_key_path, new_private_key_path)
411+
copy_or_move_file(original_public_key_path, new_public_key_path)
412+
413+
write_script('create.sh',
414+
scripts_folder,
415+
[f"ray up -y {cluster_file_path}"])
416+
417+
write_script('connect.sh',
418+
scripts_folder,
419+
[f"ray dashboard --port 8265 --remote-port 8265 {cluster_file_path}"])
420+
421+
# kill tunnel created by ray dashboard by killing the PIDs involved
422+
write_script('disconnect.sh',
423+
scripts_folder,
424+
["lsof -i:8265 | awk 'NR>1 {print $2}' | sort -u | xargs kill"],
425+
run_from_cluster_dir = False)
426+
427+
write_script('terminate.sh',
428+
scripts_folder,
429+
[f"ray down -y {cluster_file_path}"])
430+
431+
write_script('ray.sh',
432+
scripts_folder,
433+
[f"ray $@"])
386434

387435
return cluster_folder
388436

0 commit comments

Comments
 (0)