7
7
import yaml
8
8
import time
9
9
from enum import Enum
10
- import uuid
10
+ from pathlib import Path
11
11
import inquirer
12
12
from inquirer import errors
13
13
from ibm_platform_services import IamIdentityV1
@@ -144,19 +144,19 @@ def validate_not_empty(answers, current):
144
144
raise errors .ValidationError ('' , reason = f"Key name can't be empty" )
145
145
return True
146
146
147
def validate_name(answers, current):
    """Validate that `current` is a non-empty name matching IBM VPC's VSI naming pattern.

    The (answers, current) signature and the ValidationError raised follow the
    validation-callback convention of the `inquirer` package.

    A valid name starts with a lowercase letter, may continue with lowercase
    letters, digits and hyphens, and must not end with a hyphen.

    NOTE(review): this validator was originally described as also covering ray's
    cluster-name pattern "^([a-zA-Z0-9_]+)$". Hyphens are accepted here but are
    not part of that pattern - confirm ray accepts such names.

    :param answers: answers collected so far (unused).
    :param current: the candidate name entered by the user.
    :returns: True when the name is valid.
    :raises errors.ValidationError: if the name is empty or violates the pattern.
    """
    vsi_pattern = r"^([a-z]|[a-z][-a-z0-9]*[a-z0-9])$"
    if not current:
        raise errors.ValidationError('', reason="Name can't be empty")
    # fullmatch requires the whole string to match, so a trailing newline
    # (which a bare `$` would tolerate) is rejected as well.
    if re.fullmatch(vsi_pattern, current) is None:
        raise errors.ValidationError('', reason=f"Name must adhere to pattern: {vsi_pattern}")
    return True
161
161
162
162
def validate_exists (answers , current ):
@@ -281,18 +281,18 @@ def verify_paths(input_path, output_path, verify_config=False):
281
281
""":returns a valid input and output path files, in accordance with provided paths.
282
282
if a given path is invalid, and user is unable to rectify, a default path will be chosen in its stead. """
283
283
284
- def _is_valid_input_path (path ):
284
+ def _is_valid_input_file (path ):
285
285
if not os .path .isfile (path ):
286
286
print (color_msg (f"\n Error - Path: '{ path } ' doesn't point to a file. " , color = Color .RED ))
287
287
return False
288
288
return True
289
289
290
- def _is_valid_output_path (path ):
290
+ def _is_valid_output_dir (path ):
291
291
""":returns path if it's either a valid absolute path, or a file name to be appended to current directory"""
292
292
293
293
if os .path .isdir (path ):
294
294
return path
295
- else :
295
+ else : # returns None
296
296
print (color_msg (f"{ path } doesn't lead to an existing directory" , color = Color .RED ))
297
297
298
298
def _prompt_user (path , default_config_file , verify_func , request , default_msg ):
@@ -307,10 +307,10 @@ def _prompt_user(path, default_config_file, verify_func, request, default_msg):
307
307
path = free_dialog (request )['answer' ]
308
308
309
309
if not verify_config :
310
- input_path = _prompt_user (input_path , '' , _is_valid_input_path ,
310
+ input_path = _prompt_user (input_path , '' , _is_valid_input_file ,
311
311
"Provide a path to your existing config file, or leave blank to configure from template" ,
312
312
'Using default input file\n ' )
313
- output_path = _prompt_user (output_path , os .getcwd (), _is_valid_output_path ,
313
+ output_path = _prompt_user (output_path , os .getcwd (), _is_valid_output_dir ,
314
314
"Provide a custom path for your config file, or leave blank for default output location" ,
315
315
'Using default output path\n ' )
316
316
return input_path , output_path
@@ -357,32 +357,80 @@ def get_profile_resources(instance_profile):
357
357
358
358
return cpu_num , memory_num , gpu_num
359
359
360
def write_script(script_name: str, path: str, content: list, run_from_cluster_dir=True):
    """Create an executable bash script named `script_name` inside the `path` folder.

    :param script_name: file name of the script to create.
    :param path: existing folder in which the script is written.
    :param content: shell commands, one per list entry. A line terminator is
        appended to any entry that lacks one.
    :param run_from_cluster_dir: when True, the commands are wrapped so that they
        run from the parent folder of the script's own folder, after which the
        working directory the script was started from is restored.
    """
    shebang = ['#!/bin/bash\n']
    enter_cluster_dir = [
        # remember where we started, so we can get back even if the commands `cd`
        'ORIGINAL_DIR=$(pwd)\n',
        'SCRIPTS_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )\n',
        'CLUSTER_DIR=$(dirname "$SCRIPTS_DIR")\n',
        # quoted, so paths containing spaces survive word splitting
        'cd "$CLUSTER_DIR"\n',
    ]
    # a bare `cd --` would go to $HOME rather than back to the starting directory
    leave_cluster_dir = ['cd "$ORIGINAL_DIR"\n']

    # writelines() adds no separators, so terminate every command explicitly
    commands = [line if line.endswith('\n') else line + '\n' for line in content]
    if run_from_cluster_dir:
        commands = enter_cluster_dir + commands + leave_cluster_dir

    file_path = os.path.join(path, script_name)
    with open(file_path, 'w', encoding='utf-8') as script:
        script.writelines(shebang + commands)

    # add execute permissions on top of the file's current permissions
    os.chmod(file_path, os.stat(file_path).st_mode | 0o111)
360
380
361
381
def _is_in_temp_dir(file_path):
    """Return True if `file_path` resides under the system temp folder or /tmp."""
    import tempfile  # local import: not needed anywhere else in this block

    resolved = os.path.realpath(file_path)
    temp_roots = {os.path.realpath(tempfile.gettempdir()), os.path.realpath('/tmp')}
    return any(resolved.startswith(root.rstrip(os.sep) + os.sep) for root in temp_roots)


def dump_cluster_folder(config, output_folder):
    """Write a self-contained cluster folder and return its path.

    The folder `<output_folder>/<cluster_name>` receives:
      * config.yaml       - `config` dumped as YAML
      * the ssh key pair  - private key and its '.pub' sibling
      * scripts/          - create.sh, connect.sh, disconnect.sh, terminate.sh, ray.sh

    Side effect: config['auth']['ssh_private_key'] is overwritten with the bare
    file name of the private key, so the dumped config refers to the key that
    sits next to it. The scripts change into the cluster folder before running.

    :param config: cluster configuration dict; reads 'cluster_name' and
        config['auth']['ssh_private_key'].
    :param output_folder: folder under which the cluster folder is created.
    :returns: path of the cluster folder.
    """
    # create output_folder, cluster_folder and scripts_folder if they don't exist
    cluster_folder = os.path.join(output_folder, str(config['cluster_name']))
    scripts_folder = os.path.join(cluster_folder, "scripts")
    os.makedirs(scripts_folder, exist_ok=True)

    cluster_file = "config.yaml"
    cluster_file_path = os.path.join(cluster_folder, cluster_file)

    # get source path of ssh keys and extract their name
    original_private_key_path = os.path.expanduser(config['auth']['ssh_private_key'])
    private_key_name = os.path.basename(original_private_key_path)
    new_private_key_path = os.path.join(cluster_folder, private_key_name)

    # the dumped config references the key relative to the cluster folder
    config['auth']['ssh_private_key'] = private_key_name

    # dump config to cluster_file
    with open(cluster_file_path, 'w', encoding='utf-8') as file:
        yaml.dump(config, file, default_flow_style=False)

    # move keys generated on this run (located in a temp folder), else copy them
    # from their original location. A plain substring test for 'tmp' would also
    # move - i.e. delete from the source - any user key whose path merely
    # contains those letters.
    transfer = shutil.move if _is_in_temp_dir(original_private_key_path) else shutil.copyfile
    key_pairs = (
        (original_private_key_path, new_private_key_path),
        (original_private_key_path + '.pub', new_private_key_path + '.pub'),
    )
    for source, destination in key_pairs:
        # skip when the key already sits in the cluster folder (copyfile would raise)
        if os.path.abspath(source) != os.path.abspath(destination):
            transfer(source, destination)
    # copyfile doesn't carry over permission bits; a private key needs owner-only access
    os.chmod(new_private_key_path, 0o600)

    # These scripts cd into the cluster folder before running their command, so
    # the config is referenced by file name. Embedding cluster_file_path would
    # break whenever output_folder is a relative path.
    write_script('create.sh',
                 scripts_folder,
                 [f"ray up -y {cluster_file}"])

    write_script('connect.sh',
                 scripts_folder,
                 [f"ray dashboard --port 8265 --remote-port 8265 {cluster_file}"])

    # kill tunnel created by ray dashboard by killing the PIDs involved
    write_script('disconnect.sh',
                 scripts_folder,
                 ["lsof -i:8265 | awk 'NR>1 {print $2}' | sort -u | xargs kill"],
                 run_from_cluster_dir=False)

    write_script('terminate.sh',
                 scripts_folder,
                 [f"ray down -y {cluster_file}"])

    # quoted "$@" forwards arguments containing spaces unchanged
    write_script('ray.sh',
                 scripts_folder,
                 ['ray "$@"'])

    return cluster_folder
388
436
0 commit comments