@@ -303,6 +303,49 @@ def torchx_config(
303
303
to_return ["requirements" ] = requirements
304
304
return to_return
305
305
306
def from_k8_cluster_object(rc):
    """Build a ``Cluster`` from a RayCluster custom-resource dict.

    Only the first worker group of ``rc`` and the first container of its pod
    template are inspected.

    Args:
        rc: A RayCluster object as a nested dict, e.g. one entry of the
            ``"items"`` list returned by ``list_namespaced_custom_object``.

    Returns:
        A ``Cluster`` wrapping a ``ClusterConfiguration`` populated from ``rc``.

    Raises:
        KeyError: If ``rc`` lacks the name, namespace, replica counts, image,
            or CPU/memory requests and limits.
        IndexError: If ``rc`` has no worker group or no container.
        ValueError: If a memory quantity, minus its last character, is not an
            integer.
    """
    metadata = rc["metadata"]
    worker_group = rc["spec"]["workerGroupSpecs"][0]
    container = worker_group["template"]["spec"]["containers"][0]
    requests = container["resources"]["requests"]
    limits = container["resources"]["limits"]

    # Labels are optional on Kubernetes objects; a missing (or null) label map
    # is treated the same as one without the "orderedinstance" label.
    labels = metadata.get("labels") or {}
    machine_types = (
        labels["orderedinstance"].split("_") if "orderedinstance" in labels else []
    )

    cluster_config = ClusterConfiguration(
        name=metadata["name"],
        namespace=metadata["namespace"],
        machine_types=machine_types,
        min_worker=worker_group["minReplicas"],
        max_worker=worker_group["maxReplicas"],
        min_cpus=requests["cpu"],
        max_cpus=limits["cpu"],
        # The final character of the memory quantity is dropped before the
        # integer conversion (presumably a one-letter unit suffix such as
        # "G" -- TODO confirm against the template that creates the cluster).
        min_memory=int(requests["memory"][:-1]),
        max_memory=int(limits["memory"][:-1]),
        # A container without a GPU limit requests no GPUs.
        gpu=limits.get("nvidia.com/gpu", 0),
        # InstaScale is considered enabled when machine types were recorded.
        instascale=bool(machine_types),
        image=container["image"],
        # Presence of a "volumeMounts" key on the container is what marks
        # the cluster as local-interactive here.
        local_interactive="volumeMounts" in container,
    )
    return Cluster(cluster_config)
348
+
306
349
307
350
def list_all_clusters (namespace : str , print_to_console : bool = True ):
308
351
"""
@@ -339,6 +382,27 @@ def get_current_namespace(): # pragma: no cover
339
382
return "default"
340
383
341
384
385
def get_cluster(cluster_name: str, namespace: str = "default"):
    """Look up a RayCluster by name and return it as a ``Cluster``.

    Loads the kube config, lists the ``rayclusters`` custom objects
    (``ray.io/v1alpha1``) in ``namespace`` and converts the first one whose
    ``metadata.name`` equals ``cluster_name``.

    Args:
        cluster_name: Name of the RayCluster to find.
        namespace: Namespace to search; defaults to ``"default"``.

    Returns:
        The result of ``Cluster.from_k8_cluster_object`` for the matching
        object. If loading the config or listing the objects raises, the
        result of ``_kube_api_error_handling`` is returned instead.

    Raises:
        FileNotFoundError: If no RayCluster in ``namespace`` has that name.
    """
    try:
        config.load_kube_config()
        custom_objects_api = client.CustomObjectsApi()
        ray_clusters = custom_objects_api.list_namespaced_custom_object(
            group="ray.io",
            version="v1alpha1",
            namespace=namespace,
            plural="rayclusters",
        )
    except Exception as e:
        return _kube_api_error_handling(e)

    # Stop at the first object carrying the requested name.
    found = next(
        (
            candidate
            for candidate in ray_clusters["items"]
            if candidate["metadata"]["name"] == cluster_name
        ),
        None,
    )
    if found is None:
        raise FileNotFoundError(
            f"Cluster {cluster_name} is not found in {namespace} namespace"
        )
    return Cluster.from_k8_cluster_object(found)
404
+
405
+
342
406
# private methods
343
407
344
408
0 commit comments