@@ -107,10 +107,13 @@ def view_clusters(namespace: str = None):
107
107
view_clusters function will display existing clusters with their specs, and handle user interactions.
108
108
"""
109
109
if not is_notebook ():
110
- warnings .warn ("view_clusters can only be used in a Jupyter Notebook environment." )
111
- return # Exit function if not in Jupyter Notebook
110
+ warnings .warn (
111
+ "view_clusters can only be used in a Jupyter Notebook environment."
112
+ )
113
+ return # Exit function if not in Jupyter Notebook
112
114
113
115
from .cluster import get_current_namespace
116
+
114
117
if not namespace :
115
118
namespace = get_current_namespace ()
116
119
@@ -124,42 +127,76 @@ def view_clusters(namespace: str = None):
124
127
return
125
128
126
129
classification_widget = widgets .ToggleButtons (
127
- options = ray_clusters_df ["Name" ].tolist (), value = ray_clusters_df ["Name" ].tolist ()[0 ],
128
- description = 'Select an existing cluster:' ,
130
+ options = ray_clusters_df ["Name" ].tolist (),
131
+ value = ray_clusters_df ["Name" ].tolist ()[0 ],
132
+ description = "Select an existing cluster:" ,
129
133
)
130
134
# Setting the initial value to trigger the event handler to display the cluster details.
131
135
initial_value = classification_widget .value
132
- _on_cluster_click ({"new" : initial_value }, raycluster_data_output , namespace , classification_widget )
133
- classification_widget .observe (lambda selection_change : _on_cluster_click (selection_change , raycluster_data_output , namespace , classification_widget ), names = "value" )
136
+ _on_cluster_click (
137
+ {"new" : initial_value }, raycluster_data_output , namespace , classification_widget
138
+ )
139
+ classification_widget .observe (
140
+ lambda selection_change : _on_cluster_click (
141
+ selection_change , raycluster_data_output , namespace , classification_widget
142
+ ),
143
+ names = "value" ,
144
+ )
134
145
135
146
# UI table buttons
136
147
delete_button = widgets .Button (
137
- description = 'Delete Cluster' ,
138
- icon = 'trash' ,
139
- tooltip = "Delete the selected cluster"
140
- )
141
- delete_button .on_click (lambda b : _on_delete_button_click (b , classification_widget , ray_clusters_df , raycluster_data_output , user_output , delete_button , list_jobs_button , ray_dashboard_button ))
148
+ description = "Delete Cluster" ,
149
+ icon = "trash" ,
150
+ tooltip = "Delete the selected cluster" ,
151
+ )
152
+ delete_button .on_click (
153
+ lambda b : _on_delete_button_click (
154
+ b ,
155
+ classification_widget ,
156
+ ray_clusters_df ,
157
+ raycluster_data_output ,
158
+ user_output ,
159
+ delete_button ,
160
+ list_jobs_button ,
161
+ ray_dashboard_button ,
162
+ )
163
+ )
142
164
143
165
list_jobs_button = widgets .Button (
144
- description = 'View Jobs' ,
145
- icon = 'suitcase' ,
146
- tooltip = "Open the Ray Job Dashboard"
147
- )
148
- list_jobs_button .on_click (lambda b : _on_list_jobs_button_click (b , classification_widget , ray_clusters_df , user_output , url_output ))
166
+ description = "View Jobs" , icon = "suitcase" , tooltip = "Open the Ray Job Dashboard"
167
+ )
168
+ list_jobs_button .on_click (
169
+ lambda b : _on_list_jobs_button_click (
170
+ b , classification_widget , ray_clusters_df , user_output , url_output
171
+ )
172
+ )
149
173
150
174
ray_dashboard_button = widgets .Button (
151
- description = 'Open Ray Dashboard' ,
152
- icon = 'dashboard' ,
153
- tooltip = "Open the Ray Dashboard in a new tab" ,
154
- layout = widgets .Layout (width = 'auto' ),
155
- )
156
- ray_dashboard_button .on_click (lambda b : _on_ray_dashboard_button_click (b , classification_widget , ray_clusters_df , user_output , url_output ))
175
+ description = "Open Ray Dashboard" ,
176
+ icon = "dashboard" ,
177
+ tooltip = "Open the Ray Dashboard in a new tab" ,
178
+ layout = widgets .Layout (width = "auto" ),
179
+ )
180
+ ray_dashboard_button .on_click (
181
+ lambda b : _on_ray_dashboard_button_click (
182
+ b , classification_widget , ray_clusters_df , user_output , url_output
183
+ )
184
+ )
157
185
158
186
display (widgets .VBox ([classification_widget , raycluster_data_output ]))
159
- display (widgets .HBox ([delete_button , list_jobs_button , ray_dashboard_button ]), url_output , user_output )
187
+ display (
188
+ widgets .HBox ([delete_button , list_jobs_button , ray_dashboard_button ]),
189
+ url_output ,
190
+ user_output ,
191
+ )
160
192
161
193
162
- def _on_cluster_click (selection_change , raycluster_data_output : widgets .Output , namespace : str , classification_widget : widgets .ToggleButtons ):
194
+ def _on_cluster_click (
195
+ selection_change ,
196
+ raycluster_data_output : widgets .Output ,
197
+ namespace : str ,
198
+ classification_widget : widgets .ToggleButtons ,
199
+ ):
163
200
"""
164
201
_on_cluster_click handles the event when a cluster is selected from the toggle buttons, updating the output with cluster details.
165
202
"""
@@ -168,21 +205,51 @@ def _on_cluster_click(selection_change, raycluster_data_output: widgets.Output,
168
205
ray_clusters_df = _fetch_cluster_data (namespace )
169
206
classification_widget .options = ray_clusters_df ["Name" ].tolist ()
170
207
with raycluster_data_output :
171
- display (HTML (ray_clusters_df [ray_clusters_df ["Name" ]== new_value ][["Name" , "Namespace" , "Num Workers" , "Head GPUs" , "Head CPU Req~Lim" , "Head Memory Req~Lim" , "Worker GPUs" , "Worker CPU Req~Lim" , "Worker Memory Req~Lim" , "status" ]].to_html (escape = False , index = False , border = 2 )))
172
-
173
-
174
- def _on_delete_button_click (b , classification_widget : widgets .ToggleButtons , ray_clusters_df : pd .DataFrame , raycluster_data_output : widgets .Output , user_output : widgets .Output , delete_button : widgets .Button , list_jobs_button : widgets .Button , ray_dashboard_button : widgets .Button ):
208
+ display (
209
+ HTML (
210
+ ray_clusters_df [ray_clusters_df ["Name" ] == new_value ][
211
+ [
212
+ "Name" ,
213
+ "Namespace" ,
214
+ "Num Workers" ,
215
+ "Head GPUs" ,
216
+ "Head CPU Req~Lim" ,
217
+ "Head Memory Req~Lim" ,
218
+ "Worker GPUs" ,
219
+ "Worker CPU Req~Lim" ,
220
+ "Worker Memory Req~Lim" ,
221
+ "status" ,
222
+ ]
223
+ ].to_html (escape = False , index = False , border = 2 )
224
+ )
225
+ )
226
+
227
+
228
+ def _on_delete_button_click (
229
+ b ,
230
+ classification_widget : widgets .ToggleButtons ,
231
+ ray_clusters_df : pd .DataFrame ,
232
+ raycluster_data_output : widgets .Output ,
233
+ user_output : widgets .Output ,
234
+ delete_button : widgets .Button ,
235
+ list_jobs_button : widgets .Button ,
236
+ ray_dashboard_button : widgets .Button ,
237
+ ):
175
238
"""
176
239
_on_delete_button_click handles the event when the Delete Button is clicked, deleting the selected cluster.
177
240
"""
178
241
cluster_name = classification_widget .value
179
- namespace = ray_clusters_df [ray_clusters_df ["Name" ]== classification_widget .value ]["Namespace" ].values [0 ]
242
+ namespace = ray_clusters_df [ray_clusters_df ["Name" ] == classification_widget .value ][
243
+ "Namespace"
244
+ ].values [0 ]
180
245
181
246
_delete_cluster (cluster_name , namespace )
182
247
183
248
with user_output :
184
249
user_output .clear_output ()
185
- print (f"Cluster { cluster_name } in the { namespace } namespace was deleted successfully." )
250
+ print (
251
+ f"Cluster { cluster_name } in the { namespace } namespace was deleted successfully."
252
+ )
186
253
187
254
# Refresh the dataframe
188
255
new_df = _fetch_cluster_data (namespace )
@@ -197,16 +264,28 @@ def _on_delete_button_click(b, classification_widget: widgets.ToggleButtons, ray
197
264
else :
198
265
classification_widget .options = new_df ["Name" ].tolist ()
199
266
200
- def _on_ray_dashboard_button_click (b , classification_widget : widgets .ToggleButtons , ray_clusters_df : pd .DataFrame , user_output : widgets .Output , url_output : widgets .Output ):
267
+
268
+ def _on_ray_dashboard_button_click (
269
+ b ,
270
+ classification_widget : widgets .ToggleButtons ,
271
+ ray_clusters_df : pd .DataFrame ,
272
+ user_output : widgets .Output ,
273
+ url_output : widgets .Output ,
274
+ ):
201
275
"""
202
276
_on_ray_dashboard_button_click handles the event when the Open Ray Dashboard button is clicked, opening the Ray Dashboard in a new tab
203
277
"""
204
278
from codeflare_sdk .cluster import Cluster
279
+
205
280
cluster_name = classification_widget .value
206
- namespace = ray_clusters_df [ray_clusters_df ["Name" ]== classification_widget .value ]["Namespace" ].values [0 ]
281
+ namespace = ray_clusters_df [ray_clusters_df ["Name" ] == classification_widget .value ][
282
+ "Namespace"
283
+ ].values [0 ]
207
284
208
285
# Suppress from Cluster Object initialisation widgets and outputs
209
- with widgets .Output (), contextlib .redirect_stdout (io .StringIO ()), contextlib .redirect_stderr (io .StringIO ()):
286
+ with widgets .Output (), contextlib .redirect_stdout (
287
+ io .StringIO ()
288
+ ), contextlib .redirect_stderr (io .StringIO ()):
210
289
cluster = Cluster (ClusterConfiguration (cluster_name , namespace ))
211
290
dashboard_url = cluster .cluster_dashboard_uri ()
212
291
@@ -216,22 +295,36 @@ def _on_ray_dashboard_button_click(b, classification_widget: widgets.ToggleButto
216
295
with url_output :
217
296
display (Javascript (f'window.open("{ dashboard_url } ", "_blank");' ))
218
297
219
def _on_list_jobs_button_click(
    b,
    classification_widget: widgets.ToggleButtons,
    ray_clusters_df: pd.DataFrame,
    user_output: widgets.Output,
    url_output: widgets.Output,
):
    """
    _on_list_jobs_button_click handles the event when the View Jobs button is clicked, opening the Ray Jobs Dashboard in a new tab.

    Args:
        b: The button instance handed over by the click callback (unused).
        classification_widget: Toggle buttons whose current value is the selected cluster name.
        ray_clusters_df: DataFrame with "Name" and "Namespace" columns, used to look up the
            namespace of the selected cluster.
        user_output: Output widget that receives the status message.
        url_output: Output widget used to run the JavaScript that opens the new tab.

    Returns:
        None. If the selected cluster is not present in ray_clusters_df a message is
        written to user_output and nothing is opened.
    """
    # Function-scope imports, matching the rest of this module.
    import json

    from codeflare_sdk.cluster import Cluster

    cluster_name = classification_widget.value
    matching_namespaces = ray_clusters_df[ray_clusters_df["Name"] == cluster_name][
        "Namespace"
    ].values

    # The DataFrame is captured when the view is built, so the selected name may
    # no longer be in it; report that instead of raising IndexError on [0].
    if len(matching_namespaces) == 0:
        with user_output:
            user_output.clear_output()
            print(
                f"Cluster {cluster_name} was not found; unable to open the Ray Jobs Dashboard."
            )
        return
    namespace = matching_namespaces[0]

    # Suppress from Cluster Object initialisation widgets and outputs
    with widgets.Output(), contextlib.redirect_stdout(
        io.StringIO()
    ), contextlib.redirect_stderr(io.StringIO()):
        cluster = Cluster(ClusterConfiguration(cluster_name, namespace))
    dashboard_url = cluster.cluster_dashboard_uri()

    # Build the jobs URL once so the printed message and the opened tab agree.
    jobs_url = f"{dashboard_url}/#/jobs"

    with user_output:
        user_output.clear_output()
        print(f"Opening Ray Jobs Dashboard for {cluster_name} cluster:\n{jobs_url}")
    with url_output:
        # json.dumps emits a correctly quoted and escaped JavaScript string literal,
        # so quotes or backslashes in the URL cannot break out of the call.
        display(Javascript(f'window.open({json.dumps(jobs_url)}, "_blank");'))
237
330
@@ -289,12 +382,14 @@ def _delete_cluster(
289
382
time .sleep (interval )
290
383
timeout -= interval
291
384
if timeout <= 0 :
292
- raise TimeoutError (f"Timeout waiting for { cluster_name } to be deleted." )
385
+ raise TimeoutError (
386
+ f"Timeout waiting for { cluster_name } to be deleted."
387
+ )
293
388
except ApiException as e :
294
389
# Resource is deleted
295
390
if e .status == 404 :
296
391
break
297
- except Exception as e :
392
+ except Exception as e : # pragma: no cover
298
393
return _kube_api_error_handling (e )
299
394
300
395
@@ -303,6 +398,7 @@ def _fetch_cluster_data(namespace):
303
398
_fetch_cluster_data function fetches all clusters and their spec in a given namespace and returns a DataFrame.
304
399
"""
305
400
from .cluster import list_all_clusters
401
+
306
402
rayclusters = list_all_clusters (namespace , False )
307
403
if not rayclusters :
308
404
return pd .DataFrame ()
@@ -311,26 +407,58 @@ def _fetch_cluster_data(namespace):
311
407
num_workers = [item .num_workers for item in rayclusters ]
312
408
head_extended_resources = [
313
409
f"{ list (item .head_extended_resources .keys ())[0 ]} : { list (item .head_extended_resources .values ())[0 ]} "
314
- if item .head_extended_resources else "0"
410
+ if item .head_extended_resources
411
+ else "0"
315
412
for item in rayclusters
316
413
]
317
414
worker_extended_resources = [
318
415
f"{ list (item .worker_extended_resources .keys ())[0 ]} : { list (item .worker_extended_resources .values ())[0 ]} "
319
- if item .worker_extended_resources else "0"
416
+ if item .worker_extended_resources
417
+ else "0"
418
+ for item in rayclusters
419
+ ]
420
+ head_cpu_requests = [
421
+ item .head_cpu_requests if item .head_cpu_requests else 0 for item in rayclusters
422
+ ]
423
+ head_cpu_limits = [
424
+ item .head_cpu_limits if item .head_cpu_limits else 0 for item in rayclusters
425
+ ]
426
+ head_cpu_rl = [
427
+ f"{ requests } ~{ limits } "
428
+ for requests , limits in zip (head_cpu_requests , head_cpu_limits )
429
+ ]
430
+ head_mem_requests = [
431
+ item .head_mem_requests if item .head_mem_requests else 0 for item in rayclusters
432
+ ]
433
+ head_mem_limits = [
434
+ item .head_mem_limits if item .head_mem_limits else 0 for item in rayclusters
435
+ ]
436
+ head_mem_rl = [
437
+ f"{ requests } ~{ limits } "
438
+ for requests , limits in zip (head_mem_requests , head_mem_limits )
439
+ ]
440
+ worker_cpu_requests = [
441
+ item .worker_cpu_requests if item .worker_cpu_requests else 0
442
+ for item in rayclusters
443
+ ]
444
+ worker_cpu_limits = [
445
+ item .worker_cpu_limits if item .worker_cpu_limits else 0 for item in rayclusters
446
+ ]
447
+ worker_cpu_rl = [
448
+ f"{ requests } ~{ limits } "
449
+ for requests , limits in zip (worker_cpu_requests , worker_cpu_limits )
450
+ ]
451
+ worker_mem_requests = [
452
+ item .worker_mem_requests if item .worker_mem_requests else 0
320
453
for item in rayclusters
321
454
]
322
- head_cpu_requests = [item .head_cpu_requests if item .head_cpu_requests else 0 for item in rayclusters ]
323
- head_cpu_limits = [item .head_cpu_limits if item .head_cpu_limits else 0 for item in rayclusters ]
324
- head_cpu_rl = [f"{ requests } ~{ limits } " for requests , limits in zip (head_cpu_requests , head_cpu_limits )]
325
- head_mem_requests = [item .head_mem_requests if item .head_mem_requests else 0 for item in rayclusters ]
326
- head_mem_limits = [item .head_mem_limits if item .head_mem_limits else 0 for item in rayclusters ]
327
- head_mem_rl = [f"{ requests } ~{ limits } " for requests , limits in zip (head_mem_requests , head_mem_limits )]
328
- worker_cpu_requests = [item .worker_cpu_requests if item .worker_cpu_requests else 0 for item in rayclusters ]
329
- worker_cpu_limits = [item .worker_cpu_limits if item .worker_cpu_limits else 0 for item in rayclusters ]
330
- worker_cpu_rl = [f"{ requests } ~{ limits } " for requests , limits in zip (worker_cpu_requests , worker_cpu_limits )]
331
- worker_mem_requests = [item .worker_mem_requests if item .worker_mem_requests else 0 for item in rayclusters ]
332
- worker_mem_limits = [item .worker_mem_limits if item .worker_mem_limits else 0 for item in rayclusters ]
333
- worker_mem_rl = [f"{ requests } ~{ limits } " for requests , limits in zip (worker_mem_requests , worker_mem_limits )]
455
+ worker_mem_limits = [
456
+ item .worker_mem_limits if item .worker_mem_limits else 0 for item in rayclusters
457
+ ]
458
+ worker_mem_rl = [
459
+ f"{ requests } ~{ limits } "
460
+ for requests , limits in zip (worker_mem_requests , worker_mem_limits )
461
+ ]
334
462
status = [item .status .name for item in rayclusters ]
335
463
336
464
status = [_format_status (item .status ) for item in rayclusters ]
@@ -345,7 +473,7 @@ def _fetch_cluster_data(namespace):
345
473
"Head Memory Req~Lim" : head_mem_rl ,
346
474
"Worker CPU Req~Lim" : worker_cpu_rl ,
347
475
"Worker Memory Req~Lim" : worker_mem_rl ,
348
- "status" : status
476
+ "status" : status ,
349
477
}
350
478
return pd .DataFrame (data )
351
479
@@ -359,6 +487,6 @@ def _format_status(status):
359
487
RayClusterStatus .SUSPENDED : '<span style="color: #007BFF;">Suspended ❄️</span>' ,
360
488
RayClusterStatus .FAILED : '<span style="color: red;">Failed ✗</span>' ,
361
489
RayClusterStatus .UNHEALTHY : '<span style="color: purple;">Unhealthy</span>' ,
362
- RayClusterStatus .UNKNOWN : '<span style="color: purple;">Unknown</span>'
490
+ RayClusterStatus .UNKNOWN : '<span style="color: purple;">Unknown</span>' ,
363
491
}
364
492
return status_map .get (status , status )
0 commit comments