Skip to content

Commit bfb39a2

Browse files
committed
cluster status safeguards
1 parent 3464c14 commit bfb39a2

File tree

2 files changed

+22
-7
lines changed

2 files changed

+22
-7
lines changed

Diff for: src/codeflare_sdk/cluster/cluster.py

+18-5
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,15 @@ def status(
176176
ready = False
177177
status = CodeFlareClusterStatus.FAILED # should "deleted" be reported as a separate status?
178178
return status, ready # exit early, no need to check ray status
179-
elif appwrapper.status in [AppWrapperStatus.PENDING]:
179+
elif appwrapper.status in [
180+
AppWrapperStatus.PENDING,
181+
AppWrapperStatus.QUEUEING,
182+
]:
180183
ready = False
181-
status = CodeFlareClusterStatus.QUEUED
184+
if appwrapper.status == AppWrapperStatus.PENDING:
185+
status = CodeFlareClusterStatus.QUEUED
186+
else:
187+
status = CodeFlareClusterStatus.QUEUEING
182188
if print_to_console:
183189
pretty_print.print_app_wrappers_status([appwrapper])
184190
return (
@@ -563,11 +569,18 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
563569

564570

565571
def _map_to_app_wrapper(aw) -> AppWrapper:
572+
if "status" in aw and "canrun" in aw["status"]:
573+
return AppWrapper(
574+
name=aw["metadata"]["name"],
575+
status=AppWrapperStatus(aw["status"]["state"].lower()),
576+
can_run=aw["status"]["canrun"],
577+
job_state=aw["status"]["queuejobstate"],
578+
)
566579
return AppWrapper(
567580
name=aw["metadata"]["name"],
568-
status=AppWrapperStatus(aw["status"]["state"].lower()),
569-
can_run=aw["status"]["canrun"],
570-
job_state=aw["status"]["queuejobstate"],
581+
status=AppWrapperStatus("queueing"),
582+
can_run=False,
583+
job_state="Still adding to queue",
571584
)
572585

573586

Diff for: src/codeflare_sdk/cluster/model.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class AppWrapperStatus(Enum):
3939
Defines the possible reportable states of an AppWrapper.
4040
"""
4141

42+
QUEUEING = "queueing"
4243
PENDING = "pending"
4344
RUNNING = "running"
4445
FAILED = "failed"
@@ -55,8 +56,9 @@ class CodeFlareClusterStatus(Enum):
5556
READY = 1
5657
STARTING = 2
5758
QUEUED = 3
58-
FAILED = 4
59-
UNKNOWN = 5
59+
QUEUEING = 4
60+
FAILED = 5
61+
UNKNOWN = 6
6062

6163

6264
@dataclass

0 commit comments

Comments
 (0)