Skip to content

Commit 699b025

Browse files
authored
Improve UX for autostopping (skypilot-org#676)
* Add progress bar for status refreshing
* Keep autostop after refreshing
* Add glob for start
* Fix message for autostop
* Fix messages for autostop
1 parent 1b5c52c commit 699b025

File tree

3 files changed

+44
-31
lines changed

3 files changed

+44
-31
lines changed

sky/backends/backend_utils.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import jinja2
2222
import rich.console as rich_console
23+
import rich.progress as rich_progress
2324

2425
import sky
2526
from sky import authentication as auth
@@ -1140,7 +1141,12 @@ def get_clusters(refresh: bool) -> List[Dict[str, Any]]:
11401141
records = global_user_state.get_clusters()
11411142
if not refresh:
11421143
return records
1143-
return [_ping_cluster_or_set_to_stopped(record) for record in records]
1144+
updated_records = []
1145+
for record in rich_progress.track(records,
1146+
description='Refreshing cluster status'):
1147+
record = _ping_cluster_or_set_to_stopped(record)
1148+
updated_records.append(record)
1149+
return updated_records
11441150

11451151

11461152
def query_head_ip_with_retries(cluster_yaml: str, retry_count: int = 1) -> str:

sky/cli.py

+32-26
Original file line numberDiff line numberDiff line change
@@ -1114,7 +1114,7 @@ def stop(
11141114
default=None,
11151115
is_flag=True,
11161116
help='Tear down all existing clusters.')
1117-
@click.option('--idle_minutes',
1117+
@click.option('--idle-minutes',
11181118
'-i',
11191119
type=int,
11201120
default=None,
@@ -1215,6 +1215,13 @@ def _filter(name, all_clusters):
12151215
return cluster_record
12161216
return None
12171217

1218+
# Get GLOB cluster names
1219+
glob_clusters = []
1220+
for cluster in clusters:
1221+
glob_cluster = global_user_state.get_glob_cluster_names(cluster)
1222+
glob_clusters.extend(glob_cluster)
1223+
clusters = list(set(glob_clusters))
1224+
12181225
all_clusters = global_user_state.get_clusters()
12191226
for name in clusters:
12201227
record = _filter(name, all_clusters)
@@ -1407,7 +1414,8 @@ def _terminate_or_stop_clusters(
14071414

14081415
operation = 'Terminating' if terminate else 'Stopping'
14091416
if idle_minutes_to_autostop is not None:
1410-
operation = 'Scheduling auto-stop on'
1417+
verb = 'Scheduling' if idle_minutes_to_autostop >= 0 else 'Cancelling'
1418+
operation = f'{verb} auto-stop on'
14111419
plural = 's' if len(to_down) > 1 else ''
14121420
progress = rich_progress.Progress(transient=True)
14131421
task = progress.add_task(
@@ -1431,34 +1439,32 @@ def _terminate_or_stop(record):
14311439
f'{colorama.Style.RESET_ALL}')
14321440
elif idle_minutes_to_autostop is not None:
14331441
cluster_status = backend_utils.get_status_from_cluster_name(name)
1434-
if cluster_status != global_user_state.ClusterStatus.UP:
1435-
message = (
1436-
f'{colorama.Fore.GREEN}Scheduling autostop for cluster '
1437-
f'{name} (status: {cluster_status.value})... skipped'
1438-
f'{colorama.Style.RESET_ALL}\n'
1439-
' Auto-stop can only be scheduled on '
1440-
f'{global_user_state.ClusterStatus.UP.value} cluster.')
1441-
elif not isinstance(backend, backends.CloudVmRayBackend):
1442-
message = (
1443-
f'{colorama.Fore.GREEN}Scheduling auto-stop for cluster '
1444-
f'{name}... skipped{colorama.Style.RESET_ALL}\n'
1445-
' Auto-stopping is only supported by backend: '
1446-
f'{backends.CloudVmRayBackend.NAME}')
1442+
if not isinstance(backend, backends.CloudVmRayBackend):
1443+
message = (f'{colorama.Fore.GREEN}{operation} cluster '
1444+
f'{name}... skipped{colorama.Style.RESET_ALL}'
1445+
'\n Auto-stopping is only supported by backend: '
1446+
f'{backends.CloudVmRayBackend.NAME}')
14471447
else:
1448-
backend.set_autostop(handle, idle_minutes_to_autostop)
1449-
if idle_minutes_to_autostop < 0:
1448+
if cluster_status != global_user_state.ClusterStatus.UP:
14501449
message = (
1451-
f'{colorama.Fore.GREEN}Cancelling auto-stop for '
1452-
f'cluster {name}...done{colorama.Style.RESET_ALL}')
1450+
f'{colorama.Fore.GREEN}{operation} cluster '
1451+
f'{name} (status: {cluster_status.value})... skipped'
1452+
f'{colorama.Style.RESET_ALL}'
1453+
'\n Auto-stop can only be run on '
1454+
f'{global_user_state.ClusterStatus.UP.value} cluster.')
14531455
else:
1456+
backend.set_autostop(handle, idle_minutes_to_autostop)
14541457
message = (
1455-
f'{colorama.Fore.GREEN}Scheduling auto-stop for '
1456-
f'cluster {name}...done{colorama.Style.RESET_ALL}\n'
1457-
f' The cluster will be stopped after '
1458-
f'{idle_minutes_to_autostop} minutes of idleness.\n'
1459-
' To cancel the autostop, run: '
1460-
f'{colorama.Style.BRIGHT}sky autostop {name} --cancel'
1461-
f'{colorama.Style.RESET_ALL}')
1458+
f'{colorama.Fore.GREEN}{operation} '
1459+
f'cluster {name}...done{colorama.Style.RESET_ALL}')
1460+
if idle_minutes_to_autostop >= 0:
1461+
message += (
1462+
f'\n The cluster will be stopped after '
1463+
f'{idle_minutes_to_autostop} minutes of idleness.'
1464+
'\n To cancel the autostop, run: '
1465+
f'{colorama.Style.BRIGHT}'
1466+
f'sky autostop {name} --cancel'
1467+
f'{colorama.Style.RESET_ALL}')
14621468
else:
14631469
success = backend.teardown(handle, terminate=terminate, purge=purge)
14641470
if success:

sky/global_user_state.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,11 @@ def add_or_update_cluster(cluster_name: str,
164164
'VALUES (?, ?, ?, ?, ?, '
165165
# Keep the old autostop value if it exists, otherwise set it to
166166
# default -1.
167-
'COALESCE((SELECT autostop FROM clusters WHERE name=?), -1))',
167+
'COALESCE('
168+
'(SELECT autostop FROM clusters WHERE name=? AND status!=?), -1)'
169+
')', # VALUES
168170
(cluster_name, cluster_launched_at, handle, last_use, status.value,
169-
cluster_name))
171+
cluster_name, ClusterStatus.STOPPED.value))
170172
_DB.conn.commit()
171173

172174

@@ -190,11 +192,10 @@ def remove_cluster(cluster_name: str, terminate: bool):
190192
# will directly try to ssh, which leads to timeout.
191193
handle.head_ip = None
192194
_DB.cursor.execute(
193-
'UPDATE clusters SET handle=(?), status=(?), autostop=(?) '
195+
'UPDATE clusters SET handle=(?), status=(?) '
194196
'WHERE name=(?)', (
195197
pickle.dumps(handle),
196198
ClusterStatus.STOPPED.value,
197-
-1,
198199
cluster_name,
199200
))
200201
_DB.conn.commit()

0 commit comments

Comments
 (0)