@@ -20,7 +20,6 @@

 import colorama
 import filelock
-from rich import console as rich_console

 import sky
 from sky import backends
@@ -48,7 +47,6 @@
 SKY_REMOTE_RAY_VERSION = backend_utils.SKY_REMOTE_RAY_VERSION

 logger = sky_logging.init_logger(__name__)
-console = rich_console.Console()

 _PATH_SIZE_MEGABYTES_WARN_THRESHOLD = 256

@@ -726,8 +724,9 @@ def _try_provision_tpu(self, to_provision: 'resources_lib.Resources',
         assert 'tpu-create-script' in config_dict, \
             'Expect TPU provisioning with gcloud.'
         try:
-            with console.status('[bold cyan]Provisioning TPU '
-                                f'[green]{tpu_name}[/]'):
+            with backend_utils.safe_console_status(
+                    '[bold cyan]Provisioning TPU '
+                    f'[green]{tpu_name}[/]'):
                 backend_utils.run(f'bash {config_dict["tpu-create-script"]}',
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
@@ -1317,7 +1316,8 @@ def provision(self,
         # PENDING / RUNNING jobs for the real status, since we do not
         # know the actual previous status of the cluster.
         cmd = backend_utils.JobLibCodeGen.update_status()
-        with console.status('[bold cyan]Preparing Job Queue'):
+        with backend_utils.safe_console_status(
+                '[bold cyan]Preparing Job Queue'):
             returncode, _, stderr = self.run_on_head(
                 handle, cmd, require_outputs=True)
         backend_utils.handle_returncode(returncode, cmd,
@@ -1403,7 +1403,7 @@ def _sync_workdir_node(ip):
         tail_cmd = f'tail -n100 -f {log_path}'
         logger.info('To view detailed progress: '
                     f'{style.BRIGHT}{tail_cmd}{style.RESET_ALL}')
-        with console.status('[bold cyan]Syncing[/]'):
+        with backend_utils.safe_console_status('[bold cyan]Syncing[/]'):
             backend_utils.run_in_parallel(_sync_workdir_node, ip_list)

     def sync_file_mounts(
@@ -1478,7 +1478,7 @@ def _sync_node(ip):
         logger.info(f'{fore.CYAN}Syncing (to {num_nodes} node{plural}): '
                     f'{style.BRIGHT}{src}{style.RESET_ALL} -> '
                     f'{style.BRIGHT}{dst}{style.RESET_ALL}')
-        with console.status('[bold cyan]Syncing[/]'):
+        with backend_utils.safe_console_status('[bold cyan]Syncing[/]'):
             backend_utils.run_in_parallel(_sync_node, ip_list)

         # Check the files and warn
@@ -1633,7 +1633,8 @@ def _setup_node(ip: int) -> int:
         plural = 's' if num_nodes > 1 else ''
         logger.info(f'{fore.CYAN}Running setup on {num_nodes} node{plural}.'
                     f'{style.RESET_ALL}')
-        with console.status('[bold cyan]Running setup[/]'):
+        with backend_utils.safe_console_status(
+                '[bold cyan]Running setup[/]'):
             backend_utils.run_in_parallel(_setup_node, ip_list)
         logger.info(f'{fore.GREEN}Setup completed.{style.RESET_ALL}')
         end = time.time()
@@ -1989,8 +1990,8 @@ def teardown_no_lock(self,
             # autoscaler.
             resource_group = config['provider']['resource_group']
             terminate_cmd = f'az group delete -y --name {resource_group}'
-            with console.status(f'[bold cyan]Terminating '
-                                f'[green]{cluster_name}'):
+            with backend_utils.safe_console_status(f'[bold cyan]Terminating '
+                                                   f'[green]{cluster_name}'):
                 returncode, stdout, stderr = log_lib.run_with_log(
                     terminate_cmd,
                     log_abs_path,
@@ -2014,8 +2015,9 @@ def teardown_no_lock(self,
             terminate_cmd = (
                 f'aws ec2 terminate-instances --region {region} '
                 f'--instance-ids $({query_cmd})')
-            with console.status(f'[bold cyan]Terminating '
-                                f'[green]{cluster_name}'):
+            with backend_utils.safe_console_status(
+                    f'[bold cyan]Terminating '
+                    f'[green]{cluster_name}'):
                 returncode, stdout, stderr = log_lib.run_with_log(
                     terminate_cmd,
                     log_abs_path,
@@ -2031,8 +2033,9 @@ def teardown_no_lock(self,
             terminate_cmd = (
                 f'gcloud compute instances delete --zone={zone} --quiet '
                 f'$({query_cmd})')
-            with console.status(f'[bold cyan]Terminating '
-                                f'[green]{cluster_name}'):
+            with backend_utils.safe_console_status(
+                    f'[bold cyan]Terminating '
+                    f'[green]{cluster_name}'):
                 returncode, stdout, stderr = log_lib.run_with_log(
                     terminate_cmd,
                     log_abs_path,
@@ -2052,16 +2055,18 @@ def teardown_no_lock(self,
             f.flush()

             teardown_verb = 'Terminating' if terminate else 'Stopping'
-            with console.status(f'[bold cyan]{teardown_verb} '
-                                f'[green]{cluster_name}'):
+            with backend_utils.safe_console_status(
+                    f'[bold cyan]{teardown_verb} '
+                    f'[green]{cluster_name}'):
                 returncode, stdout, stderr = log_lib.run_with_log(
                     ['ray', 'down', '-y', f.name],
                     log_abs_path,
                     stream_logs=False,
                     require_outputs=True)

         if handle.tpu_delete_script is not None:
-            with console.status('[bold cyan]Terminating TPU...'):
+            with backend_utils.safe_console_status(
+                    '[bold cyan]Terminating TPU...'):
                 tpu_rc, tpu_stdout, tpu_stderr = log_lib.run_with_log(
                     ['bash', handle.tpu_delete_script],
                     log_abs_path,
@@ -2098,7 +2103,7 @@ def teardown_no_lock(self,
         backend_utils.SSHConfigHelper.remove_cluster(cluster_name,
                                                      handle.head_ip,
                                                      auth_config)
-        name = global_user_state.get_cluster_name_from_handle(handle)
+        name = handle.cluster_name
         global_user_state.remove_cluster(name, terminate=terminate)

         if terminate:
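
Note: the diff above relies on backend_utils.safe_console_status, whose definition is not part of this change set. Only the helper's name and import path are confirmed by the diff; the snippet below is a minimal, hypothetical sketch of what such a wrapper could look like, assuming its purpose is to fall back to a no-op context manager when a rich live status display cannot safely be shown (for example, from the worker threads driven by backend_utils.run_in_parallel). The real implementation may differ.

# Hypothetical sketch only; not the actual backend_utils implementation.
import contextlib
import threading

from rich import console as rich_console

_console = rich_console.Console()


def safe_console_status(msg: str):
    """Return a rich status spinner, or a no-op context off the main thread."""
    if threading.current_thread() is threading.main_thread():
        # Main thread: show the animated spinner as before.
        return _console.status(msg)
    # Assumption: starting a rich live display from a worker thread (or while
    # another live display is active) can raise errors, so do nothing instead.
    return contextlib.nullcontext()

Usage mirrors the call sites in the diff, e.g. `with safe_console_status('[bold cyan]Syncing[/]'): ...`, so callers keep the same with-statement shape whether or not a spinner is actually rendered.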