Skip to content

Commit 91be9f2

Browse files
CLI help str rewording. (skypilot-org#1821)
* CLI help str rewording. * Updates.
1 parent 718951c commit 91be9f2

File tree

2 files changed

+103
-56
lines changed

2 files changed

+103
-56
lines changed

sky/cli.py

Lines changed: 88 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,19 +1671,22 @@ def status(all: bool, refresh: bool, show_spot_jobs: bool, clusters: List[str]):
16711671
@usage_lib.entrypoint
16721672
def cost_report(all: bool): # pylint: disable=redefined-builtin
16731673
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
1674-
"""Show cost reports for each cluster.
1674+
"""Show estimated costs for launched clusters.
16751675
1676-
The following fields for each cluster are recorded: cluster name,
1677-
resources, launched time, duration that cluster was up,
1678-
and total cost.
1676+
For each cluster, this shows: cluster name, resources, launched time,
1677+
duration that cluster was up, and total estimated cost.
16791678
16801679
The estimated cost column indicates the price for the cluster based on the
1681-
type of resources being used and the duration of use up until the call
1682-
to status. This means if the cluster is UP, successive calls to report
1683-
will show increasing price. The estimated cost is calculated based on
1684-
the local cache of the cluster status, and may not be accurate for
1685-
the cluster with autostop/use_spot set or terminated/stopped
1686-
on the cloud console.
1680+
type of resources being used and the duration of use up until now. This
1681+
means if the cluster is UP, successive calls to cost-report will show
1682+
increasing price.
1683+
1684+
The estimated cost is calculated based on the local cache of the cluster
1685+
status, and may not be accurate for:
1686+
1687+
- clusters with autostop/use_spot set; or
1688+
1689+
- clusters that were terminated/stopped on the cloud console.
16871690
"""
16881691
cluster_records = core.cost_report()
16891692
nonreserved_cluster_records = []
@@ -1778,8 +1781,8 @@ def queue(clusters: Sequence[str], skip_finished: bool, all_users: bool):
17781781
'-s',
17791782
is_flag=True,
17801783
default=False,
1781-
help='Sync down the logs of the job (this is useful for distributed jobs to'
1782-
'download a separate log for each job from all the workers).')
1784+
help='Sync down the logs of a job to the local machine. For a distributed'
1785+
' job, a separate log file from each worker will be downloaded.')
17831786
@click.option(
17841787
'--status',
17851788
is_flag=True,
@@ -1790,8 +1793,9 @@ def queue(clusters: Sequence[str], skip_finished: bool, all_users: bool):
17901793
'--follow/--no-follow',
17911794
is_flag=True,
17921795
default=True,
1793-
help=('Follow the logs of the job. [default: --follow] '
1794-
'If --no-follow is specified, print the log so far and exit.'))
1796+
help=('Follow the logs of a job. '
1797+
'If --no-follow is specified, print the log so far and exit. '
1798+
'[default: --follow]'))
17951799
@click.argument('cluster',
17961800
required=True,
17971801
type=str,
@@ -1813,12 +1817,14 @@ def logs(
18131817
18141818
1. If no flags are provided, tail the logs of the job_id specified. At most
18151819
one job_id can be provided.
1816-
2. If --status is specified, print the status of the job and exit with
1817-
returncode 0 if the job is succeeded, or 1 otherwise. At most one job_id can
1820+
1821+
2. If ``--status`` is specified, print the status of the job and exit with
1822+
returncode 0 if the job succeeded, or 1 otherwise. At most one job_id can
18181823
be specified.
1819-
3. If --sync-down is specified, the logs of the job will be downloaded from
1820-
the cluster and saved to the local machine under `~/sky_logs`. Mulitple
1821-
job_ids can be specified.
1824+
1825+
3. If ``--sync-down`` is specified, the logs of the job will be downloaded
1826+
from the cluster and saved to the local machine under
1827+
``~/sky_logs``. Multiple job_ids can be specified.
18221828
"""
18231829
if sync_down and status:
18241830
raise click.UsageError(
@@ -2881,11 +2887,14 @@ def tpunode(cluster: str, yes: bool, port_forward: Optional[List[int]],
28812887
@cli.command()
28822888
@usage_lib.entrypoint
28832889
def check():
2884-
"""Determine the set of clouds available to use.
2890+
"""Check which clouds are available to use.
2891+
2892+
This checks access credentials for all clouds supported by SkyPilot. If a
2893+
cloud is detected to be inaccessible, the reason and correction steps will
2894+
be shown.
28852895
2886-
This checks access credentials for AWS, Azure and GCP; on failure, it shows
2887-
the reason and suggests correction steps. Tasks will only run on clouds
2888-
that you have access to.
2896+
The enabled clouds are cached and form the "search space" to be considered
2897+
for each task.
28892898
"""
28902899
sky_check.check()
28912900

@@ -2916,15 +2925,15 @@ def show_gpus(
29162925
all: bool, # pylint: disable=redefined-builtin
29172926
cloud: Optional[str],
29182927
region: Optional[str]):
2919-
"""Show supported GPU/TPU/accelerators.
2928+
"""Show supported GPU/TPU/accelerators and their prices.
29202929
29212930
The names and counts shown can be set in the ``accelerators`` field in task
29222931
YAMLs, or in the ``--gpus`` flag in CLI commands. For example, if this
29232932
table shows 8x V100s are supported, then the string ``V100:8`` will be
29242933
accepted by the above.
29252934
2926-
To show the detailed information of a GPU/TPU type (which clouds offer it,
2927-
the quantity in each VM type, etc.), use ``sky show-gpus <gpu>``.
2935+
To show the detailed information of a GPU/TPU type (its price, which clouds
2936+
offer it, the quantity in each VM type, etc.), use ``sky show-gpus <gpu>``.
29282937
29292938
To show all accelerators, including less common ones and their detailed
29302939
information, use ``sky show-gpus --all``.
@@ -3058,7 +3067,7 @@ def _output():
30583067

30593068
@cli.group(cls=_NaturalOrderGroup)
30603069
def storage():
3061-
"""Storage related commands."""
3070+
"""SkyPilot Storage CLI."""
30623071
pass
30633072

30643073

@@ -3115,7 +3124,7 @@ def storage_delete(names: Sequence[str], all: bool): # pylint: disable=redefine
31153124

31163125
@cli.group(cls=_NaturalOrderGroup)
31173126
def admin():
3118-
"""Sky administrator commands for local clusters."""
3127+
"""SkyPilot On-prem administrator CLI."""
31193128
pass
31203129

31213130

@@ -3191,7 +3200,7 @@ def admin_deploy(clusterspec_yaml: str):
31913200

31923201
@cli.group(cls=_NaturalOrderGroup)
31933202
def spot():
3194-
"""Commands for managed spot jobs."""
3203+
"""Managed Spot commands (spot instances with auto-recovery)."""
31953204
pass
31963205

31973206

@@ -3354,32 +3363,56 @@ def spot_launch(
33543363
def spot_queue(all: bool, refresh: bool, skip_finished: bool):
33553364
"""Show statuses of managed spot jobs.
33563365
3357-
\b
33583366
Each spot job can have one of the following statuses:
33593367
3360-
\b
3361-
- SUBMITTED: The job is submitted to the spot controller.
3362-
- STARTING: The job is starting (starting a spot cluster).
3363-
- RUNNING: The job is running.
3364-
- RECOVERING: The spot cluster is recovering from a preemption.
3365-
- SUCCEEDED: The job succeeded.
3366-
- FAILED: The job failed due to an error from the job itself.
3367-
- FAILED_NO_RESOURCES: The job failed due to resources being unavailable
3368-
after a maximum number of retry attempts.
3369-
- FAILED_CONTROLLER: The job failed due to an unexpected error in the spot
3370-
controller.
3371-
- CANCELLING: The job was requested to be cancelled by the user, and the
3372-
cancellation is in progress.
3373-
- CANCELLED: The job was cancelled by the user.
3374-
3375-
If the job failed, either due to user code or spot unavailability, the error
3376-
log can be found with ``sky spot logs --controller job_id``.
3368+
- ``PENDING``: Job is waiting for a free slot on the spot controller to be
3369+
accepted.
3370+
3371+
- ``SUBMITTED``: Job is submitted to and accepted by the spot controller.
3372+
3373+
- ``STARTING``: Job is starting (provisioning a spot cluster).
3374+
3375+
- ``RUNNING``: Job is running.
3376+
3377+
- ``RECOVERING``: The spot cluster is recovering from a preemption.
3378+
3379+
- ``SUCCEEDED``: Job succeeded.
3380+
3381+
- ``CANCELLING``: Job was requested to be cancelled by the user, and the
3382+
cancellation is in progress.
3383+
3384+
- ``CANCELLED``: Job was cancelled by the user.
3385+
3386+
- ``FAILED``: Job failed due to an error from the job itself.
3387+
3388+
- ``FAILED_SETUP``: Job failed due to an error from the job's ``setup``
3389+
commands.
3390+
3391+
- ``FAILED_PRECHECKS``: Job failed due to an error from our prechecks such
3392+
as invalid cluster names or infeasible resources being specified.
3393+
3394+
- ``FAILED_NO_RESOURCE``: Job failed due to resources being unavailable
3395+
after a maximum number of retries.
3396+
3397+
- ``FAILED_CONTROLLER``: Job failed due to an unexpected error in the spot
3398+
controller.
3399+
3400+
If the job failed, either due to user code or spot unavailability, the
3401+
error log can be found with ``sky spot logs --controller``, e.g.:
3402+
3403+
.. code-block:: bash
3404+
3405+
sky spot logs --controller job_id
3406+
3407+
This also shows the logs for provisioning and any preemption and recovery
3408+
attempts.
33773409
33783410
(Tip) To fetch job statuses every 60 seconds, use ``watch``:
33793411
33803412
.. code-block:: bash
33813413
33823414
watch -n60 sky spot queue
3415+
33833416
"""
33843417
click.secho('Fetching managed spot job statuses...', fg='yellow')
33853418
with log_utils.safe_rich_status('[cyan]Checking spot jobs[/]'):
@@ -3422,20 +3455,19 @@ def spot_queue(all: bool, refresh: bool, skip_finished: bool):
34223455
def spot_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool):
34233456
"""Cancel managed spot jobs.
34243457
3425-
You can provide either a job name or a list of job ids to be cancelled.
3458+
You can provide either a job name or a list of job IDs to be cancelled.
34263459
They are mutually exclusive options.
3460+
34273461
Examples:
34283462
34293463
.. code-block:: bash
34303464
3431-
# Cancel managed spot job with name 'my-job'
3432-
$ sky spot cancel -n my-job
3433-
3434-
# Cancel managed spot jobs with IDs 1, 2, 3
3435-
$ sky spot cancel 1 2 3
3436-
3465+
# Cancel managed spot job with name 'my-job'
3466+
$ sky spot cancel -n my-job
3467+
\b
3468+
# Cancel managed spot jobs with IDs 1, 2, 3
3469+
$ sky spot cancel 1 2 3
34373470
"""
3438-
34393471
_, handle = spot_lib.is_spot_controller_up(
34403472
'All managed spot jobs should have finished.')
34413473
if handle is None:
@@ -3544,7 +3576,7 @@ def _get_candidate_configs(yaml_path: str) -> Optional[List[Dict[str, str]]]:
35443576

35453577
@cli.group(cls=_NaturalOrderGroup)
35463578
def bench():
3547-
"""Sky Benchmark related commands."""
3579+
"""SkyPilot Benchmark CLI."""
35483580
pass
35493581

35503582

sky/utils/cli_utils/status_utils.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,21 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
109109
reserved_group_name: Optional[str] = None):
110110
"""Compute cluster table values and display for cost report.
111111
112+
For each cluster, this shows: cluster name, resources, launched time,
113+
duration that cluster was up, and total estimated cost.
114+
115+
The estimated cost column indicates the price for the cluster based on the
116+
type of resources being used and the duration of use up until now. This
117+
means if the cluster is UP, successive calls to cost-report will show
118+
increasing price.
119+
120+
The estimated cost is calculated based on the local cache of the cluster
121+
status, and may not be accurate for:
122+
123+
- clusters with autostop/use_spot set; or
124+
125+
- clusters that were terminated/stopped on the cloud console.
126+
112127
Returns:
113128
Number of pending auto{stop,down} clusters.
114129
"""

0 commit comments

Comments
 (0)