-
Notifications
You must be signed in to change notification settings - Fork 53
/
Copy pathsubmit.py
74 lines (66 loc) · 2.15 KB
/
submit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import click
import os
from codeflare_sdk.cluster.cluster import Cluster
import pickle
from torchx.runner import get_runner
from codeflare_sdk.cluster.cluster import get_cluster
@click.group()
def cli():
    """Submit a defined resource to the Kubernetes cluster"""
    # Intentionally empty: this function only serves as the click group that
    # the commands registered with @cli.command() attach to. The docstring
    # above is what click displays as the group's help text.
@cli.command()
@click.argument("name", type=str)
@click.option("--wait", is_flag=True)
def raycluster(name, wait):
    """Submit a defined RayCluster to the Kubernetes cluster"""
    # The definition path is the given name with a ".yaml" suffix appended.
    definition_file = f"{name}.yaml"
    ray_cluster = Cluster.from_definition_yaml(definition_file)
    if not ray_cluster:
        # A falsy result is treated as "no usable definition": report and stop.
        click.echo(
            "Error submitting RayCluster. Make sure the RayCluster is defined before submitting it"
        )
        return

    # The cluster is brought up in both modes; --wait only decides what
    # happens afterwards.
    ray_cluster.up()
    if wait:
        # With --wait, call wait_ready() and print no message from here.
        ray_cluster.wait_ready()
        return
    click.echo("Cluster submitted successfully")
@cli.command()
@click.pass_context
@click.argument("name", type=str)
@click.option("--cluster-name", type=str)
@click.option("--namespace", type=str)
def job(ctx, name, cluster_name, namespace):
    """
    Submit a defined job to the Kubernetes cluster or a RayCluster
    """
    # Flow:
    #   1. Load the pickled job definition stored at
    #      "<ctx.obj.codeflare_path>/<name>".
    #   2. Without --cluster-name, submit the definition as-is.
    #   3. With --cluster-name, look the cluster up (in --namespace, or in
    #      ctx.obj.current_namespace when the option is omitted) and submit
    #      the definition onto it.
    runner = get_runner()
    job_path = ctx.obj.codeflare_path + f"/{name}"
    if not os.path.isfile(job_path):
        click.echo(
            "Error submitting job. Make sure the job is defined before submitting it"
        )
        return

    # SECURITY NOTE: unpickling can execute arbitrary code embedded in the
    # file. NOTE(review): this presumably only reads files written by the
    # tool itself under codeflare_path -- confirm that directory is trusted.
    with open(job_path, "rb") as file:
        job_def = pickle.load(file)

    if not cluster_name:
        submitted_job = job_def.submit()
        # The described app name is split on ":" and the second piece is
        # reported as the submission id.
        submission_id = runner.describe(submitted_job._app_handle).name.split(":")[1]
        click.echo(f"Job {submission_id} submitted successfully")
        return

    namespace = namespace or ctx.obj.current_namespace
    try:
        cluster = get_cluster(cluster_name, namespace)
    except FileNotFoundError:
        # Report the cluster that was looked up (cluster_name), not the job
        # name, so the message identifies what is actually missing.
        click.echo(f"Cluster {cluster_name} not found in {namespace} namespace")
        return

    submitted_job = job_def.submit(cluster)
    full_name = runner.describe(submitted_job._app_handle).name
    # Keep everything from the last occurrence of the job name onwards.
    # NOTE(review): if the name does not occur in full_name, rfind() returns
    # -1 and only the final character is kept -- confirm the described name
    # always contains the job name.
    submission_id = full_name[full_name.rfind(name) :]
    click.echo(
        f"Job {submission_id} submitted onto {cluster_name} RayCluster successfully\nView dashboard: {cluster.cluster_dashboard_uri()}"
    )