Skip to content

Commit 6aedea2

Browse files
committed
Added namespace retrieval and dashboard route access via kubernetes
1 parent 4c7f21c commit 6aedea2

File tree

2 files changed

+50
-19
lines changed

2 files changed

+50
-19
lines changed

Diff for: src/codeflare_sdk/cluster/cluster.py

+48 -17
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def create_app_wrapper(self):
6969
"""
7070

7171
if self.config.namespace is None:
72-
self.config.namespace = oc.get_project_name()
72+
self.config.namespace = get_current_namespace()
7373
if type(self.config.namespace) is not str:
7474
raise TypeError(
7575
f"Namespace {self.config.namespace} is of type {type(self.config.namespace)}. Check your Kubernetes Authentication."
@@ -263,16 +263,21 @@ def cluster_dashboard_uri(self) -> str:
263263
Returns a string containing the cluster's dashboard URI.
264264
"""
265265
try:
266-
with oc.project(self.config.namespace):
267-
route = oc.invoke(
268-
"get", ["route", "-o", "jsonpath='{$.items[*].spec.host}'"]
269-
)
270-
route = route.out().split(" ")
271-
route = [x for x in route if f"ray-dashboard-{self.config.name}" in x]
272-
route = route[0].strip().strip("'")
273-
return f"http://{route}"
266+
config.load_kube_config()
267+
api_instance = client.CustomObjectsApi()
268+
routes = api_instance.list_namespaced_custom_object(
269+
group="route.openshift.io",
270+
version="v1",
271+
namespace=self.config.namespace,
272+
plural="routes",
273+
)
274274
except:
275-
return "Dashboard route not available yet, have you run cluster.up()?"
275+
pass
276+
277+
for route in routes["items"]:
278+
if route["metadata"]["name"] == f"ray-dashboard-{self.config.name}":
279+
return f"http://{route['spec']['host']}"
280+
return "Dashboard route not available yet, have you run cluster.up()?"
276281

277282
def list_jobs(self) -> List:
278283
"""
@@ -336,6 +341,19 @@ def list_all_queued(namespace: str, print_to_console: bool = True):
336341
return app_wrappers
337342

338343

344+
def get_current_namespace():
345+
try:
346+
_, active_context = config.list_kube_config_contexts()
347+
except config.ConfigException:
348+
raise PermissionError(
349+
"Retrieving current namespace not permitted, have you put in correct/up-to-date auth credentials?"
350+
)
351+
try:
352+
return active_context["context"]["namespace"]
353+
except KeyError:
354+
return "default"
355+
356+
339357
# private methods
340358

341359

@@ -465,12 +483,25 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
465483
else:
466484
status = RayClusterStatus.UNKNOWN
467485

468-
with oc.project(rc["metadata"]["namespace"]), oc.timeout(10 * 60):
469-
route = (
470-
oc.selector(f"route/ray-dashboard-{rc['metadata']['name']}")
471-
.object()
472-
.model.spec.host
473-
)
486+
config.load_kube_config()
487+
api_instance = client.CustomObjectsApi()
488+
routes = api_instance.list_namespaced_custom_object(
489+
group="route.openshift.io",
490+
version="v1",
491+
namespace=rc["metadata"]["namespace"],
492+
plural="routes",
493+
)
494+
ray_route = None
495+
for route in routes["items"]:
496+
if route["metadata"]["name"] == f"ray-dashboard-{rc['metadata']['name']}":
497+
ray_route = route["spec"]["host"]
498+
499+
# with oc.project(rc["metadata"]["namespace"]), oc.timeout(10 * 60):
500+
# route = (
501+
# oc.selector(f"route/ray-dashboard-{rc['metadata']['name']}")
502+
# .object()
503+
# .model.spec.host
504+
# )
474505

475506
return RayCluster(
476507
name=rc["metadata"]["name"],
@@ -489,7 +520,7 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
489520
]["resources"]["limits"]["cpu"],
490521
worker_gpu=0, # hard to detect currently how many gpus, can override it with what the user asked for
491522
namespace=rc["metadata"]["namespace"],
492-
dashboard=route,
523+
dashboard=ray_route,
493524
)
494525

495526

Diff for: src/codeflare_sdk/job/jobs.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,13 @@
1717
from typing import TYPE_CHECKING, Optional, Dict, List
1818
from pathlib import Path
1919

20-
import openshift as oc
2120
from torchx.components.dist import ddp
2221
from torchx.runner import get_runner
2322
from torchx.specs import AppHandle, parse_app_handle, AppDryRunInfo
2423

2524
if TYPE_CHECKING:
2625
from ..cluster.cluster import Cluster
26+
from ..cluster.cluster import get_current_namespace
2727

2828
all_jobs: List["Job"] = []
2929
torchx_runner = get_runner()
@@ -124,7 +124,7 @@ def _missing_spec(self, spec: str):
124124
def _dry_run_no_cluster(self):
125125
if self.scheduler_args is not None:
126126
if self.scheduler_args.get("namespace") is None:
127-
self.scheduler_args["namespace"] = oc.get_project_name()
127+
self.scheduler_args["namespace"] = get_current_namespace()
128128
return torchx_runner.dryrun(
129129
app=ddp(
130130
*self.script_args,

0 commit comments

Comments
 (0)