Skip to content

Remove oc client and add helper functions #187

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 13, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 31 additions & 57 deletions demo-notebooks/interactive/local_interactive.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,12 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "4364ac2e-dd10-4d30-ba66-12708daefb3f",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Written to: hfgputest-1.yaml\n"
]
}
],
"outputs": [],
"source": [
"# Create our cluster and submit appwrapper\n",
"namespace = \"default\"\n",
Expand Down Expand Up @@ -89,7 +81,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "12eef53c",
"metadata": {},
Expand All @@ -99,38 +90,21 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"id": "cf1b749e-2335-42c2-b673-26768ec9895d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"rayclient-hfgputest-1-default.apps.tedbig412.cp.fyre.ibm.com\n"
]
}
],
"outputs": [],
"source": [
"import openshift as oc\n",
"from codeflare_sdk.utils import generate_cert\n",
"\n",
"if local_interactive:\n",
" generate_cert.generate_tls_cert(cluster_name, namespace)\n",
" generate_cert.export_env(cluster_name, namespace)\n",
"\n",
"with oc.project(namespace):\n",
" routes=oc.selector(\"route\").objects()\n",
" rayclient_url=\"\"\n",
" for r in routes:\n",
" if \"rayclient\" in r.name():\n",
" rayclient_url=r.model.spec.host\n",
"print(rayclient_url)"
" generate_cert.export_env(cluster_name, namespace)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 6,
"id": "9483bb98-33b3-4beb-9b15-163d7e76c1d7",
"metadata": {
"scrolled": true,
Expand All @@ -141,15 +115,15 @@
"name": "stderr",
"output_type": "stream",
"text": [
"2023-05-31 14:12:37,816\tINFO client_builder.py:251 -- Passing the following kwargs to ray.init() on the server: logging_level\n",
"2023-05-31 14:12:37,820\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.IDLE\n",
"2023-05-31 14:12:38,034\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.CONNECTING\n",
"2023-05-31 14:12:38,246\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.READY\n",
"2023-05-31 14:12:38,290\tDEBUG worker.py:807 -- Pinging server.\n",
"2023-05-31 14:12:40,521\tDEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000001000000\n",
"2023-05-31 14:12:40,523\tDEBUG worker.py:564 -- Scheduling task get_dashboard_url 0 b'\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x00\\x00\\x00\\x01\\x00\\x00\\x00'\n",
"2023-05-31 14:12:40,535\tDEBUG worker.py:640 -- Retaining c8ef45ccd0112571ffffffffffffffffffffffff0100000001000000\n",
"2023-05-31 14:12:41,379\tDEBUG worker.py:636 -- Releasing c8ef45ccd0112571ffffffffffffffffffffffff0100000001000000\n"
"2023-06-27 19:14:16,088\tINFO client_builder.py:251 -- Passing the following kwargs to ray.init() on the server: logging_level\n",
"2023-06-27 19:14:16,100\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.IDLE\n",
"2023-06-27 19:14:16,308\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.CONNECTING\n",
"2023-06-27 19:14:16,434\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.READY\n",
"2023-06-27 19:14:16,436\tDEBUG worker.py:807 -- Pinging server.\n",
"2023-06-27 19:14:18,634\tDEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000001000000\n",
"2023-06-27 19:14:18,635\tDEBUG worker.py:564 -- Scheduling task get_dashboard_url 0 b'\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x00\\x00\\x00\\x01\\x00\\x00\\x00'\n",
"2023-06-27 19:14:18,645\tDEBUG worker.py:640 -- Retaining c8ef45ccd0112571ffffffffffffffffffffffff0100000001000000\n",
"2023-06-27 19:14:19,454\tDEBUG worker.py:636 -- Releasing c8ef45ccd0112571ffffffffffffffffffffffff0100000001000000\n"
]
},
{
Expand Down Expand Up @@ -190,18 +164,18 @@
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left\"><b>Dashboard:</b></td>\n",
" <td style=\"text-align: left\"><b><a href=\"http://10.254.12.141:8265\" target=\"_blank\">http://10.254.12.141:8265</a></b></td>\n",
" <td style=\"text-align: left\"><b><a href=\"http://10.254.20.41:8265\" target=\"_blank\">http://10.254.20.41:8265</a></b></td>\n",
"</tr>\n",
"\n",
" </table>\n",
" </div>\n",
"</div>\n"
],
"text/plain": [
"ClientContext(dashboard_url='10.254.12.141:8265', python_version='3.8.13', ray_version='2.1.0', ray_commit='23f34d948dae8de9b168667ab27e6cf940b3ae85', protocol_version='2022-10-05', _num_clients=1, _context_to_restore=<ray.util.client._ClientContext object at 0x10e5d2bb0>)"
"ClientContext(dashboard_url='10.254.20.41:8265', python_version='3.8.13', ray_version='2.1.0', ray_commit='23f34d948dae8de9b168667ab27e6cf940b3ae85', protocol_version='2022-10-05', _num_clients=1, _context_to_restore=<ray.util.client._ClientContext object at 0x108ca2730>)"
]
},
"execution_count": 12,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -210,12 +184,12 @@
"import ray\n",
"\n",
"ray.shutdown()\n",
"ray.init(address=f\"ray://{rayclient_url}\", logging_level=\"DEBUG\")"
"ray.init(address=cluster.local_client_url(), logging_level=\"DEBUG\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 7,
"id": "3436eb4a-217c-4109-a3c3-309fda7e2442",
"metadata": {},
"outputs": [],
Expand All @@ -239,7 +213,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 8,
"id": "5cca1874-2be3-4631-ae48-9adfa45e3af3",
"metadata": {
"scrolled": true,
Expand All @@ -250,8 +224,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"2023-05-31 14:13:29,868\tDEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000002000000\n",
"2023-05-31 14:13:29,870\tDEBUG worker.py:564 -- Scheduling task heavy_calculation 0 b'\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x00\\x00\\x00\\x02\\x00\\x00\\x00'\n"
"2023-06-27 19:14:28,222\tDEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000002000000\n",
"2023-06-27 19:14:28,222\tDEBUG worker.py:564 -- Scheduling task heavy_calculation 0 b'\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x00\\x00\\x00\\x02\\x00\\x00\\x00'\n"
]
}
],
Expand All @@ -261,16 +235,16 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 9,
"id": "01172c29-e8bf-41ef-8db5-eccb07906111",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-05-31 14:13:32,643\tDEBUG worker.py:640 -- Retaining 16310a0f0a45af5cffffffffffffffffffffffff0100000001000000\n",
"2023-05-31 14:13:34,677\tDEBUG worker.py:439 -- Internal retry for get [ClientObjectRef(16310a0f0a45af5cffffffffffffffffffffffff0100000001000000)]\n"
"2023-06-27 19:14:29,202\tDEBUG worker.py:640 -- Retaining 16310a0f0a45af5cffffffffffffffffffffffff0100000001000000\n",
"2023-06-27 19:14:31,224\tDEBUG worker.py:439 -- Internal retry for get [ClientObjectRef(16310a0f0a45af5cffffffffffffffffffffffff0100000001000000)]\n"
]
},
{
Expand All @@ -279,7 +253,7 @@
"1789.4644387076714"
]
},
"execution_count": 15,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -290,18 +264,18 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 10,
"id": "9e79b547-a457-4232-b77d-19147067b972",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-05-31 14:13:37,659\tDEBUG dataclient.py:287 -- Got unawaited response connection_cleanup {\n",
"2023-06-27 19:14:33,161\tDEBUG dataclient.py:287 -- Got unawaited response connection_cleanup {\n",
"}\n",
"\n",
"2023-05-31 14:13:38,681\tDEBUG dataclient.py:278 -- Shutting down data channel.\n"
"2023-06-27 19:14:34,460\tDEBUG dataclient.py:278 -- Shutting down data channel.\n"
]
}
],
Expand All @@ -312,7 +286,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 11,
"id": "2c198f1f-68bf-43ff-a148-02b5cb000ff2",
"metadata": {},
"outputs": [],
Expand Down
15 changes: 14 additions & 1 deletion src/codeflare_sdk/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
RayCluster,
RayClusterStatus,
)

from kubernetes import client, config

import yaml
Expand Down Expand Up @@ -344,6 +343,13 @@ def from_k8_cluster_object(rc):
)
return Cluster(cluster_config)

def local_client_url(self):
    """
    Return the Ray client URL used to reach this cluster from a local machine.

    When ``local_interactive`` is enabled in the cluster configuration, the
    URL has the form ``ray://rayclient-<name>-<namespace>.<ingress domain>``,
    where the ingress domain comes from ``_get_ingress_domain()``.
    Otherwise the literal string ``"None"`` (not the ``None`` object) is
    returned.
    """
    # Rely on truthiness rather than comparing against True with `==`.
    if not self.config.local_interactive:
        # Deliberately the string "None": existing callers receive this value.
        return "None"
    ingress_domain = _get_ingress_domain()
    return f"ray://rayclient-{self.config.name}-{self.config.namespace}.{ingress_domain}"


def list_all_clusters(namespace: str, print_to_console: bool = True):
"""
Expand Down Expand Up @@ -402,6 +408,13 @@ def get_cluster(cluster_name: str, namespace: str = "default"):


# private methods
def _get_ingress_domain():
    """
    Look up the ingress domain of the cluster.

    Loads the local kube config, fetches the cluster-scoped ``ingresses``
    custom object named ``cluster`` from the ``config.openshift.io`` / ``v1``
    API, and returns its ``spec.domain`` entry.
    """
    config.load_kube_config()
    custom_objects = client.CustomObjectsApi()
    group, version, plural, name = "config.openshift.io", "v1", "ingresses", "cluster"
    ingress_config = custom_objects.get_cluster_custom_object(
        group, version, plural, name
    )
    return ingress_config["spec"]["domain"]


def _kube_api_error_handling(e: Exception): # pragma: no cover
Expand Down
13 changes: 7 additions & 6 deletions src/codeflare_sdk/utils/generate_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import sys
import argparse
import uuid
import openshift as oc
from kubernetes import client, config


def read_template(template):
Expand Down Expand Up @@ -239,12 +239,13 @@ def enable_local_interactive(resources, cluster_name, namespace):
][0].get("command")[2]

command = command.replace("deployment-name", cluster_name)

server_name = (
oc.whoami("--show-server").split(":")[1].split("//")[1].replace("api", "apps")
config.load_kube_config()
api_client = client.CustomObjectsApi()
ingress = api_client.get_cluster_custom_object(
"config.openshift.io", "v1", "ingresses", "cluster"
)

command = command.replace("server-name", server_name)
domain = ingress["spec"]["domain"]
command = command.replace("server-name", domain)

item["generictemplate"]["spec"]["headGroupSpec"]["template"]["spec"][
"initContainers"
Expand Down
19 changes: 19 additions & 0 deletions tests/unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,25 @@ def test_cluster_uris(mocker):
)


def test_local_client_url(mocker):
    # Stub the Kubernetes custom-object lookup so it returns a canned payload.
    mocker.patch(
        "kubernetes.client.CustomObjectsApi.get_cluster_custom_object",
        return_value={"spec": {"domain": ""}},
    )
    # Replace the ingress-domain helper with a fixed domain.
    mocker.patch(
        "codeflare_sdk.cluster.cluster._get_ingress_domain",
        return_value="apps.cluster.awsroute.org",
    )
    expected_url = "ray://rayclient-unit-test-cluster-ns.apps.cluster.awsroute.org"
    cluster = Cluster(
        ClusterConfiguration(
            name="unit-test-cluster", namespace="ns", local_interactive=True
        )
    )
    # The URL is assembled from the cluster name, namespace and ingress domain.
    assert cluster.local_client_url() == expected_url


def ray_addr(self, *args):
    """Return the ``_address`` attribute of ``self``; extra positional arguments are ignored."""
    address = self._address
    return address

Expand Down