Skip to content

Refactor Ray and AppWrapper modules #695

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions src/codeflare_sdk.egg-info/PKG-INFO

This file was deleted.

25 changes: 0 additions & 25 deletions src/codeflare_sdk.egg-info/SOURCES.txt

This file was deleted.

Empty file.
1 change: 0 additions & 1 deletion src/codeflare_sdk.egg-info/top_level.txt

This file was deleted.

17 changes: 8 additions & 9 deletions src/codeflare_sdk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,28 @@
from .cluster import (
AWManager,
from .ray import (
Cluster,
ClusterConfiguration,
RayClusterStatus,
AppWrapperStatus,
CodeFlareClusterStatus,
RayCluster,
AppWrapper,
get_cluster,
list_all_queued,
list_all_clusters,
view_clusters,
AWManager,
AppWrapperStatus,
RayJobClient,
)

from .cluster import view_clusters

from .common import (
Authentication,
KubeConfiguration,
TokenAuthentication,
KubeConfigFileAuthentication,
)

from .job import RayJobClient

from .utils import generate_cert
from .utils.demos import copy_demo_nbs
from .common.utils import generate_cert
from .common.utils.demos import copy_demo_nbs

from importlib.metadata import version, PackageNotFoundError

Expand Down
18 changes: 0 additions & 18 deletions src/codeflare_sdk/cluster/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,3 @@
from .model import (
RayClusterStatus,
AppWrapperStatus,
CodeFlareClusterStatus,
RayCluster,
AppWrapper,
)

from .cluster import (
Cluster,
ClusterConfiguration,
get_cluster,
list_all_queued,
list_all_clusters,
)

from .widgets import (
view_clusters,
)

from .awload import AWManager
18 changes: 10 additions & 8 deletions src/codeflare_sdk/cluster/widgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,18 @@
import ipywidgets as widgets
from IPython.display import display, HTML, Javascript
import pandas as pd
from .config import ClusterConfiguration
from .model import RayClusterStatus
from ..ray.cluster.config import ClusterConfiguration
from ..ray.cluster.status import RayClusterStatus
from ..common import _kube_api_error_handling
from ..common.kubernetes_cluster.auth import (
config_check,
get_api_client,
)


def cluster_up_down_buttons(cluster: "codeflare_sdk.cluster.Cluster") -> widgets.Button:
def cluster_up_down_buttons(
cluster: "codeflare_sdk.ray.cluster.cluster.Cluster",
) -> widgets.Button:
"""
The cluster_up_down_buttons function returns two button widgets for a create and delete button.
The function uses the appwrapper bool to distinguish between resource type for the tool tip.
Expand Down Expand Up @@ -115,7 +117,7 @@ def view_clusters(namespace: str = None):
)
return # Exit function if not in Jupyter Notebook

from .cluster import get_current_namespace
from ..ray.cluster.cluster import get_current_namespace

if not namespace:
namespace = get_current_namespace()
Expand Down Expand Up @@ -278,7 +280,7 @@ def _on_ray_dashboard_button_click(
"""
_on_ray_dashboard_button_click handles the event when the Open Ray Dashboard button is clicked, opening the Ray Dashboard in a new tab
"""
from codeflare_sdk.cluster import Cluster
from codeflare_sdk.ray.cluster import Cluster

cluster_name = classification_widget.value
namespace = ray_clusters_df[ray_clusters_df["Name"] == classification_widget.value][
Expand Down Expand Up @@ -309,7 +311,7 @@ def _on_list_jobs_button_click(
"""
_on_list_jobs_button_click handles the event when the View Jobs button is clicked, opening the Ray Jobs Dashboard in a new tab
"""
from codeflare_sdk.cluster import Cluster
from codeflare_sdk.ray.cluster import Cluster

cluster_name = classification_widget.value
namespace = ray_clusters_df[ray_clusters_df["Name"] == classification_widget.value][
Expand Down Expand Up @@ -342,7 +344,7 @@ def _delete_cluster(
_delete_cluster function deletes the cluster with the given name and namespace.
It optionally waits for the cluster to be deleted.
"""
from .cluster import _check_aw_exists
from ..ray.cluster.cluster import _check_aw_exists

try:
config_check()
Expand Down Expand Up @@ -400,7 +402,7 @@ def _fetch_cluster_data(namespace):
"""
_fetch_cluster_data function fetches all clusters and their spec in a given namespace and returns a DataFrame.
"""
from .cluster import list_all_clusters
from ..ray.cluster.cluster import list_all_clusters

rayclusters = list_all_clusters(namespace, False)
if not rayclusters:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
from cryptography import x509
from cryptography.x509.oid import NameOID
import datetime
from ..common.kubernetes_cluster.auth import (
from ..kubernetes_cluster.auth import (
config_check,
get_api_client,
)
from kubernetes import client
from ..common import _kube_api_error_handling
from .. import _kube_api_error_handling


def generate_ca_cert(days: int = 30):
Expand Down
16 changes: 16 additions & 0 deletions src/codeflare_sdk/ray/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from .appwrapper import AppWrapper, AppWrapperStatus, AWManager

from .client import (
RayJobClient,
)

from .cluster import (
Cluster,
ClusterConfiguration,
get_cluster,
list_all_queued,
list_all_clusters,
RayClusterStatus,
CodeFlareClusterStatus,
RayCluster,
)
6 changes: 6 additions & 0 deletions src/codeflare_sdk/ray/appwrapper/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .awload import AWManager

from .status import (
AppWrapperStatus,
AppWrapper,
)
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
import yaml

from kubernetes import client
from ..common import _kube_api_error_handling
from ..common.kubernetes_cluster.auth import (
from ...common import _kube_api_error_handling
from ...common.kubernetes_cluster.auth import (
config_check,
get_api_client,
)
Expand Down
46 changes: 46 additions & 0 deletions src/codeflare_sdk/ray/appwrapper/status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2024 IBM, Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
The status sub-module defines Enums containing information for
AppWrapper states, as well as dataclasses to store information for AppWrappers.
"""

from dataclasses import dataclass
from enum import Enum


class AppWrapperStatus(Enum):
    """
    Defines the possible reportable phases of an AppWrapper.

    Every member's value is the lowercase spelling of its own name, so a
    member can be looked up from such a string with
    ``AppWrapperStatus("running")``.
    NOTE(review): presumably these strings mirror the phase names reported
    for AppWrapper resources on the cluster -- confirm against the caller
    that maps the resource status onto this enum.
    """

    SUSPENDED = "suspended"
    RESUMING = "resuming"
    RUNNING = "running"
    RESETTING = "resetting"
    SUSPENDING = "suspending"
    SUCCEEDED = "succeeded"
    FAILED = "failed"
    TERMINATING = "terminating"


@dataclass
class AppWrapper:
    """
    For storing information about an AppWrapper.

    A plain data holder: the ``@dataclass`` decorator generates
    ``__init__``, ``__repr__`` and ``__eq__`` from the two fields below.
    """

    # Name identifying the AppWrapper.
    name: str
    # Phase of the AppWrapper, expressed as an AppWrapperStatus member.
    status: AppWrapperStatus
13 changes: 13 additions & 0 deletions src/codeflare_sdk/ray/cluster/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from .status import (
RayClusterStatus,
CodeFlareClusterStatus,
RayCluster,
)

from .cluster import (
Cluster,
ClusterConfiguration,
get_cluster,
list_all_queued,
list_all_clusters,
)
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,29 @@

from ray.job_submission import JobSubmissionClient

from ..common.kubernetes_cluster.auth import (
from ...common.kubernetes_cluster.auth import (
config_check,
get_api_client,
)
from ..utils import pretty_print
from ..utils.generate_yaml import (
from . import pretty_print
from .generate_yaml import (
generate_appwrapper,
head_worker_gpu_count_from_cluster,
)
from ..common import _kube_api_error_handling
from ..utils.generate_yaml import is_openshift_cluster
from ...common import _kube_api_error_handling
from .generate_yaml import is_openshift_cluster

from .config import ClusterConfiguration
from .model import (
AppWrapper,
AppWrapperStatus,
from .status import (
CodeFlareClusterStatus,
RayCluster,
RayClusterStatus,
)
from .widgets import (
from ..appwrapper import (
AppWrapper,
AppWrapperStatus,
)
from ...cluster.widgets import (
cluster_up_down_buttons,
is_notebook,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
import os
import uuid
from kubernetes import client
from ..common import _kube_api_error_handling
from ..common.kubernetes_cluster.auth import (
from ...common import _kube_api_error_handling
from ...common.kubernetes_cluster.auth import (
get_api_client,
config_check,
)
Expand Down Expand Up @@ -80,7 +80,7 @@ def is_kind_cluster():

def update_names(
cluster_yaml: dict,
cluster: "codeflare_sdk.cluster.Cluster",
cluster: "codeflare_sdk.ray.cluster.cluster.Cluster",
):
metadata = cluster_yaml.get("metadata")
metadata["name"] = cluster.config.name
Expand Down Expand Up @@ -135,7 +135,7 @@ def update_resources(


def head_worker_gpu_count_from_cluster(
cluster: "codeflare_sdk.cluster.Cluster",
cluster: "codeflare_sdk.ray.cluster.cluster.Cluster",
) -> typing.Tuple[int, int]:
head_gpus = 0
worker_gpus = 0
Expand All @@ -155,7 +155,7 @@ def head_worker_gpu_count_from_cluster(


def head_worker_resources_from_cluster(
cluster: "codeflare_sdk.cluster.Cluster",
cluster: "codeflare_sdk.ray.cluster.cluster.Cluster",
) -> typing.Tuple[dict, dict]:
to_return = {}, {}
for k in cluster.config.head_extended_resource_requests.keys():
Expand All @@ -178,7 +178,7 @@ def head_worker_resources_from_cluster(

def update_nodes(
ray_cluster_dict: dict,
cluster: "codeflare_sdk.cluster.Cluster",
cluster: "codeflare_sdk.ray.cluster.cluster.Cluster",
):
head = ray_cluster_dict.get("spec").get("headGroupSpec")
worker = ray_cluster_dict.get("spec").get("workerGroupSpecs")[0]
Expand Down Expand Up @@ -325,7 +325,7 @@ def write_user_yaml(user_yaml, output_file_name):
print(f"Written to: {output_file_name}")


def generate_appwrapper(cluster: "codeflare_sdk.cluster.Cluster"):
def generate_appwrapper(cluster: "codeflare_sdk.ray.cluster.cluster.Cluster"):
cluster_yaml = read_template(cluster.config.template)
appwrapper_name, _ = gen_names(cluster.config.name)
update_names(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
from rich.panel import Panel
from rich import box
from typing import List
from ..cluster.model import RayCluster, AppWrapper, RayClusterStatus
from .status import RayCluster, RayClusterStatus
from ..appwrapper.status import AppWrapper


def print_no_resources_found():
Expand Down
Loading
Loading