Skip to content

Commit 6a01352

Browse files
committed
Adds RayCluster.apply()
- Adds RayCluster.apply() implementation - Adds e2e tests for apply - Adds unit tests for apply - Exclude unit tests code from coverage - Add coverage to cluster.py - Adding coverage for the case of an openshift cluster
1 parent be9763a commit 6a01352

File tree

11 files changed

+568
-37
lines changed

11 files changed

+568
-37
lines changed

.coveragerc

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[run]
2+
omit =
3+
src/**/test_*.py,,src/codeflare_sdk/common/utils/unit_test_support.py
4+
5+
[report]
6+
exclude_lines =
7+
pragma: no cover

.github/workflows/coverage-badge.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
poetry install --with test
2727
- name: Generate coverage report
2828
run: |
29-
coverage run -m pytest
29+
coverage run -m pytest --omit="src/**/test_*.py,src/codeflare_sdk/common/utils/unit_test_support.py"
3030
3131
- name: Coverage Badge
3232
uses: tj-actions/coverage-badge-py@v2

CONTRIBUTING.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ pytest -v src/codeflare_sdk
7676

7777
### Local e2e Testing
7878

79-
- Please follow the [e2e documentation](https://github.com/project-codeflare/codeflare-sdk/blob/main/docs/e2e.md)
79+
- Please follow the [e2e documentation](https://github.com/project-codeflare/codeflare-sdk/blob/main/docs/sphinx/user-docs/e2e.rst)
8080

8181
#### Code Coverage
8282

src/codeflare_sdk/common/kueue/test_kueue.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
from ..utils.unit_test_support import get_local_queue, createClusterConfig
14+
from ..utils.unit_test_support import get_local_queue, create_cluster_config
1515
from unittest.mock import patch
1616
from codeflare_sdk.ray.cluster.cluster import Cluster, ClusterConfiguration
1717
import yaml
@@ -46,7 +46,7 @@ def test_cluster_creation_no_aw_local_queue(mocker):
4646
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
4747
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
4848
)
49-
config = createClusterConfig()
49+
config = create_cluster_config()
5050
config.name = "unit-test-cluster-kueue"
5151
config.write_to_file = True
5252
config.local_queue = "local-queue-default"
@@ -59,7 +59,7 @@ def test_cluster_creation_no_aw_local_queue(mocker):
5959
)
6060

6161
# With resources loaded in memory, no Local Queue specified.
62-
config = createClusterConfig()
62+
config = create_cluster_config()
6363
config.name = "unit-test-cluster-kueue"
6464
config.write_to_file = False
6565
cluster = Cluster(config)
@@ -79,7 +79,7 @@ def test_aw_creation_local_queue(mocker):
7979
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
8080
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
8181
)
82-
config = createClusterConfig()
82+
config = create_cluster_config()
8383
config.name = "unit-test-aw-kueue"
8484
config.appwrapper = True
8585
config.write_to_file = True
@@ -93,7 +93,7 @@ def test_aw_creation_local_queue(mocker):
9393
)
9494

9595
# With resources loaded in memory, no Local Queue specified.
96-
config = createClusterConfig()
96+
config = create_cluster_config()
9797
config.name = "unit-test-aw-kueue"
9898
config.appwrapper = True
9999
config.write_to_file = False
@@ -114,7 +114,7 @@ def test_get_local_queue_exists_fail(mocker):
114114
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
115115
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
116116
)
117-
config = createClusterConfig()
117+
config = create_cluster_config()
118118
config.name = "unit-test-aw-kueue"
119119
config.appwrapper = True
120120
config.write_to_file = True

src/codeflare_sdk/common/utils/unit_test_support.py

+55-11
Original file line numberDiff line numberDiff line change
@@ -26,32 +26,34 @@
2626
aw_dir = os.path.expanduser("~/.codeflare/resources/")
2727

2828

29-
def createClusterConfig():
29+
def create_cluster_config(num_workers=2, write_to_file=False):
3030
config = ClusterConfiguration(
3131
name="unit-test-cluster",
3232
namespace="ns",
33-
num_workers=2,
33+
num_workers=num_workers,
3434
worker_cpu_requests=3,
3535
worker_cpu_limits=4,
3636
worker_memory_requests=5,
3737
worker_memory_limits=6,
3838
appwrapper=True,
39-
write_to_file=False,
39+
write_to_file=write_to_file,
4040
)
4141
return config
4242

4343

44-
def createClusterWithConfig(mocker):
45-
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
46-
mocker.patch(
47-
"kubernetes.client.CustomObjectsApi.get_cluster_custom_object",
48-
return_value={"spec": {"domain": "apps.cluster.awsroute.org"}},
49-
)
50-
cluster = Cluster(createClusterConfig())
44+
def create_cluster(mocker, num_workers=2, write_to_file=False):
45+
cluster = Cluster(create_cluster_config(num_workers, write_to_file))
5146
return cluster
5247

5348

54-
def createClusterWrongType():
49+
def patch_cluster_with_dynamic_client(mocker, cluster, dynamic_client=None):
50+
mocker.patch.object(cluster, "get_dynamic_client", return_value=dynamic_client)
51+
mocker.patch.object(cluster, "down", return_value=None)
52+
mocker.patch.object(cluster, "config_check", return_value=None)
53+
# mocker.patch.object(cluster, "_throw_for_no_raycluster", return_value=None)
54+
55+
56+
def create_cluster_wrong_type():
5557
config = ClusterConfiguration(
5658
name="unit-test-cluster",
5759
namespace="ns",
@@ -383,6 +385,48 @@ def mocked_ingress(port, cluster_name="unit-test-cluster", annotations: dict = N
383385
return mock_ingress
384386

385387

388+
# Global dictionary to maintain state in the mock
389+
cluster_state = {}
390+
391+
392+
# The mock side_effect function for server_side_apply
393+
def mock_server_side_apply(resource, body=None, name=None, namespace=None, **kwargs):
394+
# Simulate the behavior of server_side_apply:
395+
# Update a mock state that represents the cluster's current configuration.
396+
# Stores the state in a global dictionary for simplicity.
397+
398+
global cluster_state
399+
400+
if not resource or not body or not name or not namespace:
401+
raise ValueError("Missing required parameters for server_side_apply")
402+
403+
# Extract worker count from the body if it exists
404+
try:
405+
worker_count = (
406+
body["spec"]["workerGroupSpecs"][0]["replicas"]
407+
if "spec" in body and "workerGroupSpecs" in body["spec"]
408+
else None
409+
)
410+
except KeyError:
411+
worker_count = None
412+
413+
# Apply changes to the cluster_state mock
414+
cluster_state[name] = {
415+
"namespace": namespace,
416+
"worker_count": worker_count,
417+
"body": body,
418+
}
419+
420+
# Return a response that mimics the behavior of a successful apply
421+
return {
422+
"status": "success",
423+
"applied": True,
424+
"name": name,
425+
"namespace": namespace,
426+
"worker_count": worker_count,
427+
}
428+
429+
386430
@patch.dict("os.environ", {"NB_PREFIX": "test-prefix"})
387431
def create_cluster_all_config_params(mocker, cluster_name, is_appwrapper) -> Cluster:
388432
mocker.patch(

src/codeflare_sdk/common/widgets/test_widgets.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import codeflare_sdk.common.widgets.widgets as cf_widgets
1616
import pandas as pd
1717
from unittest.mock import MagicMock, patch
18-
from ..utils.unit_test_support import get_local_queue, createClusterConfig
18+
from ..utils.unit_test_support import get_local_queue, create_cluster_config
1919
from codeflare_sdk.ray.cluster.cluster import Cluster
2020
from codeflare_sdk.ray.cluster.status import (
2121
RayCluster,
@@ -38,7 +38,7 @@ def test_cluster_up_down_buttons(mocker):
3838
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
3939
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
4040
)
41-
cluster = Cluster(createClusterConfig())
41+
cluster = Cluster(create_cluster_config())
4242

4343
with patch("ipywidgets.Button") as MockButton, patch(
4444
"ipywidgets.Checkbox"

src/codeflare_sdk/ray/cluster/cluster.py

+87-8
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@
5252
import requests
5353

5454
from kubernetes import config
55+
from kubernetes.dynamic import DynamicClient
56+
from kubernetes import client as k8s_client
57+
from kubernetes.client.rest import ApiException
58+
5559
from kubernetes.client.rest import ApiException
5660
import warnings
5761

@@ -84,6 +88,14 @@ def __init__(self, config: ClusterConfiguration):
8488
if is_notebook():
8589
cluster_up_down_buttons(self)
8690

91+
def get_dynamic_client(self): # pragma: no cover
92+
"""Return a dynamic client, optionally mocked in tests."""
93+
return DynamicClient(get_api_client())
94+
95+
def config_check(self):
96+
"""Return a dynamic client, optionally mocked in tests."""
97+
return config_check()
98+
8799
@property
88100
def _client_headers(self):
89101
k8_client = get_api_client()
@@ -95,9 +107,7 @@ def _client_headers(self):
95107

96108
@property
97109
def _client_verify_tls(self):
98-
if not _is_openshift_cluster or not self.config.verify_tls:
99-
return False
100-
return True
110+
return _is_openshift_cluster and self.config.verify_tls
101111

102112
@property
103113
def job_client(self):
@@ -121,7 +131,6 @@ def create_resource(self):
121131
Called upon cluster object creation, creates an AppWrapper yaml based on
122132
the specifications of the ClusterConfiguration.
123133
"""
124-
125134
if self.config.namespace is None:
126135
self.config.namespace = get_current_namespace()
127136
if self.config.namespace is None:
@@ -130,7 +139,6 @@ def create_resource(self):
130139
raise TypeError(
131140
f"Namespace {self.config.namespace} is of type {type(self.config.namespace)}. Check your Kubernetes Authentication."
132141
)
133-
134142
return build_ray_cluster(self)
135143

136144
# creates a new cluster with the provided or default spec
@@ -139,10 +147,11 @@ def up(self):
139147
Applies the Cluster yaml, pushing the resource request onto
140148
the Kueue localqueue.
141149
"""
142-
150+
print(
151+
"WARNING: The up() function is planned for deprecation in favor of apply()."
152+
)
143153
# check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
144154
self._throw_for_no_raycluster()
145-
146155
namespace = self.config.namespace
147156

148157
try:
@@ -176,6 +185,52 @@ def up(self):
176185
except Exception as e: # pragma: no cover
177186
return _kube_api_error_handling(e)
178187

188+
# Applies a new cluster with the provided or default spec
189+
def apply(self, force=False):
190+
"""
191+
Applies the Cluster yaml using server-side apply.
192+
If 'force' is set to True, conflicts will be forced.
193+
"""
194+
# check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
195+
self._throw_for_no_raycluster()
196+
namespace = self.config.namespace
197+
198+
try:
199+
self.config_check()
200+
api_instance = client.CustomObjectsApi(get_api_client())
201+
crds = self.get_dynamic_client().resources
202+
api_instance = crds.get(
203+
api_version="workload.codeflare.dev/v1beta2", kind="AppWrapper"
204+
)
205+
if self.config.appwrapper:
206+
if self.config.write_to_file:
207+
with open(self.resource_yaml) as f:
208+
aw = yaml.load(f, Loader=yaml.FullLoader)
209+
api_instance.server_side_apply(
210+
group="workload.codeflare.dev",
211+
version="v1beta2",
212+
namespace=namespace,
213+
plural="appwrappers",
214+
body=aw,
215+
)
216+
else:
217+
api_instance.server_side_apply(
218+
group="workload.codeflare.dev",
219+
version="v1beta2",
220+
namespace=namespace,
221+
plural="appwrappers",
222+
body=self.resource_yaml,
223+
)
224+
print(f"AppWrapper: '{self.config.name}' has successfully been created")
225+
else:
226+
api_instance = crds.get(api_version="ray.io/v1", kind="RayCluster")
227+
self._component_resources_apply(namespace, api_instance)
228+
print(
229+
f"Ray Cluster: '{self.config.name}' has successfully been applied"
230+
)
231+
except Exception as e: # pragma: no cover
232+
return _kube_api_error_handling(e)
233+
179234
def _throw_for_no_raycluster(self):
180235
api_instance = client.CustomObjectsApi(get_api_client())
181236
try:
@@ -204,7 +259,7 @@ def down(self):
204259
resource_name = self.config.name
205260
self._throw_for_no_raycluster()
206261
try:
207-
config_check()
262+
self.config_check()
208263
api_instance = client.CustomObjectsApi(get_api_client())
209264
if self.config.appwrapper:
210265
api_instance.delete_namespaced_custom_object(
@@ -507,6 +562,16 @@ def _component_resources_up(
507562
else:
508563
_create_resources(self.resource_yaml, namespace, api_instance)
509564

565+
def _component_resources_apply(
566+
self, namespace: str, api_instance: client.CustomObjectsApi
567+
):
568+
if self.config.write_to_file:
569+
with open(self.resource_yaml) as f:
570+
ray_cluster = yaml.safe_load(f)
571+
_apply_resources(ray_cluster, namespace, api_instance)
572+
else:
573+
_apply_resources(self.resource_yaml, namespace, api_instance)
574+
510575
def _component_resources_down(
511576
self, namespace: str, api_instance: client.CustomObjectsApi
512577
):
@@ -744,6 +809,20 @@ def _create_resources(yamls, namespace: str, api_instance: client.CustomObjectsA
744809
)
745810

746811

812+
def _apply_resources(
813+
yamls, namespace: str, api_instance: client.CustomObjectsApi, force=False
814+
):
815+
api_instance.server_side_apply(
816+
field_manager="cluster-manager",
817+
group="ray.io",
818+
version="v1",
819+
namespace=namespace,
820+
plural="rayclusters",
821+
body=yamls,
822+
force_conflicts=force, # Allow forcing conflicts if needed
823+
)
824+
825+
747826
def _check_aw_exists(name: str, namespace: str) -> bool:
748827
try:
749828
config_check()

0 commit comments

Comments
 (0)