Skip to content

Commit 7d77038

Browse files
committed
use custom image
1 parent 572d215 commit 7d77038

File tree

3 files changed

+56
-12
lines changed

3 files changed

+56
-12
lines changed

.github/workflows/e2e_tests.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ concurrency:
1717
cancel-in-progress: true
1818

1919
env:
20-
CODEFLARE_OPERATOR_IMG: "quay.io/project-codeflare/codeflare-operator:dev"
20+
CODEFLARE_OPERATOR_IMG: "quay.io/kryanbeane/codeflare-operator:dev"
2121

2222
jobs:
2323
kubernetes:
@@ -108,6 +108,12 @@ jobs:
108108
kubectl create clusterrolebinding sdk-user-pod-creator --clusterrole=pod-creator --user=sdk-user
109109
kubectl config use-context sdk-user
110110
111+
- name: Setup tmate session for debugging
112+
if: always()
113+
uses: mxschmitt/action-tmate@v3
114+
with:
115+
detached: true
116+
111117
- name: Run e2e tests
112118
run: |
113119
export CODEFLARE_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }}

src/codeflare_sdk/common/utils/generate_cert.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -230,22 +230,41 @@ def generate_tls_cert(cluster_name, namespace, days=30):
230230

231231
def export_env(cluster_name, namespace):
232232
"""
233-
Sets environment variables to configure TLS for a Ray cluster.
233+
Sets environment variables to configure TLS for a Ray client connection when mTLS is enabled.
234+
235+
The `tls.crt` and `tls.key` files generated by `generate_tls_cert` are client-side credentials,
236+
signed by the cluster's CA. `ca.crt` is the cluster's CA certificate.
237+
238+
This function sets:
239+
- `RAY_USE_TLS="1"` to enable TLS.
240+
- `RAY_TLS_CA_CERT` to the path of `ca.crt` for server certificate verification.
241+
- `RAY_TLS_CLIENT_CERT` and `RAY_TLS_CLIENT_KEY` to the paths of the client's `tls.crt`
242+
and `tls.key` respectively, for client authentication by the server.
243+
- `RAY_TLS_SERVER_CERT` and `RAY_TLS_SERVER_KEY` are also set to the client's `tls.crt`
243+
and `tls.key`. They are kept because this function previously exported the certificate
244+
and key under these names, so existing setups that read them continue to work.
234246
235247
Args:
236248
cluster_name (str):
237249
The name of the Ray cluster.
238250
namespace (str):
239251
The Kubernetes namespace where the Ray cluster is located.
240-
241-
Environment Variables Set:
242-
- RAY_USE_TLS: Enables TLS for Ray.
243-
- RAY_TLS_SERVER_CERT: Path to the TLS server certificate.
244-
- RAY_TLS_SERVER_KEY: Path to the TLS server private key.
245-
- RAY_TLS_CA_CERT: Path to the CA certificate.
246252
"""
247253
tls_dir = os.path.join(os.getcwd(), f"tls-{cluster_name}-{namespace}")
254+
client_cert_path = os.path.join(tls_dir, "tls.crt")
255+
client_key_path = os.path.join(tls_dir, "tls.key")
256+
ca_cert_path = os.path.join(tls_dir, "ca.crt")
257+
248258
os.environ["RAY_USE_TLS"] = "1"
249-
os.environ["RAY_TLS_SERVER_CERT"] = os.path.join(tls_dir, "tls.crt")
250-
os.environ["RAY_TLS_SERVER_KEY"] = os.path.join(tls_dir, "tls.key")
251-
os.environ["RAY_TLS_CA_CERT"] = os.path.join(tls_dir, "ca.crt")
259+
260+
# CA certificate for verifying the server
261+
os.environ["RAY_TLS_CA_CERT"] = ca_cert_path
262+
263+
# Standard mTLS client variables: client's own certificate and key
264+
os.environ["RAY_TLS_CLIENT_CERT"] = client_cert_path
265+
os.environ["RAY_TLS_CLIENT_KEY"] = client_key_path
266+
267+
# Also export the same cert/key as RAY_TLS_SERVER_CERT/KEY, the names this function set before,
268+
# so existing consumers keep working alongside the explicit RAY_TLS_CLIENT_* variables
269+
os.environ["RAY_TLS_SERVER_CERT"] = client_cert_path
270+
os.environ["RAY_TLS_SERVER_KEY"] = client_key_path

tests/e2e/local_interactive_sdk_kind_test.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import pytest
99
import ray
1010
import math
11-
11+
import time
1212
from support import *
1313

1414

@@ -66,6 +66,25 @@ def run_local_interactives(
6666
print(cluster.local_client_url())
6767

6868
ray.shutdown()
69+
70+
print("RAY DEBUGGING")
71+
print("\n========== PYTHON DEBUG INFO ==========")
72+
print(f"Ray cluster client URL: {cluster.local_client_url()}")
73+
print(f"Cluster name: {cluster_name}")
74+
print(f"Current working directory: {os.getcwd()}")
75+
print(f"Cluster: {cluster}")
76+
print(f"Cluster namespace: {self.namespace}")
77+
print(f"Cluster name: {cluster_name}")
78+
print(f"Cluster config: {cluster.config}")
79+
print(f"Cluster config namespace: {cluster.config.namespace}")
80+
print(f"Cluster config name: {cluster.config.name}")
81+
print(f"Cluster config num_workers: {cluster.config.num_workers}")
82+
print(f"Cluster config num_workers: {cluster.config.num_workers}")
83+
print("END OF RAY DEBUGGING")
84+
85+
print("Sleeping for 15 minutes before ray.init for debugging...")
86+
time.sleep(900)
87+
6988
ray.init(address=cluster.local_client_url(), logging_level="DEBUG")
7089

7190
@ray.remote(num_gpus=number_of_gpus / 2)

0 commit comments

Comments
 (0)