-
Notifications
You must be signed in to change notification settings - Fork 469
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add initial perf tests for 100 RayCluster and 100 RayJob (#2102)
Signed-off-by: Andrew Sy Kim <[email protected]>
- Loading branch information
1 parent
f27e4ac
commit 33a9b24
Showing
9 changed files
with
341 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# clusterloader2 test config: create RayClusters across 10 namespaces
# (10 per namespace, 100 in total) and measure pod startup latency.
name: kuberay
namespace:
  number: 10
tuningSets:
- name: Uniform100qps
  qpsLoad:
    qps: 100
steps:
# Measurements are started before any object is created so that every pod
# created by the operator is observed.
- name: Start measurements
  measurements:
  - Identifier: PodStartupLatency
    Method: PodStartupLatency
    Params:
      action: start
      labelSelector: app.kubernetes.io/created-by = kuberay-operator
      threshold: 5m
  - Identifier: WaitForControlledPodsRunning
    Method: WaitForControlledPodsRunning
    Params:
      action: start
      apiVersion: ray.io/v1
      kind: RayCluster
      labelSelector: app.kubernetes.io/created-by = kuberay-operator
      operationTimeout: 120s
- name: Creating Ray clusters
  phases:
  - namespaceRange:
      min: 1
      max: 10
    replicasPerNamespace: 10
    tuningSet: Uniform100qps
    objectBundle:
    - basename: raycluster
      objectTemplatePath: raycluster.yaml
      templateFillMap:
        # Substituted for {{.Replicas}} in raycluster.yaml.
        Replicas: 1
# Runs an external script that polls until the RayClusters report ready;
# the timeout below bounds that otherwise unbounded polling loop.
- name: Wait for RayClusters ready
  measurements:
  - Identifier: WaitForRayCluster
    Method: Exec
    Params:
      timeout: 10m
      command:
      - "bash"
      - "100-raycluster/wait-for-rayclusters.sh"
- name: Wait for pods to be running
  measurements:
  - Identifier: WaitForControlledPodsRunning
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
      operationTimeout: 10m
- name: Measure pod startup latency
  measurements:
  - Identifier: PodStartupLatency
    Method: PodStartupLatency
    Params:
      action: gather
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# RayCluster object template used by the 100-raycluster test.
# {{.Name}} and {{.Replicas}} are template placeholders; Replicas is supplied
# through templateFillMap in the test's config.yaml.
apiVersion: ray.io/v1
kind: RayCluster
metadata:
  name: {{.Name}}
spec:
  rayVersion: '2.9.3'
  headGroupSpec:
    serviceType: ClusterIP
    rayStartParams:
      dashboard-host: '0.0.0.0'
    template:
      spec:
        containers:
        - name: ray-head
          image: rayproject/ray:2.9.3
          ports:
          - containerPort: 6379
            name: gcs
          - containerPort: 8265
            name: dashboard
          - containerPort: 10001
            name: client
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
          volumeMounts:
          - mountPath: /tmp/ray
            name: ray-logs
          resources:
            limits:
              cpu: "1"
            requests:
              cpu: "100m"
        volumes:
        - name: ray-logs
          emptyDir: {}
  workerGroupSpecs:
  - replicas: {{.Replicas}}
    minReplicas: 1
    maxReplicas: 10
    # logical group name, for this called small-group, also can be functional
    groupName: small-group
    rayStartParams: {}
    template:
      spec:
        containers:
        - name: ray-worker
          image: rayproject/ray:2.9.3
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
          volumeMounts:
          - mountPath: /tmp/ray
            name: ray-logs
          resources:
            limits:
              cpu: "1"
            requests:
              cpu: "100m"
        volumes:
        - name: ray-logs
          emptyDir: {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<testsuite name="ClusterLoaderV2" tests="0" failures="0" errors="0" time="58.451"> | ||
<testcase name="kuberay overall (github.com/ray-project/kuberary/benchmark/perf-tests/100-raycluster/config.yaml)" classname="ClusterLoaderV2" time="58.449897441"></testcase> | ||
<testcase name="kuberay: [step: 01] Start measurements [00] - PodStartupLatency" classname="ClusterLoaderV2" time="0.271670737"></testcase> | ||
<testcase name="kuberay: [step: 01] Start measurements [01] - WaitForControlledPodsRunning" classname="ClusterLoaderV2" time="0.673679139"></testcase> | ||
<testcase name="kuberay: [step: 02] Creating Ray clusters" classname="ClusterLoaderV2" time="1.112338422"></testcase> | ||
<testcase name="kuberay: [step: 03] Wait for pods to be running [00] - WaitForControlledPodsRunning" classname="ClusterLoaderV2" time="5.199491235"></testcase> | ||
<testcase name="kuberay: [step: 04] Measure pod startup latency [00] - PodStartupLatency" classname="ClusterLoaderV2" time="0.477699884"></testcase> | ||
</testsuite> |
17 changes: 17 additions & 0 deletions
17
benchmark/perf-tests/100-raycluster/wait-for-rayclusters.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!/bin/bash
#
# Block until the expected number of RayClusters (across all namespaces)
# report `.status.state` equal to "ready".
#
# Usage: wait-for-rayclusters.sh [expected_count]
#   expected_count  number of ready RayClusters to wait for (default: 100)
#
# The loop has no deadline of its own; the caller is expected to bound the
# wait (the clusterloader2 Exec measurement that runs this sets a timeout).

expect_succeeded="${1:-100}"
echo "waiting for $expect_succeeded RayClusters to be completed successfully"

while true; do
  # Print one state per line and count only lines that are exactly "ready",
  # so a cluster whose *name* contains "ready" is not miscounted.
  # grep -c always prints a number, even when nothing matches.
  num_succeeded=$(kubectl get raycluster -A -o jsonpath='{range .items[*]}{.status.state}{"\n"}{end}' | grep -cx ready)
  echo "$num_succeeded RayClusters ready..."

  # >= rather than ==: -A also picks up clusters outside the test namespaces,
  # and overshooting the target must not make the loop spin forever.
  if (( num_succeeded >= expect_succeeded )); then
    break
  fi

  sleep 5
done

echo "$num_succeeded RayClusters ready!"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# clusterloader2 test config: create RayJobs across 10 namespaces
# (10 per namespace, 100 in total) and measure pod startup and job
# lifecycle latency.
name: kuberay
namespace:
  number: 10
tuningSets:
- name: Uniform100qps
  qpsLoad:
    qps: 100
steps:
# Measurements are started before any object is created so that every pod
# and job created by the operator is observed.
- name: Start measurements
  measurements:
  - Identifier: PodStartupLatency
    Method: PodStartupLatency
    Params:
      action: start
      labelSelector: app.kubernetes.io/created-by = kuberay-operator
      threshold: 5m
  - Identifier: WaitForControlledPodsRunning
    Method: WaitForControlledPodsRunning
    Params:
      action: start
      apiVersion: ray.io/v1
      kind: RayCluster
      labelSelector: app.kubernetes.io/created-by = kuberay-operator
      operationTimeout: 120s
  - Identifier: JobLifecycleLatency
    Method: JobLifecycleLatency
    Params:
      action: start
      labelSelector: app.kubernetes.io/created-by = kuberay-operator
      threshold: 5m
- name: Creating RayJobs
  phases:
  - namespaceRange:
      min: 1
      max: 10
    replicasPerNamespace: 10
    tuningSet: Uniform100qps
    objectBundle:
    - basename: rayjob
      objectTemplatePath: rayjob.yaml
# Runs an external script that polls until the RayJobs report SUCCEEDED;
# the timeout below bounds that otherwise unbounded polling loop.
- name: Wait for RayJobs complete
  measurements:
  - Identifier: WaitForRayJob
    Method: Exec
    Params:
      timeout: 10m
      command:
      - "bash"
      - "100-rayjob/wait-for-rayjobs.sh"
- name: Wait for pods to be running
  measurements:
  - Identifier: WaitForControlledPodsRunning
    Method: WaitForControlledPodsRunning
    Params:
      action: gather
      operationTimeout: 10m
- name: Measure pod startup latency
  measurements:
  - Identifier: PodStartupLatency
    Method: PodStartupLatency
    Params:
      action: gather
- name: Measure job finished
  measurements:
  - Identifier: JobLifecycleLatency
    Method: JobLifecycleLatency
    Params:
      action: gather
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# RayJob object template used by the 100-rayjob test.
# {{.Name}} is a template placeholder for the generated object name.
apiVersion: ray.io/v1
kind: RayJob
metadata:
  name: {{.Name}}
spec:
  # Minimal workload: connect to the cluster and print its resources.
  entrypoint: python -c "import ray; ray.init(); print(ray.cluster_resources())"
  shutdownAfterJobFinishes: true
  rayClusterSpec:
    rayVersion: '2.9.3'
    headGroupSpec:
      rayStartParams:
        dashboard-host: '0.0.0.0'
      template:
        spec:
          containers:
          - name: ray-head
            image: rayproject/ray:2.9.3
            ports:
            - containerPort: 6379
              name: gcs-server
            - containerPort: 8265
              name: dashboard
            - containerPort: 10001
              name: client
            resources:
              limits:
                cpu: "1"
              requests:
                cpu: "100m"
    workerGroupSpecs:
    - replicas: 1
      minReplicas: 1
      maxReplicas: 5
      groupName: small-group
      rayStartParams: {}
      template:
        spec:
          containers:
          - name: ray-worker
            image: rayproject/ray:2.9.3
            lifecycle:
              preStop:
                exec:
                  command: [ "/bin/sh","-c","ray stop" ]
            resources:
              limits:
                cpu: "1"
              requests:
                cpu: "100m"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<testsuite name="ClusterLoaderV2" tests="0" failures="0" errors="0" time="284.804"> | ||
<testcase name="kuberay overall (100-rayjob/config.yaml)" classname="ClusterLoaderV2" time="284.80261448"></testcase> | ||
<testcase name="kuberay: [step: 01] Start measurements [00] - PodStartupLatency" classname="ClusterLoaderV2" time="0.106826602"></testcase> | ||
<testcase name="kuberay: [step: 01] Start measurements [01] - WaitForControlledPodsRunning" classname="ClusterLoaderV2" time="0.207335286"></testcase> | ||
<testcase name="kuberay: [step: 01] Start measurements [02] - JobLifecycleLatency" classname="ClusterLoaderV2" time="0.106730692"></testcase> | ||
<testcase name="kuberay: [step: 02] Creating RayJobs" classname="ClusterLoaderV2" time="1.059487968"></testcase> | ||
<testcase name="kuberay: [step: 03] Wait for RayJobs complete [00] - WaitForRayJob" classname="ClusterLoaderV2" time="217.399873864"></testcase> | ||
<testcase name="kuberay: [step: 04] Wait for pods to be running [00] - WaitForControlledPodsRunning" classname="ClusterLoaderV2" time="5.011879337"></testcase> | ||
<testcase name="kuberay: [step: 05] Measure pod startup latency [00] - PodStartupLatency" classname="ClusterLoaderV2" time="0.04856601"></testcase> | ||
<testcase name="kuberay: [step: 06] Measure job finished [00] - JobLifecycleLatency" classname="ClusterLoaderV2" time="1.001760404"></testcase> | ||
</testsuite> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!/bin/bash
#
# Block until the expected number of RayJobs (across all namespaces) report
# `.status.jobStatus` equal to "SUCCEEDED".
#
# Usage: wait-for-rayjobs.sh [expected_count]
#   expected_count  number of succeeded RayJobs to wait for (default: 100)
#
# The loop has no deadline of its own; the caller is expected to bound the
# wait (the clusterloader2 Exec measurement that runs this sets a timeout).

expect_succeeded="${1:-100}"
echo "waiting for $expect_succeeded RayJobs to be completed successfully"

while true; do
  # Print one status per line and count only lines that are exactly
  # "SUCCEEDED", so a job whose *name* contains that text is not miscounted.
  # grep -c always prints a number, even when nothing matches.
  num_succeeded=$(kubectl get rayjob -A -o jsonpath='{range .items[*]}{.status.jobStatus}{"\n"}{end}' | grep -cx SUCCEEDED)
  echo "$num_succeeded RayJobs completed..."

  # >= rather than ==: -A also picks up jobs outside the test namespaces,
  # and overshooting the target must not make the loop spin forever.
  if (( num_succeeded >= expect_succeeded )); then
    break
  fi

  sleep 5
done

echo "$num_succeeded RayJobs completed!"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# KubeRay Performance Tests | ||
|
||
This directory contains a collection of large scale KubeRay tests using [clusterloader2](https://github.com/kubernetes/perf-tests/tree/master/clusterloader2). | ||
clusterloader2 is a Kubernetes load testing tool by [SIG Scalability](https://github.com/kubernetes/community/blob/master/sig-scalability) used for Kubernetes scalability and performance testing. | ||
|
||
## Running clusterloader2 tests | ||
|
||
First, clone the perf-tests repository and compile the clusterloader2 binary: | ||
``` | ||
git clone [email protected]:kubernetes/perf-tests.git | ||
cd perf-tests/clusterloader2 | ||
go build -o clusterloader2 ./cmd | ||
``` | ||
|
||
Run the following command to run clusterloader2 against one of the test folders. In this example we'll run the test configured in the [100-raycluster](./100-raycluster/) folder. | ||
``` | ||
clusterloader2 --provider=<provider-name> --kubeconfig=<path to kubeconfig> --testconfig=100-raycluster/config.yaml | ||
``` | ||
|
||
## Tests & Results | ||
|
||
Each directory contains a test scenario and its clusterloader2 configuration. Each directory also contains a `results` subdirectory with junit.xml files generated by clusterloader2 | ||
for previously executed runs of the tests. | ||
|
||
The current list of tests is: | ||
* [100 RayCluster test](./100-raycluster/) | ||
* [100 RayJob test](./100-rayjob/) | ||
|
||
|
||
## Run a performance test with Kind | ||
|
||
You can test clusterloader2 configs using Kind. | ||
|
||
First create a kind cluster: | ||
``` | ||
kind create cluster --image=kindest/node:v1.27.3 | ||
``` | ||
|
||
Install the KubeRay operator (this assumes the `kuberay` Helm repository has already been added): | ||
``` | ||
helm install kuberay-operator kuberay/kuberay-operator --version 1.1.0 | ||
``` | ||
|
||
Run a clusterloader2 test: | ||
``` | ||
clusterloader2 --provider kind --kubeconfig ~/.kube/config --testconfig ./100-raycluster/config.yaml | ||
``` |