forked from awslabs/kubernetes-iteration-toolkit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload.yaml
162 lines (161 loc) · 6.24 KB
/
load.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
---
# Tekton Task: runs a clusterloader2 (CL2) load test against an existing EKS
# cluster, optionally wiring CL2's in-cluster Prometheus to an AMP workspace
# via an aws-sigv4-proxy sidecar, then uploads the results to S3.
apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
  name: load
  namespace: scalability
spec:
  description: "clusterloader2 task to run various types of cl2 tests on a given cluster."
  params:
    - name: giturl
      description: "git url to clone the package"
      default: https://github.com/kubernetes/perf-tests.git
    - name: cl2-branch
      description: "The branch of clusterloader2 you want to use"
      default: "master"
    - name: nodes-per-namespace
      description: "nodes per namespace to get created for load test"
      default: "100"
    - name: cl2-load-test-throughput
      description: "throughput used for mutate operations"
      default: "15"
    - name: pods-per-node
      description: "pod density"
      default: "10"
    - name: nodes
      description: "number of dataplane nodes to run the load test against"
      default: "1000"
    - name: results-bucket
      description: "Results bucket with path of s3 to upload results"
    - name: region
      default: "us-west-2"
      description: "The region where the cluster is in."
    - name: cluster-name
      description: "The name of the EKS cluster you want to spin."
    - name: amp-workspace-id
      description: "The AMP workspace ID where remote write needs to happen."
      default: ""
  results:
    - name: datapoint
      description: "Stores the CL2 result that can be consumed by other tasks (e.g. cloudwatch)"
    - name: s3_result
      description: "Stores the S3 result path after compute"
  workspaces:
    - name: source
      mountPath: /src/k8s.io/
    - name: results
    - name: config
      mountPath: /config/
  stepTemplate:
    env:
      # All steps talk to the target cluster through the kubeconfig mounted
      # from the "config" workspace.
      - name: KUBECONFIG
        value: /config/kubeconfig
  steps:
    - name: git-clone
      image: alpine/git
      workingDir: $(workspaces.source.path)
      script: |
        git clone $(params.giturl)
        cd $(workspaces.source.path)/perf-tests/
        git fetch origin --verbose --tags
        git checkout $(params.cl2-branch)
        git branch
    - name: prepare-loadtest
      image: golang:1.22
      workingDir: $(workspaces.source.path)
      script: |
        S3_RESULT_PATH=$(params.results-bucket)
        echo $S3_RESULT_PATH > $(results.s3_result.path)
        echo "S3 Path: $S3_RESULT_PATH"
        # Render the CL2 overrides file consumed via --testoverrides below.
        cat > "$(workspaces.source.path)/overrides.yaml" <<EOL
        NODES_PER_NAMESPACE: $(params.nodes-per-namespace)
        CL2_LOAD_TEST_THROUGHPUT: $(params.cl2-load-test-throughput)
        CL2_DELETE_TEST_THROUGHPUT: $(params.cl2-load-test-throughput)
        CL2_SCHEDULER_THROUGHPUT_THRESHOLD: 70
        PODS_PER_NODE: $(params.pods-per-node)
        CL2_ENABLE_CLUSTER_OOMS_TRACKER: false
        CL2_RATE_LIMIT_POD_CREATION: false
        CL2_USE_HOST_NETWORK_PODS: false
        # we are not testing PVS at this point
        CL2_ENABLE_PVS: false
        ENABLE_SYSTEM_POD_METRICS: false
        NODE_MODE: master
        CL2_DISABLE_DAEMONSETS: true
        CL2_ALLOWED_SLOW_API_CALLS: 1000000
        CL2_PROMETHEUS_NODE_SELECTOR: "eks.amazonaws.com/nodegroup: monitoring-$(params.cluster-name)-nodes-1"
        EOL
        cat $(workspaces.source.path)/overrides.yaml
        cp $(workspaces.source.path)/overrides.yaml $(workspaces.results.path)/overrides.yaml
        # Enable Prometheus if the remote workspace id is provided
        if [ -n "$(params.amp-workspace-id)" ]; then
        # Append a sigv4 proxy sidecar + AMP remoteWrite config to the
        # Prometheus CR manifest shipped with clusterloader2. The fragment
        # is indented to sit under the CR's `spec:` key.
        cat << EOF >> $(workspaces.source.path)/perf-tests/clusterloader2/pkg/prometheus/manifests/prometheus-prometheus.yaml
          containers:
          - name: aws-sigv4-proxy-sidecar
            image: public.ecr.aws/aws-observability/aws-sigv4-proxy:1.0
            args:
            - --name
            - aps
            - --region
            - $(params.region)
            - --host
            - aps-workspaces.$(params.region).amazonaws.com
            - --port
            - :8005
            ports:
            - name: aws-sigv4-proxy
              containerPort: 8005
          remoteWrite:
          - url: http://localhost:8005/workspaces/$(params.amp-workspace-id)/api/v1/remote_write
            queueConfig:
              capacity: 2500
              maxSamplesPerSend: 1000
              maxShards: 200
          externalLabels:
            cluster_name: $(params.cluster-name)
            s3_path: $S3_RESULT_PATH
        EOF
        cat $(workspaces.source.path)/perf-tests/clusterloader2/pkg/prometheus/manifests/prometheus-prometheus.yaml
        # Tolerate the dedicated monitoring nodegroup taint. NOTE(review):
        # indent assumes the fragment lands under the pod template's spec —
        # confirm against the upstream 0prometheus-operator-deployment.yaml.
        cat << EOF >> $(workspaces.source.path)/perf-tests/clusterloader2/pkg/prometheus/manifests/0prometheus-operator-deployment.yaml
              tolerations:
              - key: monitoring
                operator: Exists
                effect: NoSchedule
        EOF
        cat $(workspaces.source.path)/perf-tests/clusterloader2/pkg/prometheus/manifests/0prometheus-operator-deployment.yaml
        fi
        # Building clusterloader2 binary
        cd $(workspaces.source.path)/perf-tests/clusterloader2/
        GOOS=linux CGO_ENABLED=0 go build -v -o ./clusterloader ./cmd
    - name: run-loadtest
      image: alpine/k8s:1.30.2
      # Keep the task going on test failure so upload-results still runs;
      # pass/fail is surfaced through the "datapoint" result instead.
      onError: continue
      script: |
        #!/bin/bash
        if [ -n "$(params.amp-workspace-id)" ]; then
        export ENABLE_PROMETHEUS_SERVER=true
        export PROMETHEUS_PVC_STORAGE_CLASS=gp2
        export PROMETHEUS_SCRAPE_KUBE_PROXY=false
        export PROMETHEUS_KUBE_PROXY_SELECTOR_KEY=k8s-app
        export PROMETHEUS_SCRAPE_APISERVER_ONLY=true
        fi
        cat $(workspaces.source.path)/perf-tests/clusterloader2/testing/load/config.yaml
        cd $(workspaces.source.path)/perf-tests/clusterloader2/
        ENABLE_EXEC_SERVICE=false ./clusterloader --kubeconfig=$KUBECONFIG --testconfig=$(workspaces.source.path)/perf-tests/clusterloader2/testing/load/config.yaml --testoverrides=$(workspaces.source.path)/overrides.yaml --nodes=$(params.nodes) --provider=eks --report-dir=$(workspaces.results.path) --alsologtostderr --v=2
        exit_code=$?
        if [ $exit_code -eq 0 ]; then
        echo "1" | tee $(results.datapoint.path)
        else
        echo "0" | tee $(results.datapoint.path)
        fi
        exit $exit_code
      timeout: 30000s
    - name: upload-results
      image: amazon/aws-cli
      workingDir: $(workspaces.results.path)
      script: |
        S3_RESULT_PATH=$(cat $(results.s3_result.path))
        echo "S3 Path: $S3_RESULT_PATH"
        aws sts get-caller-identity
        # we expect to see all files from loadtest that clusterloader2 outputs here in this dir
        ls -larth
        aws s3 cp . s3://$S3_RESULT_PATH/ --recursive