# scale-test-node-throughput-gce.yaml
# Forked from cilium/cilium.
name: Node Throughput Test

on:
  # Scheduled run, once a day at 00:39 (GitHub evaluates cron in UTC).
  schedule:
    - cron: '39 0 * * *'
  # For testing uncomment following lines:
  # push:
  #   branches:
  #     - your_branch_name
# Token permissions granted to the jobs in this workflow.
permissions:
  # To be able to access the repository with actions/checkout
  contents: read
  # To be able to request the JWT from GitHub's OIDC provider
  id-token: write
concurrency:
  # Structure:
  # - Workflow name
  # - Event type
  # - A unique identifier depending on event type:
  #   - schedule: SHA
  #   - workflow_dispatch: PR number
  #
  # This structure ensures a unique concurrency group name is generated for each
  # type of testing, such that re-runs will cancel the previous run.
  #
  # NOTE(review): the `on:` section of this file only declares `schedule` (plus
  # a commented-out `push` for testing), so the workflow_dispatch branch below
  # looks unused here - confirm before relying on it.
  group: |
    ${{ github.workflow }}
    ${{ github.event_name }}
    ${{
      (github.event_name == 'schedule' && github.sha) ||
      (github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
    }}
  cancel-in-progress: true
# Workflow-wide environment variables, read below through the `env` context.
env:
  # renovate: datasource=golang-version depName=go
  go_version: 1.22.5
  # test_name and cluster_name are joined into the kops cluster name by the
  # "Set up job variables" step, which also appends the ".k8s.local" suffix
  # (kops has stricter DNS naming requirements).
  test_name: node-throughput
  cluster_name: ${{ github.run_id }}-${{ github.run_attempt }}
  GCP_PERF_RESULTS_BUCKET: gs://cilium-scale-results
  # renovate: datasource=docker depName=google/cloud-sdk
  gcloud_version: 483.0.0
jobs:
  # Single job: bring up a kops cluster on GCE, install Cilium, run the
  # ClusterLoader2 node-throughput test, then collect diagnostics, tear the
  # cluster down and export the results.
  install-and-scaletest:
    runs-on: ubuntu-latest
    name: Install and Scale Test
    timeout-minutes: 120
    steps:
      # Falls back to the triggering commit when no context-ref input is set.
      - name: Checkout context ref (trusted)
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
        with:
          ref: ${{ inputs.context-ref || github.sha }}
          persist-credentials: false

      - name: Set Environment Variables
        uses: ./.github/actions/set-env-variables

      - name: Get Cilium's default values
        id: default_vars
        uses: ./.github/actions/helm-default
        with:
          image-tag: ${{ github.sha }}

      # Publishes SHA, cilium_install_defaults and CLUSTER_NAME as step outputs
      # for the steps below.
      - name: Set up job variables
        id: vars
        run: |
          SHA="${{ github.sha }}"

          # Setup Cilium install options
          CILIUM_INSTALL_DEFAULTS="${{ steps.default_vars.outputs.cilium_install_defaults }} \
            --helm-set=cluster.name=${{ env.cluster_name }} \
            --wait=false"

          # Adding k8s.local to the end makes kops happy;
          # it has stricter DNS naming requirements.
          CLUSTER_NAME="${{ env.test_name }}-${{ env.cluster_name }}.k8s.local"

          # The unquoted expansions let the shell collapse the whitespace left
          # over from the line continuations above.
          echo SHA=${SHA} >> $GITHUB_OUTPUT
          echo cilium_install_defaults=${CILIUM_INSTALL_DEFAULTS} >> $GITHUB_OUTPUT
          echo CLUSTER_NAME=${CLUSTER_NAME} >> $GITHUB_OUTPUT

      # Polls every 45s until each CI image tagged with the commit SHA can be
      # inspected; the step timeout bounds the wait.
      - name: Wait for images to be available
        timeout-minutes: 30
        shell: bash
        run: |
          for image in cilium-ci operator-generic-ci hubble-relay-ci ; do
            until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.SHA }} &> /dev/null; do sleep 45s; done
          done

      - name: Install Go
        uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
        with:
          go-version: ${{ env.go_version }}

      - name: Install Kops
        uses: cilium/scale-tests-action/install-kops@c3a2f10946bf94430b40fd9e3ad40834add6d555 # main

      - name: Setup gcloud credentials
        uses: google-github-actions/auth@71fee32a0bb7e97b4d33d548e7d957010649d8fa # v2.1.3
        with:
          workload_identity_provider: ${{ secrets.GCP_PERF_WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ secrets.GCP_PERF_SA }}
          create_credentials_file: true
          export_environment_variables: true

      - name: Setup gcloud CLI
        uses: google-github-actions/setup-gcloud@98ddc00a17442e89a24bbf282954a3b65ce6d200 # v2.1.0
        with:
          project_id: ${{ secrets.GCP_PERF_PROJECT_ID }}
          version: ${{ env.gcloud_version }}

      - name: Clone ClusterLoader2
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
        with:
          repository: kubernetes/perf-tests
          # Avoid using renovate to update this dependency because: (1)
          # perf-tests does not tag or release, so renovate will pull
          # all updates to the default branch and (2) continually
          # updating CL2 may impact the stability of the scale test
          # results.
          ref: 920c39ef245a81bd8fb39d7fecf39eb35820d9ef
          persist-credentials: false
          sparse-checkout: clusterloader2
          path: perf-tests

      - name: Deploy cluster
        id: deploy-cluster
        uses: cilium/scale-tests-action/create-cluster@c3a2f10946bf94430b40fd9e3ad40834add6d555 # main
        timeout-minutes: 30
        with:
          cluster_name: ${{ steps.vars.outputs.cluster_name }}
          control_plane_size: n2-standard-4
          control_plane_count: 1
          node_size: e2-standard-8
          node_count: 1
          kops_state: ${{ secrets.GCP_PERF_KOPS_STATE_STORE }}
          project_id: ${{ secrets.GCP_PERF_PROJECT_ID }}

      - name: Install Cilium CLI
        uses: cilium/cilium-cli@e386af2b9f500e4c40436ac660cd6602da104fc7 # v0.16.14
        with:
          skip-build: ${{ env.CILIUM_CLI_SKIP_BUILD }}
          image-repo: ${{ env.CILIUM_CLI_IMAGE_REPO }}
          image-tag: ${{ env.CILIUM_CLI_VERSION }}

      - name: Display version info of installed tools
        run: |
          echo "--- go ---"
          go version
          echo "--- cilium-cli ---"
          cilium version --client
          echo "--- kops ---"
          ./kops version
          echo "--- gcloud ---"
          gcloud version

      - name: Setup firewall rules
        uses: cilium/scale-tests-action/setup-firewall@c3a2f10946bf94430b40fd9e3ad40834add6d555 # main
        with:
          cluster_name: ${{ steps.vars.outputs.cluster_name }}

      # The install options include --wait=false, so readiness is checked by
      # the two "Wait for ..." steps that follow.
      - name: Install Cilium
        run: |
          cilium install ${{ steps.vars.outputs.cilium_install_defaults }}

      - name: Wait for cluster to be ready
        uses: cilium/scale-tests-action/validate-cluster@c3a2f10946bf94430b40fd9e3ad40834add6d555 # main
        timeout-minutes: 20
        with:
          cluster_name: ${{ steps.vars.outputs.cluster_name }}
          kops_state: ${{ secrets.GCP_PERF_KOPS_STATE_STORE }}

      - name: Wait for Cilium status to be ready
        run: |
          cilium status --wait

      - name: Run CL2
        id: run-cl2
        working-directory: ./perf-tests/clusterloader2
        timeout-minutes: 30
        shell: bash
        # --enable-exec-service=false to reduce number of pods so 100 pods can fit in node
        # POD_STARTUP_LATENCY_THRESHOLD=60s so the test doesn't fail, currently we have ~30s pods startup latency
        run: |
          mkdir ./report
          echo POD_STARTUP_LATENCY_THRESHOLD: 60s >> ./testoverrides.yaml
          echo POD_COUNT: 98 >> ./testoverrides.yaml

          go run ./cmd/clusterloader.go \
            -v=4 \
            --testconfig=./testing/node-throughput/config.yaml \
            --testoverrides=./testoverrides.yaml \
            --enable-exec-service=false \
            --provider=gce \
            --enable-prometheus-server \
            --testoverrides=./testing/prometheus/not-scrape-kube-proxy.yaml \
            --tear-down-prometheus-server=false \
            --report-dir=./report \
            --kubeconfig=$HOME/.kube/config \
            2>&1 | tee cl2-output.txt

      # Runs even after a failure, as long as the CL2 step was not skipped.
      - name: Get sysdump
        if: ${{ always() && steps.run-cl2.outcome != 'skipped' }}
        run: |
          cilium status
          cilium sysdump --output-filename cilium-sysdump-final

      # Runs whenever cluster deployment was attempted, even if later steps
      # failed.
      - name: Cleanup cluster
        if: ${{ always() && steps.deploy-cluster.outcome != 'skipped' }}
        uses: cilium/scale-tests-action/cleanup-cluster@c3a2f10946bf94430b40fd9e3ad40834add6d555 # main
        with:
          cluster_name: ${{ steps.vars.outputs.cluster_name }}
          kops_state: ${{ secrets.GCP_PERF_KOPS_STATE_STORE }}

      - name: Export results and sysdump to GS bucket
        if: ${{ always() && steps.run-cl2.outcome != 'skipped' }}
        uses: cilium/scale-tests-action/export-results@c3a2f10946bf94430b40fd9e3ad40834add6d555 # main
        with:
          test_name: ${{ env.test_name }}
          results_bucket: ${{ env.GCP_PERF_RESULTS_BUCKET }}
          artifacts: ./perf-tests/clusterloader2/report/*
          other_files: cilium-sysdump-final.zip ./perf-tests/clusterloader2/cl2-output.txt