Skip to content

Commit 7ea41c1

Browse files
authored
refactor(ci): migrate to updated slo action (#748)
* refactor(ci): migrate to updated slo action * runs on larger runner
1 parent 2730c0b commit 7ea41c1

File tree

20 files changed

+876
-831
lines changed

20 files changed

+876
-831
lines changed

.github/workflows/slo-report.yml

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
name: SLO Report
1+
name: slo-report
22

33
on:
44
workflow_run:
5-
workflows: ['SLO']
5+
workflows: ["SLO"]
66
types:
77
- completed
88

@@ -11,12 +11,47 @@ jobs:
1111
runs-on: ubuntu-latest
1212
name: Publish YDB SLO Report
1313
permissions:
14+
checks: write
1415
contents: read
1516
pull-requests: write
1617
if: github.event.workflow_run.conclusion == 'success'
1718
steps:
1819
- name: Publish YDB SLO Report
19-
uses: ydb-platform/ydb-slo-action/report@53e02500d4a98a6b67d9009bc46e839236f15f81
20+
uses: ydb-platform/ydb-slo-action/report@13c687b7d4b2879da79dd12932dee0ed2b65dd1c
2021
with:
2122
github_token: ${{ secrets.GITHUB_TOKEN }}
2223
github_run_id: ${{ github.event.workflow_run.id }}
# Job: strips the 'SLO' label from every pull request attached to the
# triggering workflow run. Belongs under `jobs:` of slo-report.yml.
remove-slo-label:
  name: Remove SLO Label
  needs: ydb-slo-action-report
  # always() lets this run whatever the outcome of the job it waits on;
  # the second clause restricts it to runs that were started by a pull request.
  if: always() && github.event.workflow_run.event == 'pull_request'
  runs-on: ubuntu-latest
  permissions:
    pull-requests: write
  steps:
    - name: Remove SLO label from PR
      uses: actions/github-script@v7
      with:
        script: |
          const linkedPrs = context.payload.workflow_run.pull_requests;

          // Nothing attached to the run: log and stop.
          if (!linkedPrs || linkedPrs.length === 0) {
            console.log('No pull requests associated with this workflow run');
            return;
          }

          for (const linkedPr of linkedPrs) {
            const prNumber = linkedPr.number;
            try {
              await github.rest.issues.removeLabel({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                name: 'SLO'
              });
              console.log(`Removed SLO label from PR #${prNumber}`);
            } catch (error) {
              // Anything other than a 404 is re-thrown and fails the step.
              if (error.status !== 404) {
                throw error;
              }
              console.log(`SLO label not found on PR #${prNumber}, skipping`);
            }
          }

.github/workflows/slo.yml

Lines changed: 188 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1,136 +1,240 @@
11
name: SLO
22

33
on:
4-
push:
5-
branches:
6-
- main
74
pull_request:
5+
types: [opened, reopened, synchronize, labeled]
86
branches:
97
- main
108
workflow_dispatch:
119
inputs:
12-
github_pull_request_number:
13-
required: true
14-
slo_workload_duration_seconds:
15-
default: '600'
10+
github_issue:
11+
description: "GitHub issue / PR number where the SLO report will be posted (optional; will be inferred for PR runs)"
12+
required: false
13+
baseline_ref:
14+
description: "Baseline commit/branch/tag to compare against (leave empty to auto-detect merge-base with main)"
1615
required: false
1716
slo_workload_read_max_rps:
18-
default: '1000'
17+
description: "Maximum read RPS for the SLO workload"
1918
required: false
19+
default: "1000"
2020
slo_workload_write_max_rps:
21-
default: '100'
21+
description: "Maximum write RPS for the SLO workload"
2222
required: false
23+
default: "100"
24+
slo_workload_duration_seconds:
25+
description: "Duration of the SLO workload in seconds"
26+
required: false
27+
default: "600"
28+
29+
permissions:
30+
contents: read
31+
pull-requests: write
32+
checks: write
2333

2434
jobs:
2535
ydb-slo-action:
26-
if: (!contains(github.event.pull_request.labels.*.name, 'no slo'))
27-
2836
name: Run YDB SLO Tests
29-
runs-on: ubuntu-latest
37+
runs-on: "large-runner"
38+
39+
# Run on PRs only when labeled "SLO"; allow manual runs via workflow_dispatch
40+
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'SLO') }}
3041

3142
strategy:
43+
fail-fast: false
3244
matrix:
3345
include:
34-
- prefix: table
46+
- id: sync-table
47+
prefix: table
3548
workload: sync-table
36-
create-args: grpc://localhost:2135 /Root/testdb
37-
run-args: |
38-
grpc://localhost:2135 /Root/testdb \
39-
--prom-pgw localhost:9091 \
40-
--report-period 250 \
41-
--time ${{inputs.slo_workload_duration_seconds || 600}} \
42-
--read-rps ${{inputs.slo_workload_read_max_rps || 1000}} \
43-
--write-rps ${{inputs.slo_workload_write_max_rps || 100}} \
44-
--read-timeout 1000 \
45-
--write-timeout 1000
46-
cleanup-args: grpc://localhost:2135 /Root/testdb
47-
- prefix: table
49+
- id: sync-query
50+
prefix: table
4851
workload: sync-query
49-
create-args: grpc://localhost:2135 /Root/testdb
50-
run-args: |
51-
grpc://localhost:2135 /Root/testdb \
52-
--prom-pgw localhost:9091 \
53-
--report-period 250 \
54-
--time ${{inputs.slo_workload_duration_seconds || 600}} \
55-
--read-rps ${{inputs.slo_workload_read_max_rps || 1000}} \
56-
--write-rps ${{inputs.slo_workload_write_max_rps || 100}} \
57-
--read-timeout 1000 \
58-
--write-timeout 1000
59-
cleanup-args: grpc://localhost:2135 /Root/testdb
60-
# - prefix: topic
61-
# workload: topic-basic
62-
# create-args: |
63-
# grpc://localhost:2135 /Root/testdb \
64-
# --path /Root/testdb/slo_topic \
65-
# --partitions-count 10
66-
# run-args: |
67-
# grpc://localhost:2135 /Root/testdb \
68-
# --path /Root/testdb/slo_topic \
69-
# --prom-pgw localhost:9091 \
70-
# --partitions-count 10 \
71-
# --read-threads 10 \
72-
# --write-threads 10 \
73-
# --report-period 250 \
74-
# --time ${{inputs.slo_workload_duration_seconds || 600}} \
75-
# --read-rps ${{inputs.slo_workload_read_max_rps || 100}} \
76-
# --write-rps ${{inputs.slo_workload_write_max_rps || 100}} \
77-
# --read-timeout 5000 \
78-
# --write-timeout 5000
79-
# cleanup-args: grpc://localhost:2135 /Root/testdb --path /Root/testdb/slo_topic
80-
8152

8253
concurrency:
8354
group: slo-${{ github.ref }}-${{ matrix.workload }}
8455
cancel-in-progress: true
8556

8657
steps:
87-
- name: Checkout repository
88-
uses: actions/checkout@v4
58+
- name: Checkout current version
59+
uses: actions/checkout@v6
60+
with:
61+
path: current
62+
fetch-depth: 0
63+
# Step: picks the commit that the `current` checkout is compared against and
# publishes two step outputs:
#   sha - the baseline value itself (consumed by the baseline checkout step)
#   ref - a short display name for that baseline
- name: Determine baseline commit
  id: baseline
  shell: bash
  env:
    # Handed over as an environment variable so the value is never spliced
    # into the script text (prevents shell injection / quoting breakage).
    BASELINE_INPUT: ${{ inputs.baseline_ref }}
  run: |
    cd current

    # No --depth option: git requires a positive depth, and the `current`
    # checkout already uses fetch-depth: 0, so this only refreshes origin/main.
    git fetch --no-tags --prune origin +refs/heads/main:refs/remotes/origin/main

    # An explicit input wins; otherwise use the merge-base with origin/main.
    if [[ -n "${BASELINE_INPUT}" ]]; then
      BASELINE="${BASELINE_INPUT}"
    else
      BASELINE="$(git merge-base HEAD origin/main)"
    fi

    echo "sha=$BASELINE" >> "$GITHUB_OUTPUT"

    # Try to determine a human-readable ref name for baseline
    if git merge-base --is-ancestor "$BASELINE" origin/main && \
       [[ "$(git rev-parse origin/main)" == "$BASELINE" ]]; then
      BASELINE_REF="main"
    else
      # `|| true` keeps the step alive under `-o pipefail` when grep finds nothing.
      BRANCH="$(git branch -r --contains "$BASELINE" | grep -v HEAD | head -1 | sed 's/.*\///' || true)"
      if [[ -n "$BRANCH" ]]; then
        BASELINE_REF="${BRANCH}@${BASELINE:0:7}"
      else
        BASELINE_REF="${BASELINE:0:7}"
      fi
    fi

    echo "ref=$BASELINE_REF" >> "$GITHUB_OUTPUT"
94+
95+
- name: Checkout baseline version
96+
uses: actions/checkout@v6
9297
with:
93-
python-version: '3.8'
94-
cache: 'pip'
98+
ref: ${{ steps.baseline.outputs.sha }}
99+
path: baseline
100+
fetch-depth: 1
101+
102+
- name: Show Docker versions
103+
run: |
104+
docker --version
105+
docker compose version
95106
# Step: builds one workload image per checkout. The `current` and `baseline`
# directories each provide their own tests/slo/Dockerfile and build context.
- name: Build workload images (current + baseline)
  run: |
    for variant in current baseline; do
      docker build \
        -f "$GITHUB_WORKSPACE/${variant}/tests/slo/Dockerfile" \
        -t "ydb-app-${variant}" \
        "$GITHUB_WORKSPACE/${variant}"
    done
101118
102119
- name: Initialize YDB SLO
103-
uses: ydb-platform/ydb-slo-action/init@53e02500d4a98a6b67d9009bc46e839236f15f81
120+
id: ydb_slo
121+
uses: ydb-platform/ydb-slo-action/init@13c687b7d4b2879da79dd12932dee0ed2b65dd1c
104122
with:
105-
github_pull_request_number: ${{ github.event.inputs.github_pull_request_number }}
123+
github_issue: ${{ github.event.pull_request.number || inputs.github_issue }}
106124
github_token: ${{ secrets.GITHUB_TOKEN }}
107-
workload_name: ${{ matrix.workload }}
108-
ydb_database_node_count: 5
125+
workload_name: ydb-python-${{ matrix.workload }}
126+
workload_current_ref: ${{ github.head_ref || github.ref_name }}
127+
workload_baseline_ref: ${{ steps.baseline.outputs.ref }}
109128

# Step: runs the `<prefix>-create` command of the current image once against
# the YDB endpoint before the workloads start.
- name: Prepare SLO Database
  env:
    # The branch name comes from the pull request and is attacker-controlled;
    # it is passed via the environment instead of being interpolated into the
    # script, so it cannot alter the shell command.
    REF: ${{ github.head_ref || github.ref_name }}
    WORKLOAD: ${{ matrix.workload }}
  run: |
    docker run --rm \
      --network ydb_ydb-net \
      --add-host "ydb:172.28.0.11" \
      --add-host "ydb:172.28.0.12" \
      --add-host "ydb:172.28.0.13" \
      --add-host "ydb:172.28.0.99" \
      -e "WORKLOAD=${WORKLOAD}" \
      -e "REF=${REF}" \
      ydb-app-current \
      ${{ matrix.prefix }}-create grpc://ydb:2136 /Root/testdb
113141
114-
- name: Run SLO Tests
142+
- name: Run SLO Tests (current + baseline in parallel)
143+
timeout-minutes: 15
115144
env:
116-
REF: '${{ github.head_ref || github.ref }}'
117-
WORKLOAD: '${{ matrix.workload }}'
145+
WORKLOAD: ${{ matrix.workload }}
146+
DURATION: ${{ inputs.slo_workload_duration_seconds || 600 }}
147+
READ_RPS: ${{ inputs.slo_workload_read_max_rps || 1000 }}
148+
WRITE_RPS: ${{ inputs.slo_workload_write_max_rps || 100 }}
149+
CURRENT_REF: ${{ github.head_ref || github.ref_name }}
150+
BASELINE_REF: ${{ steps.baseline.outputs.ref }}
118151
run: |
119-
python ./tests/slo/src ${{ matrix.prefix }}-run ${{ matrix.run-args }}
152+
ARGS="${{ matrix.prefix }}-run grpc://ydb:2136 /Root/testdb \
153+
--otlp-endpoint http://prometheus:9090/api/v1/otlp/v1/metrics \
154+
--report-period 250 \
155+
--time ${DURATION} \
156+
--read-rps ${READ_RPS} \
157+
--write-rps ${WRITE_RPS} \
158+
--read-timeout 1000 \
159+
--write-timeout 1000"
120160
121-
- if: always()
122-
name: Cleanup SLO Database
123-
run: |
124-
python ./tests/slo/src ${{ matrix.prefix }}-cleanup ${{ matrix.cleanup-args }}
161+
echo "Starting current workload (ref=${CURRENT_REF}, workload=${WORKLOAD})..."
162+
docker run -d \
163+
--name ydb-app-current \
164+
--network ydb_ydb-net \
165+
--add-host "ydb:172.28.0.11" \
166+
--add-host "ydb:172.28.0.12" \
167+
--add-host "ydb:172.28.0.13" \
168+
--add-host "ydb:172.28.0.99" \
169+
-e "REF=${CURRENT_REF}" \
170+
-e "WORKLOAD=${WORKLOAD}" \
171+
ydb-app-current \
172+
$ARGS
173+
174+
echo "Starting baseline workload (ref=${BASELINE_REF}, workload=${WORKLOAD})..."
175+
docker run -d \
176+
--name ydb-app-baseline \
177+
--network ydb_ydb-net \
178+
--add-host "ydb:172.28.0.11" \
179+
--add-host "ydb:172.28.0.12" \
180+
--add-host "ydb:172.28.0.13" \
181+
--add-host "ydb:172.28.0.99" \
182+
-e "REF=${BASELINE_REF}" \
183+
-e "WORKLOAD=${WORKLOAD}" \
184+
ydb-app-baseline \
185+
$ARGS
186+
187+
echo ""
188+
echo "==================== INITIAL CURRENT LOGS ===================="
189+
docker logs -n 15 ydb-app-current 2>&1 || echo "No current container"
190+
echo ""
191+
echo "==================== INITIAL BASELINE LOGS ===================="
192+
docker logs -n 15 ydb-app-baseline 2>&1 || echo "No baseline container"
193+
echo ""
194+
195+
echo "Waiting for workloads to complete (${DURATION}s)..."
196+
sleep ${DURATION}
197+
198+
echo "Stopping containers after ${DURATION}s..."
199+
docker stop --timeout=30 ydb-app-current ydb-app-baseline 2>&1 || true
200+
201+
# Force kill if still running
202+
docker kill ydb-app-current ydb-app-baseline 2>&1 || true
203+
204+
# Check exit codes
205+
CURRENT_EXIT=$(docker inspect ydb-app-current --format='{{.State.ExitCode}}' 2>/dev/null || echo "1")
206+
BASELINE_EXIT=$(docker inspect ydb-app-baseline --format='{{.State.ExitCode}}' 2>/dev/null || echo "1")
207+
208+
echo "Current exit code: ${CURRENT_EXIT}"
209+
echo "Baseline exit code: ${BASELINE_EXIT}"
210+
211+
echo ""
212+
echo "==================== FINAL CURRENT LOGS ===================="
213+
docker logs -n 15 ydb-app-current 2>&1 || echo "No current container"
214+
echo ""
215+
echo "==================== FINAL BASELINE LOGS ===================="
216+
docker logs -n 15 ydb-app-baseline 2>&1 || echo "No baseline container"
217+
echo ""
218+
219+
if [[ "${CURRENT_EXIT}" != "0" || "${BASELINE_EXIT}" != "0" ]]; then
220+
echo "One or both workloads failed."
221+
exit 1
222+
fi
223+
224+
echo "SUCCESS: Workloads completed successfully"
125225
# Step: writes each workload container's full log to <variant>.log in the
# working directory; if `docker logs` fails, the file holds a placeholder line.
# Runs even when earlier steps failed.
- if: always()
  name: Store logs
  run: |
    for variant in current baseline; do
      docker logs "ydb-app-${variant}" > "${variant}.log" 2>&1 \
        || echo "No ${variant} container" > "${variant}.log"
    done
130231
131232
- if: always()
233+
name: Upload logs
132234
uses: actions/upload-artifact@v4
133235
with:
134-
name: ${{ matrix.workload }}-chaos-ydb.log
135-
path: ./chaos-ydb.log
236+
name: ydb-python-${{ matrix.workload }}-logs
237+
path: |
238+
./current.log
239+
./baseline.log
136240
retention-days: 1

0 commit comments

Comments
 (0)