# SLO workflow: builds the workload image from the current revision and from a
# baseline revision, runs both workloads side by side against the YDB
# environment prepared by ydb-slo-action, and uploads their logs.
name: SLO

on:
  pull_request:
    # "labeled" is included so that adding the "SLO" label starts a run.
    types: [opened, reopened, synchronize, labeled]
    branches:
      - main
  workflow_dispatch:
    inputs:
      github_issue:
        description: "GitHub issue / PR number where the SLO report will be posted (optional; will be inferred for PR runs)"
        required: false
      baseline_ref:
        description: "Baseline commit/branch/tag to compare against (leave empty to auto-detect merge-base with main)"
        required: false
      slo_workload_read_max_rps:
        description: "Maximum read RPS for the SLO workload"
        required: false
        default: "1000"
      slo_workload_write_max_rps:
        description: "Maximum write RPS for the SLO workload"
        required: false
        default: "100"
      slo_workload_duration_seconds:
        description: "Duration of the SLO workload in seconds"
        required: false
        default: "600"

permissions:
  contents: read
  pull-requests: write
  checks: write

jobs:
  ydb-slo-action:
    name: Run YDB SLO Tests
    runs-on: "large-runner"

    # Run on PRs only when labeled "SLO"; allow manual runs via workflow_dispatch
    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'SLO') }}

    strategy:
      # One workload failing must not cancel the other matrix entry.
      fail-fast: false
      matrix:
        include:
          - id: sync-table
            prefix: table
            workload: sync-table
          - id: sync-query
            prefix: table
            workload: sync-query

    # A newer run for the same ref and workload cancels the one in progress.
    concurrency:
      group: slo-${{ github.ref }}-${{ matrix.workload }}
      cancel-in-progress: true

    steps:
      - name: Checkout current version
        uses: actions/checkout@v6
        with:
          path: current
          # Full history is required to compute the merge-base below.
          fetch-depth: 0

      - name: Determine baseline commit
        id: baseline
        shell: bash
        env:
          # Passed through the environment instead of being expanded inside
          # the script, so a crafted input cannot inject shell commands.
          BASELINE_INPUT: ${{ inputs.baseline_ref }}
        run: |
          cd current

          # The checkout above is a full clone, so no --depth is needed here
          # (git rejects --depth=0 as "not a positive number").
          git fetch --no-tags --prune origin +refs/heads/main:refs/remotes/origin/main

          if [[ -n "$BASELINE_INPUT" ]]; then
            # Resolve a branch/tag/SHA to a commit id; also try the
            # remote-tracking name because branches only exist as origin/<name>.
            BASELINE="$(git rev-parse --verify --quiet "${BASELINE_INPUT}^{commit}" \
              || git rev-parse --verify --quiet "origin/${BASELINE_INPUT}^{commit}" \
              || true)"
            if [[ -z "$BASELINE" ]]; then
              echo "::warning::Could not resolve baseline_ref locally; passing it to checkout unchanged"
              BASELINE="$BASELINE_INPUT"
            fi
          else
            BASELINE="$(git merge-base HEAD origin/main)"
          fi

          echo "sha=$BASELINE" >> "$GITHUB_OUTPUT"

          # Try to determine a human-readable ref name for baseline
          if git merge-base --is-ancestor "$BASELINE" origin/main && \
             [[ "$(git rev-parse origin/main)" == "$BASELINE" ]]; then
            BASELINE_REF="main"
          else
            BRANCH="$(git branch -r --contains "$BASELINE" | grep -v HEAD | head -1 | sed 's/.*\///' || true)"
            if [[ -n "$BRANCH" ]]; then
              BASELINE_REF="${BRANCH}@${BASELINE:0:7}"
            else
              BASELINE_REF="${BASELINE:0:7}"
            fi
          fi

          echo "ref=$BASELINE_REF" >> "$GITHUB_OUTPUT"

      - name: Checkout baseline version
        uses: actions/checkout@v6
        with:
          ref: ${{ steps.baseline.outputs.sha }}
          path: baseline
          fetch-depth: 1

      - name: Show Docker versions
        run: |
          docker --version
          docker compose version

      - name: Build workload images (current + baseline)
        run: |
          docker build \
            -f "$GITHUB_WORKSPACE/current/tests/slo/Dockerfile" \
            -t "ydb-app-current" \
            "$GITHUB_WORKSPACE/current"

          docker build \
            -f "$GITHUB_WORKSPACE/baseline/tests/slo/Dockerfile" \
            -t "ydb-app-baseline" \
            "$GITHUB_WORKSPACE/baseline"

      - name: Initialize YDB SLO
        id: ydb_slo
        uses: ydb-platform/ydb-slo-action/init@13c687b7d4b2879da79dd12932dee0ed2b65dd1c
        with:
          github_issue: ${{ github.event.pull_request.number || inputs.github_issue }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          workload_name: ydb-python-${{ matrix.workload }}
          workload_current_ref: ${{ github.head_ref || github.ref_name }}
          workload_baseline_ref: ${{ steps.baseline.outputs.ref }}

      - name: Prepare SLO Database
        env:
          WORKLOAD: ${{ matrix.workload }}
          # The head branch name is attacker-controlled on PRs; keep it out of
          # the script text by passing it through the environment.
          CURRENT_REF: ${{ github.head_ref || github.ref_name }}
        # NOTE(review): the ydb_ydb-net network and the 172.28.0.x addresses are
        # presumably provided by the init action above - confirm.
        run: |
          docker run --rm \
            --network ydb_ydb-net \
            --add-host "ydb:172.28.0.11" \
            --add-host "ydb:172.28.0.12" \
            --add-host "ydb:172.28.0.13" \
            --add-host "ydb:172.28.0.99" \
            -e "WORKLOAD=${WORKLOAD}" \
            -e "REF=${CURRENT_REF}" \
            ydb-app-current \
            ${{ matrix.prefix }}-create grpc://ydb:2136 /Root/testdb

      - name: Run SLO Tests (current + baseline in parallel)
        # NOTE(review): this limit is fixed while the duration is an input;
        # a duration close to or above 15 minutes would hit it - confirm.
        timeout-minutes: 15
        env:
          WORKLOAD: ${{ matrix.workload }}
          DURATION: ${{ inputs.slo_workload_duration_seconds || 600 }}
          READ_RPS: ${{ inputs.slo_workload_read_max_rps || 1000 }}
          WRITE_RPS: ${{ inputs.slo_workload_write_max_rps || 100 }}
          CURRENT_REF: ${{ github.head_ref || github.ref_name }}
          BASELINE_REF: ${{ steps.baseline.outputs.ref }}
        run: |
          # Shared argument list for both containers; an array keeps each
          # value a single word without relying on unquoted word-splitting.
          ARGS=(
            "${{ matrix.prefix }}-run" grpc://ydb:2136 /Root/testdb
            --otlp-endpoint http://prometheus:9090/api/v1/otlp/v1/metrics
            --report-period 250
            --time "${DURATION}"
            --read-rps "${READ_RPS}"
            --write-rps "${WRITE_RPS}"
            --read-timeout 1000
            --write-timeout 1000
          )

          echo "Starting current workload (ref=${CURRENT_REF}, workload=${WORKLOAD})..."
          docker run -d \
            --name ydb-app-current \
            --network ydb_ydb-net \
            --add-host "ydb:172.28.0.11" \
            --add-host "ydb:172.28.0.12" \
            --add-host "ydb:172.28.0.13" \
            --add-host "ydb:172.28.0.99" \
            -e "REF=${CURRENT_REF}" \
            -e "WORKLOAD=${WORKLOAD}" \
            ydb-app-current \
            "${ARGS[@]}"

          echo "Starting baseline workload (ref=${BASELINE_REF}, workload=${WORKLOAD})..."
          docker run -d \
            --name ydb-app-baseline \
            --network ydb_ydb-net \
            --add-host "ydb:172.28.0.11" \
            --add-host "ydb:172.28.0.12" \
            --add-host "ydb:172.28.0.13" \
            --add-host "ydb:172.28.0.99" \
            -e "REF=${BASELINE_REF}" \
            -e "WORKLOAD=${WORKLOAD}" \
            ydb-app-baseline \
            "${ARGS[@]}"

          echo ""
          echo "==================== INITIAL CURRENT LOGS ===================="
          docker logs -n 15 ydb-app-current 2>&1 || echo "No current container"
          echo ""
          echo "==================== INITIAL BASELINE LOGS ===================="
          docker logs -n 15 ydb-app-baseline 2>&1 || echo "No baseline container"
          echo ""

          echo "Waiting for workloads to complete (${DURATION}s)..."
          sleep "${DURATION}"

          echo "Stopping containers after ${DURATION}s..."
          docker stop --timeout=30 ydb-app-current ydb-app-baseline 2>&1 || true

          # Force kill if still running
          docker kill ydb-app-current ydb-app-baseline 2>&1 || true

          # Check exit codes
          # NOTE(review): a workload still running when "docker stop" fires
          # reports whatever its SIGTERM handling returns - confirm it exits 0.
          CURRENT_EXIT=$(docker inspect ydb-app-current --format='{{.State.ExitCode}}' 2>/dev/null || echo "1")
          BASELINE_EXIT=$(docker inspect ydb-app-baseline --format='{{.State.ExitCode}}' 2>/dev/null || echo "1")

          echo "Current exit code: ${CURRENT_EXIT}"
          echo "Baseline exit code: ${BASELINE_EXIT}"

          echo ""
          echo "==================== FINAL CURRENT LOGS ===================="
          docker logs -n 15 ydb-app-current 2>&1 || echo "No current container"
          echo ""
          echo "==================== FINAL BASELINE LOGS ===================="
          docker logs -n 15 ydb-app-baseline 2>&1 || echo "No baseline container"
          echo ""

          if [[ "${CURRENT_EXIT}" != "0" || "${BASELINE_EXIT}" != "0" ]]; then
            echo "One or both workloads failed."
            exit 1
          fi

          echo "SUCCESS: Workloads completed successfully"

      # Logs are collected and uploaded even when an earlier step failed.
      - if: always()
        name: Store logs
        run: |
          docker logs ydb-app-current > current.log 2>&1 || echo "No current container" > current.log
          docker logs ydb-app-baseline > baseline.log 2>&1 || echo "No baseline container" > baseline.log

      - if: always()
        name: Upload logs
        uses: actions/upload-artifact@v4
        with:
          name: ydb-python-${{ matrix.workload }}-logs
          path: |
            ./current.log
            ./baseline.log
          retention-days: 1