Retry SessionExpired in topic streams #1348
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SLO workflow: runs on pull requests targeting main and on manual dispatch.
name: SLO

on:
  pull_request:
    # "labeled" is listed so that adding a label to an open PR re-evaluates
    # the job-level `if` condition.
    types:
      - opened
      - reopened
      - synchronize
      - labeled
    branches:
      - main
  workflow_dispatch:
    # All inputs are optional; the workload limits fall back to the defaults
    # declared here when the run is started manually.
    inputs:
      github_issue:
        description: 'GitHub issue / PR number where the SLO report will be posted (optional; will be inferred for PR runs)'
        required: false
      baseline_ref:
        description: 'Baseline commit/branch/tag to compare against (leave empty to auto-detect merge-base with main)'
        required: false
      slo_workload_read_max_rps:
        description: 'Maximum read RPS for the SLO workload'
        required: false
        default: '1000'
      slo_workload_write_max_rps:
        description: 'Maximum write RPS for the SLO workload'
        required: false
        default: '100'
      slo_workload_duration_seconds:
        description: 'Duration of the SLO workload in seconds'
        required: false
        default: '600'

# Token scopes granted to every job in this workflow.
permissions:
  contents: read
  pull-requests: write
  checks: write
jobs:
  # Builds workload images for the current and the baseline revision, runs both
  # against the same YDB endpoint for a fixed duration, and uploads their logs.
  ydb-slo-action:
    name: Run YDB SLO Tests
    runs-on: "large-runner-python-sdk"
    # Run on PRs only when labeled "SLO"; allow manual runs via workflow_dispatch
    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'SLO') }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - id: sync-table
            prefix: table
            workload: sync-table
          # NOTE(review): sync-query also uses the "table" prefix for its
          # create/run commands - confirm this is intended.
          - id: sync-query
            prefix: table
            workload: sync-query
    # One in-flight run per ref and workload; a newer run cancels the older one.
    concurrency:
      group: slo-${{ github.ref }}-${{ matrix.workload }}
      cancel-in-progress: true
    steps:
      - name: Install dependencies
        run: |
          YQ_VERSION=v4.48.2
          BUILDX_VERSION=0.30.1
          COMPOSE_VERSION=2.40.3
          # -f makes curl fail on HTTP errors instead of saving the error page
          # as the binary (same flag the Buildx/Compose downloads below use).
          sudo curl -fL "https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_amd64" -o /usr/local/bin/yq && \
            sudo chmod +x /usr/local/bin/yq
          echo "Updating Docker plugins..."
          sudo mkdir -p /usr/local/lib/docker/cli-plugins
          echo "Installing Docker Buildx ${BUILDX_VERSION}..."
          sudo curl -fLo /usr/local/lib/docker/cli-plugins/docker-buildx \
            "https://github.com/docker/buildx/releases/download/v${BUILDX_VERSION}/buildx-v${BUILDX_VERSION}.linux-amd64"
          sudo chmod +x /usr/local/lib/docker/cli-plugins/docker-buildx
          echo "Installing Docker Compose ${COMPOSE_VERSION}..."
          sudo curl -fLo /usr/local/lib/docker/cli-plugins/docker-compose \
            "https://github.com/docker/compose/releases/download/v${COMPOSE_VERSION}/docker-compose-linux-x86_64"
          sudo chmod +x /usr/local/lib/docker/cli-plugins/docker-compose
          echo "Installed versions:"
          yq --version
          docker --version
          docker buildx version
          docker compose version

      # Full history is fetched so that `git merge-base` and
      # `git branch -r --contains` in the next step can work.
      - name: Checkout current version
        uses: actions/checkout@v5
        with:
          path: current
          fetch-depth: 0

      # Outputs:
      #   sha - the baseline to check out (the merge-base with origin/main, or
      #         the manually supplied baseline_ref, which may be a branch or
      #         tag name rather than a commit hash)
      #   ref - a human-readable label for the baseline used in reports
      - name: Determine baseline commit
        id: baseline
        shell: bash
        env:
          # Passed through the environment rather than interpolated into the
          # script, so a crafted ref value cannot inject shell commands.
          BASELINE_INPUT: ${{ inputs.baseline_ref }}
        run: |
          cd current
          if [[ -n "${BASELINE_INPUT}" ]]; then
            BASELINE="${BASELINE_INPUT}"
          else
            BASELINE=$(git merge-base HEAD origin/main)
          fi
          echo "sha=$BASELINE" >> "$GITHUB_OUTPUT"
          # Try to determine a human-readable ref name for baseline
          # Check if baseline is on main
          if git merge-base --is-ancestor "$BASELINE" origin/main && \
            [ "$(git rev-parse origin/main)" = "$BASELINE" ]; then
            BASELINE_REF="main"
          else
            # Try to find a branch containing this commit
            BRANCH=$(git branch -r --contains "$BASELINE" | grep -v HEAD | head -1 | sed 's/.*\///' || echo "")
            if [ -n "$BRANCH" ]; then
              BASELINE_REF="${BRANCH}@${BASELINE:0:7}"
            else
              BASELINE_REF="${BASELINE:0:7}"
            fi
          fi
          echo "ref=$BASELINE_REF" >> "$GITHUB_OUTPUT"

      - name: Checkout baseline version
        uses: actions/checkout@v5
        with:
          ref: ${{ steps.baseline.outputs.sha }}
          path: baseline
          fetch-depth: 1

      - name: Show Docker versions
        run: |
          docker --version
          docker compose version

      # Each image is built from its own checkout, using that checkout's
      # tests/slo/Dockerfile.
      - name: Build workload images (current + baseline)
        run: |
          docker build \
            -f "$GITHUB_WORKSPACE/current/tests/slo/Dockerfile" \
            -t "ydb-app-current" \
            "$GITHUB_WORKSPACE/current"
          docker build \
            -f "$GITHUB_WORKSPACE/baseline/tests/slo/Dockerfile" \
            -t "ydb-app-baseline" \
            "$GITHUB_WORKSPACE/baseline"

      # NOTE(review): presumably this action brings up the YDB / Prometheus
      # environment (network "ydb_ydb-net") used by the steps below - confirm.
      - name: Initialize YDB SLO
        id: ydb_slo
        uses: ydb-platform/ydb-slo-action/init@13c687b7d4b2879da79dd12932dee0ed2b65dd1c
        with:
          github_issue: ${{ github.event.pull_request.number || inputs.github_issue }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          workload_name: ydb-python-${{ matrix.workload }}
          workload_current_ref: ${{ github.head_ref || github.ref_name }}
          workload_baseline_ref: ${{ steps.baseline.outputs.ref }}

      # Runs the "<prefix>-create" command once, from the current image.
      - name: Prepare SLO Database
        env:
          # The PR branch name is user-controlled; hand it to the script via
          # the environment (as the run step below does) instead of splicing
          # it into the shell source.
          WORKLOAD: ${{ matrix.workload }}
          CURRENT_REF: ${{ github.head_ref || github.ref_name }}
        run: |
          docker run --rm \
            --network ydb_ydb-net \
            --add-host "ydb:172.28.0.11" \
            --add-host "ydb:172.28.0.12" \
            --add-host "ydb:172.28.0.13" \
            --add-host "ydb:172.28.0.99" \
            -e "WORKLOAD=${WORKLOAD}" \
            -e "REF=${CURRENT_REF}" \
            ydb-app-current \
            ${{ matrix.prefix }}-create grpc://ydb:2136 /Root/testdb

      # Starts both workload containers detached, waits DURATION seconds, then
      # stops them and prints their exit codes and the tail of their logs.
      # NOTE(review): the default duration is 600s against a 15-minute step
      # timeout; a manually supplied duration near or above 900s would be cut
      # off by the timeout.
      - name: Run SLO Tests (current + baseline in parallel)
        timeout-minutes: 15
        env:
          WORKLOAD: ${{ matrix.workload }}
          DURATION: ${{ inputs.slo_workload_duration_seconds || 600 }}
          READ_RPS: ${{ inputs.slo_workload_read_max_rps || 1000 }}
          WRITE_RPS: ${{ inputs.slo_workload_write_max_rps || 100 }}
          CURRENT_REF: ${{ github.head_ref || github.ref_name }}
          BASELINE_REF: ${{ steps.baseline.outputs.ref }}
        run: |
          # Shared command line for both containers; expanded unquoted below so
          # the shell splits it into separate arguments.
          ARGS="${{ matrix.prefix }}-run grpc://ydb:2136 /Root/testdb \
            --otlp-endpoint http://prometheus:9090/api/v1/otlp/v1/metrics \
            --report-period 250 \
            --time ${DURATION} \
            --read-rps ${READ_RPS} \
            --write-rps ${WRITE_RPS} \
            --read-timeout 1000 \
            --write-timeout 1000"
          echo "Starting current workload (ref=${CURRENT_REF}, workload=${WORKLOAD})..."
          docker run -d \
            --name ydb-app-current \
            --network ydb_ydb-net \
            --add-host "ydb:172.28.0.11" \
            --add-host "ydb:172.28.0.12" \
            --add-host "ydb:172.28.0.13" \
            --add-host "ydb:172.28.0.99" \
            -e "REF=${CURRENT_REF}" \
            -e "WORKLOAD=${WORKLOAD}" \
            ydb-app-current \
            $ARGS
          echo "Starting baseline workload (ref=${BASELINE_REF}, workload=${WORKLOAD})..."
          docker run -d \
            --name ydb-app-baseline \
            --network ydb_ydb-net \
            --add-host "ydb:172.28.0.11" \
            --add-host "ydb:172.28.0.12" \
            --add-host "ydb:172.28.0.13" \
            --add-host "ydb:172.28.0.99" \
            -e "REF=${BASELINE_REF}" \
            -e "WORKLOAD=${WORKLOAD}" \
            ydb-app-baseline \
            $ARGS
          echo ""
          echo "==================== INITIAL CURRENT LOGS ===================="
          docker logs -n 15 ydb-app-current 2>&1 || echo "No current container"
          echo ""
          echo "==================== INITIAL BASELINE LOGS ===================="
          docker logs -n 15 ydb-app-baseline 2>&1 || echo "No baseline container"
          echo ""
          echo "Waiting for workloads to complete (${DURATION}s)..."
          sleep "${DURATION}"
          echo "Stopping containers after ${DURATION}s..."
          docker stop --timeout=30 ydb-app-current ydb-app-baseline 2>&1 || true
          # Force kill if still running
          docker kill ydb-app-current ydb-app-baseline 2>&1 || true
          # Check exit codes (a container that cannot be inspected counts as 1)
          CURRENT_EXIT=$(docker inspect ydb-app-current --format='{{.State.ExitCode}}' 2>/dev/null || echo "1")
          BASELINE_EXIT=$(docker inspect ydb-app-baseline --format='{{.State.ExitCode}}' 2>/dev/null || echo "1")
          echo "Current exit code: ${CURRENT_EXIT}"
          echo "Baseline exit code: ${BASELINE_EXIT}"
          echo ""
          echo "==================== FINAL CURRENT LOGS ===================="
          docker logs -n 15 ydb-app-current 2>&1 || echo "No current container"
          echo ""
          echo "==================== FINAL BASELINE LOGS ===================="
          docker logs -n 15 ydb-app-baseline 2>&1 || echo "No baseline container"
          echo ""
          # NOTE(review): the step still exits 0 when a workload reports a
          # non-zero exit code, so such failures are visible only in the log.
          # Confirm this best-effort behaviour is intentional.
          if [[ "${CURRENT_EXIT}" != "0" || "${BASELINE_EXIT}" != "0" ]]; then
            echo "One or both workloads failed."
            exit 0
          fi
          echo "SUCCESS: Workloads completed successfully"

      # Runs even after earlier failures; writes a placeholder line when a
      # container's logs cannot be read.
      - name: Store logs
        if: always()
        run: |
          docker logs ydb-app-current > current.log 2>&1 || echo "No current container" > current.log
          docker logs ydb-app-baseline > baseline.log 2>&1 || echo "No baseline container" > baseline.log

      - name: Upload logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ydb-python-${{ matrix.workload }}-logs
          path: |
            ./current.log
            ./baseline.log
          retention-days: 1