# Workflow file for the GitHub Actions run
# "Retry SessionExpired in topic streams #1348".

name: SLO
# Triggers: pull requests targeting main (the job itself additionally requires
# the "SLO" label) and manual runs with tunable workload parameters.
on:
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
      - labeled
    branches:
      - main
  workflow_dispatch:
    inputs:
      # Where to post the report; optional for manual runs.
      github_issue:
        description: "GitHub issue / PR number where the SLO report will be posted (optional; will be inferred for PR runs)"
        required: false
      # Commit/branch/tag to compare against; empty means merge-base with main.
      baseline_ref:
        description: "Baseline commit/branch/tag to compare against (leave empty to auto-detect merge-base with main)"
        required: false
      # Workload tuning knobs; defaults are quoted so they stay strings.
      slo_workload_read_max_rps:
        description: "Maximum read RPS for the SLO workload"
        required: false
        default: "1000"
      slo_workload_write_max_rps:
        description: "Maximum write RPS for the SLO workload"
        required: false
        default: "100"
      slo_workload_duration_seconds:
        description: "Duration of the SLO workload in seconds"
        required: false
        default: "600"
# Scopes granted to GITHUB_TOKEN for every job in this workflow.
permissions:
  # Read-only access to repository contents (used by the checkout steps).
  contents: read
  # Write access to pull requests.
  pull-requests: write
  # Write access to check runs.
  checks: write
jobs:
  # Builds the workload image from the current checkout and from a baseline
  # commit, runs both against the same database endpoint, and stores the
  # container logs as an artifact.
  ydb-slo-action:
    name: Run YDB SLO Tests
    runs-on: "large-runner-python-sdk"
    # Run on PRs only when labeled "SLO"; allow manual runs via workflow_dispatch
    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'SLO') }}
    strategy:
      # Keep the other matrix entry running when one of them fails.
      fail-fast: false
      matrix:
        include:
          - id: sync-table
            prefix: table
            workload: sync-table
          - id: sync-query
            prefix: table
            workload: sync-query
    # A newer run for the same ref and workload cancels the one in progress.
    concurrency:
      group: slo-${{ github.ref }}-${{ matrix.workload }}
      cancel-in-progress: true
    steps:
      - name: Install dependencies
        run: |
          YQ_VERSION=v4.48.2
          BUILDX_VERSION=0.30.1
          COMPOSE_VERSION=2.40.3

          # -f makes curl fail on HTTP errors instead of saving the error page
          # as the binary; separate statements let `bash -e` stop on failure.
          sudo curl -fL "https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_amd64" -o /usr/local/bin/yq
          sudo chmod +x /usr/local/bin/yq

          echo "Updating Docker plugins..."
          sudo mkdir -p /usr/local/lib/docker/cli-plugins

          echo "Installing Docker Buildx ${BUILDX_VERSION}..."
          sudo curl -fLo /usr/local/lib/docker/cli-plugins/docker-buildx \
            "https://github.com/docker/buildx/releases/download/v${BUILDX_VERSION}/buildx-v${BUILDX_VERSION}.linux-amd64"
          sudo chmod +x /usr/local/lib/docker/cli-plugins/docker-buildx

          echo "Installing Docker Compose ${COMPOSE_VERSION}..."
          sudo curl -fLo /usr/local/lib/docker/cli-plugins/docker-compose \
            "https://github.com/docker/compose/releases/download/v${COMPOSE_VERSION}/docker-compose-linux-x86_64"
          sudo chmod +x /usr/local/lib/docker/cli-plugins/docker-compose

          echo "Installed versions:"
          yq --version
          docker --version
          docker buildx version
          docker compose version

      - name: Checkout current version
        uses: actions/checkout@v5
        with:
          path: current
          # Full history is needed for `git merge-base` in the next step.
          fetch-depth: 0
          # No later step performs authenticated git operations, and this
          # directory is used as a Docker build context, so do not leave the
          # token in .git/config.
          persist-credentials: false

      - name: Determine baseline commit
        id: baseline
        shell: bash
        env:
          # Passed through the environment rather than interpolated into the
          # script, so the value can never be parsed as shell code.
          BASELINE_INPUT: ${{ inputs.baseline_ref }}
        run: |
          cd current

          # Explicit input wins; otherwise compare against the merge-base with main.
          if [[ -n "${BASELINE_INPUT}" ]]; then
            BASELINE="${BASELINE_INPUT}"
          else
            BASELINE=$(git merge-base HEAD origin/main)
          fi
          echo "sha=$BASELINE" >> "$GITHUB_OUTPUT"

          # Try to determine a human-readable ref name for baseline.
          # Label it "main" only when it is exactly the tip of origin/main.
          if git merge-base --is-ancestor "$BASELINE" origin/main && \
             [ "$(git rev-parse origin/main)" = "$BASELINE" ]; then
            BASELINE_REF="main"
          else
            # Try to find a remote branch containing this commit; the sed strips
            # everything up to the last "/" of the first match.
            BRANCH=$(git branch -r --contains "$BASELINE" | grep -v HEAD | head -1 | sed 's/.*\///' || echo "")
            if [ -n "$BRANCH" ]; then
              BASELINE_REF="${BRANCH}@${BASELINE:0:7}"
            else
              BASELINE_REF="${BASELINE:0:7}"
            fi
          fi
          echo "ref=$BASELINE_REF" >> "$GITHUB_OUTPUT"

      - name: Checkout baseline version
        uses: actions/checkout@v5
        with:
          ref: ${{ steps.baseline.outputs.sha }}
          path: baseline
          fetch-depth: 1
          # Same reasoning as for the current checkout: the directory becomes a
          # Docker build context and needs no git credentials afterwards.
          persist-credentials: false

      - name: Show Docker versions
        run: |
          docker --version
          docker compose version

      - name: Build workload images (current + baseline)
        run: |
          # Each image is built from its own checkout with that checkout's Dockerfile.
          docker build \
            -f "$GITHUB_WORKSPACE/current/tests/slo/Dockerfile" \
            -t "ydb-app-current" \
            "$GITHUB_WORKSPACE/current"

          docker build \
            -f "$GITHUB_WORKSPACE/baseline/tests/slo/Dockerfile" \
            -t "ydb-app-baseline" \
            "$GITHUB_WORKSPACE/baseline"

      # NOTE(review): presumably this action starts the YDB cluster and the
      # `ydb_ydb-net` Docker network used by the steps below - confirm against
      # the action's documentation.
      - name: Initialize YDB SLO
        id: ydb_slo
        uses: ydb-platform/ydb-slo-action/init@13c687b7d4b2879da79dd12932dee0ed2b65dd1c
        with:
          # PR number for pull_request runs, otherwise the manual input.
          github_issue: ${{ github.event.pull_request.number || inputs.github_issue }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          workload_name: ydb-python-${{ matrix.workload }}
          workload_current_ref: ${{ github.head_ref || github.ref_name }}
          workload_baseline_ref: ${{ steps.baseline.outputs.ref }}

      - name: Prepare SLO Database
        env:
          WORKLOAD: ${{ matrix.workload }}
          # The PR branch name is attacker-controlled; keep it out of the
          # script text by passing it through the environment.
          CURRENT_REF: ${{ github.head_ref || github.ref_name }}
        run: |
          docker run --rm \
            --network ydb_ydb-net \
            --add-host "ydb:172.28.0.11" \
            --add-host "ydb:172.28.0.12" \
            --add-host "ydb:172.28.0.13" \
            --add-host "ydb:172.28.0.99" \
            -e "WORKLOAD=${WORKLOAD}" \
            -e "REF=${CURRENT_REF}" \
            ydb-app-current \
            ${{ matrix.prefix }}-create grpc://ydb:2136 /Root/testdb

      - name: Run SLO Tests (current + baseline in parallel)
        timeout-minutes: 15
        env:
          WORKLOAD: ${{ matrix.workload }}
          # Inputs are empty on pull_request runs, so fall back to the defaults.
          DURATION: ${{ inputs.slo_workload_duration_seconds || 600 }}
          READ_RPS: ${{ inputs.slo_workload_read_max_rps || 1000 }}
          WRITE_RPS: ${{ inputs.slo_workload_write_max_rps || 100 }}
          CURRENT_REF: ${{ github.head_ref || github.ref_name }}
          BASELINE_REF: ${{ steps.baseline.outputs.ref }}
        run: |
          # Shared command line for both containers; expanded unquoted below so
          # it is split into separate arguments.
          ARGS="${{ matrix.prefix }}-run grpc://ydb:2136 /Root/testdb \
            --otlp-endpoint http://prometheus:9090/api/v1/otlp/v1/metrics \
            --report-period 250 \
            --time ${DURATION} \
            --read-rps ${READ_RPS} \
            --write-rps ${WRITE_RPS} \
            --read-timeout 1000 \
            --write-timeout 1000"

          echo "Starting current workload (ref=${CURRENT_REF}, workload=${WORKLOAD})..."
          docker run -d \
            --name ydb-app-current \
            --network ydb_ydb-net \
            --add-host "ydb:172.28.0.11" \
            --add-host "ydb:172.28.0.12" \
            --add-host "ydb:172.28.0.13" \
            --add-host "ydb:172.28.0.99" \
            -e "REF=${CURRENT_REF}" \
            -e "WORKLOAD=${WORKLOAD}" \
            ydb-app-current \
            $ARGS

          echo "Starting baseline workload (ref=${BASELINE_REF}, workload=${WORKLOAD})..."
          docker run -d \
            --name ydb-app-baseline \
            --network ydb_ydb-net \
            --add-host "ydb:172.28.0.11" \
            --add-host "ydb:172.28.0.12" \
            --add-host "ydb:172.28.0.13" \
            --add-host "ydb:172.28.0.99" \
            -e "REF=${BASELINE_REF}" \
            -e "WORKLOAD=${WORKLOAD}" \
            ydb-app-baseline \
            $ARGS

          echo ""
          echo "==================== INITIAL CURRENT LOGS ===================="
          docker logs -n 15 ydb-app-current 2>&1 || echo "No current container"
          echo ""
          echo "==================== INITIAL BASELINE LOGS ===================="
          docker logs -n 15 ydb-app-baseline 2>&1 || echo "No baseline container"
          echo ""

          # Both containers run detached; wait out the configured duration here.
          echo "Waiting for workloads to complete (${DURATION}s)..."
          sleep ${DURATION}

          echo "Stopping containers after ${DURATION}s..."
          docker stop --timeout=30 ydb-app-current ydb-app-baseline 2>&1 || true

          # Force kill if still running
          docker kill ydb-app-current ydb-app-baseline 2>&1 || true

          # Check exit codes; a missing container is reported as "1".
          CURRENT_EXIT=$(docker inspect ydb-app-current --format='{{.State.ExitCode}}' 2>/dev/null || echo "1")
          BASELINE_EXIT=$(docker inspect ydb-app-baseline --format='{{.State.ExitCode}}' 2>/dev/null || echo "1")

          echo "Current exit code: ${CURRENT_EXIT}"
          echo "Baseline exit code: ${BASELINE_EXIT}"

          echo ""
          echo "==================== FINAL CURRENT LOGS ===================="
          docker logs -n 15 ydb-app-current 2>&1 || echo "No current container"
          echo ""
          echo "==================== FINAL BASELINE LOGS ===================="
          docker logs -n 15 ydb-app-baseline 2>&1 || echo "No baseline container"
          echo ""

          # NOTE(review): this branch exits 0, so a non-zero workload exit code
          # never fails the step. That may be deliberate (the containers are
          # stopped/killed above, which can itself produce non-zero codes) -
          # confirm before changing it to a failing exit.
          if [[ "${CURRENT_EXIT}" != "0" || "${BASELINE_EXIT}" != "0" ]]; then
            echo "One or both workloads failed."
            exit 0
          fi

          echo "SUCCESS: Workloads completed successfully"

      # Runs even when earlier steps failed so the logs are always captured.
      - name: Store logs
        if: always()
        run: |
          docker logs ydb-app-current > current.log 2>&1 || echo "No current container" > current.log
          docker logs ydb-app-baseline > baseline.log 2>&1 || echo "No baseline container" > baseline.log

      - name: Upload logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          # The workload name keeps the artifact names of the matrix jobs distinct.
          name: ydb-python-${{ matrix.workload }}-logs
          path: |
            ./current.log
            ./baseline.log
          retention-days: 1