Skip to content

Improve error handling in with_retry_on_rate_limit to log non-rate-li… #2628

Improve error handling in with_retry_on_rate_limit to log non-rate-li…

Improve error handling in with_retry_on_rate_limit to log non-rate-li… #2628

Workflow file for this run

# Workflow-level settings: display name, triggers and concurrency policy.
name: Tests

# Triggered by pushes to any branch; pushes of tags matching production-*
# are excluded.
on:
  push:
    branches:
      - '**'
    tags-ignore:
      - 'production-*'

# Runs are grouped per workflow and ref. A newer run cancels an in-progress
# run of the same group on every ref except main.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
# Builds the web_base image and pushes it to quay.io, unless an image with
# the tag printed by docker/base/generate_tag_for_web_base.sh is already
# present in the registry.
build_base:
  name: Build base image
  runs-on: hetzner
  steps:
    - uses: actions/checkout@v4
    - uses: docker/login-action@v3
      with:
        registry: quay.io
        username: ${{ secrets.QUAY_USERNAME }}
        password: ${{ secrets.QUAY_PASSWORD }}
        logout: false
    - name: Build and push base image
      env:
        WEB_BASE_REPO: quay.io/gumroad/web_base
        # Passed through the step environment instead of being spliced into
        # the script text, so the secret's value is never parsed as shell
        # syntax. `make build_base` inherits it from the environment.
        BUNDLE_GEMS__CONTRIBSYS__COM: ${{ secrets.BUNDLE_GEMS__CONTRIBSYS__COM }}
      run: |
        set -e

        GREEN='\033[0;32m'
        NC='\033[0m'

        # Print a green, timestamped log line.
        logger() {
          echo -e "${GREEN}$(date '+%Y/%m/%d %H:%M:%S') build.sh: $1${NC}"
        }

        # Resolve the Ruby parent image once; it is used for the pull, the
        # tag computation and the build.
        RUBY_IMAGE="quay.io/gumroad/ruby:$(cat .ruby-version)-slim-bullseye"

        logger "pulling $RUBY_IMAGE"
        docker pull --quiet "$RUBY_IMAGE"

        WEB_BASE_SHA=$(WEB_BASE_DOCKERFILE_FROM="$RUBY_IMAGE" docker/base/generate_tag_for_web_base.sh)

        # Only build when the registry has no manifest for this tag yet.
        if ! docker manifest inspect "$WEB_BASE_REPO:$WEB_BASE_SHA" > /dev/null 2>&1; then
          logger "Building $WEB_BASE_REPO:$WEB_BASE_SHA"
          NEW_BASE_REPO="$WEB_BASE_REPO" \
            WEB_BASE_DOCKERFILE_FROM="$RUBY_IMAGE" \
            make build_base

          logger "Pushing $WEB_BASE_REPO:$WEB_BASE_SHA"
          # Up to three push attempts, five seconds apart; the step fails
          # when the third attempt fails.
          for i in {1..3}; do
            logger "Push attempt $i"
            if docker push --quiet "$WEB_BASE_REPO:$WEB_BASE_SHA"; then
              logger "Pushed $WEB_BASE_REPO:$WEB_BASE_SHA"
              break
            elif [ "$i" -eq 3 ]; then
              logger "Failed to push $WEB_BASE_REPO:$WEB_BASE_SHA"
              exit 1
            else
              sleep 5
            fi
          done
        else
          logger "$WEB_BASE_REPO:$WEB_BASE_SHA already exists"
        fi
# Fetches the credential files the test image needs from S3 (cached between
# runs), then builds and pushes the web_base_test image and the
# web:test-<sha> image, skipping any tag that already exists in the registry.
build_test:
  runs-on: hetzner
  name: Build test image
  needs: build_base
  steps:
    - uses: actions/checkout@v4
    - uses: docker/login-action@v3
      with:
        registry: quay.io
        username: ${{ secrets.QUAY_USERNAME }}
        password: ${{ secrets.QUAY_PASSWORD }}
        logout: false
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ secrets.BUILD_AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.BUILD_AWS_SECRET_ACCESS_KEY }}
        aws-region: ${{ secrets.AWS_DEFAULT_REGION }}
    - name: Cache S3 credentials files for tests
      id: cache-s3-files
      uses: actions/cache@v3
      with:
        path: |
          lib/GeoIP2-City.mmdb
          config/credentials.yml.enc
          config/certs/development_com.GRD.iOSCreator.pem
          config/certs/production_com.GRD.iOSCreator.pem
        # NOTE(review): the key hashes files matching these globs at the time
        # this step runs, plus a hard-coded date suffix; presumably the date
        # is bumped by hand to invalidate the cache - confirm.
        key: s3-files-${{ hashFiles('**/*.mmdb', '**/credentials.yml.enc', '**/development_com.GRD.iOSCreator.pem') }}-2025-06-17
    - name: Download files from S3
      if: steps.cache-s3-files.outputs.cache-hit != 'true'
      run: |
        # Each entry is "<key under credentials/development>:<local path>".
        # NOTE(review): the production certificate path is populated from the
        # development certificate - presumably intentional for tests; confirm.
        for file in \
          "GeoIP2-City.mmdb:lib/GeoIP2-City.mmdb" \
          "credentials.yml.enc:config/credentials.yml.enc" \
          "certs/development_com.GRD.iOSCreator.pem:config/certs/development_com.GRD.iOSCreator.pem" \
          "certs/development_com.GRD.iOSCreator.pem:config/certs/production_com.GRD.iOSCreator.pem"; do
          src="s3://gumroad-secrets/credentials/development/${file%:*}"
          dst="${file#*:}"
          aws s3 cp "$src" "$dst"
        done
    - name: Build and push test image
      env:
        WEB_BASE_REPO: quay.io/gumroad/web_base
        WEB_BASE_TEST_REPO: quay.io/gumroad/web_base_test
        WEB_REPO: quay.io/gumroad/web
        REVISION: ${{ github.sha }}
        # Passed through the step environment instead of being spliced into
        # the script text, so the secret's value is never parsed as shell
        # syntax. `make` inherits it from the environment.
        RAILS_MASTER_KEY: ${{ secrets.RAILS_MASTER_KEY }}
      run: |
        set -e

        GREEN='\033[0;32m'
        NC='\033[0m'

        # Print a green, timestamped log line.
        logger() {
          echo -e "${GREEN}$(date '+%Y/%m/%d %H:%M:%S') build.sh: $1${NC}"
        }

        # Resolve the Ruby parent image once; it is used for the pull, the
        # tag computation and every build.
        RUBY_IMAGE="quay.io/gumroad/ruby:$(cat .ruby-version)-slim-bullseye"
        docker pull --quiet "$RUBY_IMAGE"

        WEB_BASE_TEST_SHA=$(WEB_BASE_DOCKERFILE_FROM="$RUBY_IMAGE" docker/base/generate_tag_for_web_base_test.sh)

        # build_and_push <repo> <tag> <make target>
        # Runs the make target and pushes <repo>:<tag> (three attempts, five
        # seconds apart) unless the registry already has that tag.
        build_and_push() {
          local repo=$1
          local tag=$2
          local make_target=$3

          if ! docker manifest inspect "$repo:$tag" > /dev/null 2>&1; then
            logger "building $repo:$tag"
            NEW_BASE_REPO="$WEB_BASE_REPO" \
              NEW_WEB_BASE_TEST_REPO="$WEB_BASE_TEST_REPO" \
              NEW_WEB_REPO="$WEB_REPO" \
              WEB_BASE_DOCKERFILE_FROM="$RUBY_IMAGE" \
              BRANCH_CACHE_UPLOAD_ENABLED=false \
              BRANCH_CACHE_RESTORE_ENABLED=false \
              NEW_WEB_TAG="$REVISION" \
              COMPOSE_PROJECT_NAME=web_${{ github.run_id }}_${{ github.run_attempt }} \
              make "$make_target"

            logger "pushing $repo:$tag"
            for i in {1..3}; do
              logger "Push attempt $i"
              if docker push --quiet "$repo:$tag"; then
                logger "Pushed $repo:$tag"
                break
              elif [ "$i" -eq 3 ]; then
                logger "Failed to push $repo:$tag"
                exit 1
              else
                sleep 5
              fi
            done
          else
            logger "$repo:$tag already exists"
          fi
        }

        build_and_push "$WEB_BASE_TEST_REPO" "$WEB_BASE_TEST_SHA" build_base_test
        build_and_push "$WEB_REPO" "test-$REVISION" build_test
# Runs the specs matching spec/**/*_spec.rb, excluding spec/requests, split
# across 15 parallel nodes through the Knapsack Pro queue.
test_fast:
  name: Test Fast ${{ matrix.ci_node_index }}
  env:
    COMPOSE_PROJECT_NAME: web_${{ github.run_id }}_${{ github.run_attempt }}_fast_${{ matrix.ci_node_index }}
  runs-on: hetzner
  needs: build_test
  strategy:
    fail-fast: false
    matrix:
      ci_node_total: [15]
      ci_node_index: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
  steps:
    - uses: actions/checkout@v4
    - uses: docker/login-action@v3
      with:
        registry: quay.io
        username: ${{ secrets.QUAY_USERNAME }}
        password: ${{ secrets.QUAY_PASSWORD }}
        logout: false
    - name: Start services
      env:
        COMPOSE_HTTP_TIMEOUT: 300
      run: docker compose -f docker/docker-compose-test-and-ci.yml up -d
    - name: Wait for services
      env:
        WEB_REPO: quay.io/gumroad/web
      run: |
        docker run --rm --network ${{ env.COMPOSE_PROJECT_NAME }}_default \
          $WEB_REPO:test-${{ github.sha }} \
          docker/ci/wait_on_connection.sh db_test 3306
      timeout-minutes: 5
    - name: Setup test database
      env:
        WEB_REPO: quay.io/gumroad/web
        # Forwarded to the container by name (`-e RAILS_MASTER_KEY`) so the
        # secret's value never becomes part of the script text.
        RAILS_MASTER_KEY: ${{ secrets.RAILS_MASTER_KEY }}
      run: |
        docker run --rm --network ${{ env.COMPOSE_PROJECT_NAME }}_default \
          -e RAILS_ENV=test \
          -e RAILS_MASTER_KEY \
          $WEB_REPO:test-${{ github.sha }} \
          bundle exec rake db:setup
    - name: Run tests
      # Everything the container needs is declared here and forwarded by name
      # with `-e NAME`. In particular the branch name (github.ref_name) is
      # chosen by whoever pushes, so it must not be interpolated into the
      # shell command itself.
      env:
        WEB_REPO: quay.io/gumroad/web
        RAILS_MASTER_KEY: ${{ secrets.RAILS_MASTER_KEY }}
        RUBY_YJIT_ENABLE: '1'
        KNAPSACK_PRO_TEST_SUITE_TOKEN_RSPEC: ${{ secrets.KNAPSACK_PRO_TEST_SUITE_TOKEN_RSPEC_FAST }}
        KNAPSACK_PRO_CI_NODE_TOTAL: ${{ matrix.ci_node_total }}
        KNAPSACK_PRO_CI_NODE_INDEX: ${{ matrix.ci_node_index }}
        KNAPSACK_PRO_LOG_LEVEL: info
        KNAPSACK_PRO_RSPEC_SPLIT_BY_TEST_EXAMPLES: 'true'
        KNAPSACK_PRO_TEST_FILE_PATTERN: 'spec/**/*_spec.rb'
        KNAPSACK_PRO_TEST_FILE_EXCLUDE_PATTERN: 'spec/requests/**/*_spec.rb'
        KNAPSACK_PRO_COMMIT_HASH: ${{ github.sha }}
        KNAPSACK_PRO_BRANCH: ${{ github.ref_name }}
        KNAPSACK_PRO_CI_NODE_BUILD_ID: ${{ github.run_id }}
        KNAPSACK_PRO_CI_NODE_RETRY_COUNT: ${{ github.run_attempt }}
        KNAPSACK_PRO_PROJECT_DIR: /app
        KNAPSACK_PRO_FIXED_QUEUE_SPLIT: 'true'
        S3_DELETER_ACCESS_KEY_ID: ${{ secrets.S3_DELETER_ACCESS_KEY_ID }}
        S3_DELETER_SECRET_ACCESS_KEY: ${{ secrets.S3_DELETER_SECRET_ACCESS_KEY }}
        CI: 'true'
        IN_DOCKER: 'true'
      run: |
        docker run --rm --network ${{ env.COMPOSE_PROJECT_NAME }}_default \
          -e RAILS_MASTER_KEY \
          -e RUBY_YJIT_ENABLE \
          -e KNAPSACK_PRO_TEST_SUITE_TOKEN_RSPEC \
          -e KNAPSACK_PRO_CI_NODE_TOTAL \
          -e KNAPSACK_PRO_CI_NODE_INDEX \
          -e KNAPSACK_PRO_LOG_LEVEL \
          -e KNAPSACK_PRO_RSPEC_SPLIT_BY_TEST_EXAMPLES \
          -e KNAPSACK_PRO_TEST_FILE_PATTERN \
          -e KNAPSACK_PRO_TEST_FILE_EXCLUDE_PATTERN \
          -e KNAPSACK_PRO_COMMIT_HASH \
          -e KNAPSACK_PRO_BRANCH \
          -e KNAPSACK_PRO_CI_NODE_BUILD_ID \
          -e KNAPSACK_PRO_CI_NODE_RETRY_COUNT \
          -e KNAPSACK_PRO_PROJECT_DIR \
          -e KNAPSACK_PRO_FIXED_QUEUE_SPLIT \
          -e S3_DELETER_ACCESS_KEY_ID \
          -e S3_DELETER_SECRET_ACCESS_KEY \
          -e CI \
          -e IN_DOCKER \
          $WEB_REPO:test-${{ github.sha }} \
          /usr/local/bin/gosu app bundle exec rake "knapsack_pro:queue:rspec[--format RSpec::Github::Formatter --tag ~skip --format progress]"
      timeout-minutes: 15
    - name: Clean up
      # Best-effort teardown: runs even after a failure, and each command's
      # own failure is ignored.
      if: always()
      run: |
        docker compose -f docker/docker-compose-test-and-ci.yml down -v 2>/dev/null || true
        docker rm -f $(docker ps -aq -f name=${{ env.COMPOSE_PROJECT_NAME }}) 2>/dev/null || true
        docker volume rm $(docker volume ls -q -f name=${{ env.COMPOSE_PROJECT_NAME }}) 2>/dev/null || true
        docker network rm ${{ env.COMPOSE_PROJECT_NAME }}_default 2>/dev/null || true
# Runs the specs under spec/requests, split across 45 parallel nodes through
# the Knapsack Pro queue, and uploads Capybara screenshots when a node fails.
test_slow:
  name: Test Slow ${{ matrix.ci_node_index }}
  env:
    COMPOSE_PROJECT_NAME: web_${{ github.run_id }}_${{ github.run_attempt }}_slow_${{ matrix.ci_node_index }}
    # Job-level, so every step below sees it without redeclaring it.
    WEB_REPO: quay.io/gumroad/web
  runs-on: ubicloud-standard-4
  needs: build_test
  strategy:
    fail-fast: false
    matrix:
      ci_node_total: [45]
      ci_node_index:
        [
          0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
          10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
          20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
          30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
          40, 41, 42, 43, 44,
        ]
  steps:
    - uses: actions/checkout@v4
    - uses: docker/login-action@v3
      with:
        registry: quay.io
        username: ${{ secrets.QUAY_USERNAME }}
        password: ${{ secrets.QUAY_PASSWORD }}
        logout: false
    - name: Start services
      env:
        COMPOSE_HTTP_TIMEOUT: 300
      run: docker compose -f docker/docker-compose-test-and-ci.yml up -d
    - name: Wait for services
      run: |
        docker run --rm --network ${{ env.COMPOSE_PROJECT_NAME }}_default \
          $WEB_REPO:test-${{ github.sha }} \
          docker/ci/wait_on_connection.sh db_test 3306
      timeout-minutes: 5
    - name: Setup test database
      env:
        # Forwarded to the container by name (`-e RAILS_MASTER_KEY`), the same
        # way the "Run tests" step does, so the secret's value never becomes
        # part of the script text.
        RAILS_MASTER_KEY: ${{ secrets.RAILS_MASTER_KEY }}
      run: |
        docker run --rm --network ${{ env.COMPOSE_PROJECT_NAME }}_default \
          -e RAILS_ENV=test \
          -e RAILS_MASTER_KEY \
          $WEB_REPO:test-${{ github.sha }} \
          bundle exec rake db:setup
    - name: Run tests
      env:
        RAILS_MASTER_KEY: ${{ secrets.RAILS_MASTER_KEY }}
        RUBY_YJIT_ENABLE: '1'
        # NOTE(review): the slow suite reads the *_FAST token secret, the same
        # one test_fast uses - confirm this is intentional.
        KNAPSACK_PRO_TEST_SUITE_TOKEN_RSPEC: ${{ secrets.KNAPSACK_PRO_TEST_SUITE_TOKEN_RSPEC_FAST }}
        KNAPSACK_PRO_CI_NODE_TOTAL: ${{ matrix.ci_node_total }}
        KNAPSACK_PRO_CI_NODE_INDEX: ${{ matrix.ci_node_index }}
        KNAPSACK_PRO_LOG_LEVEL: info
        KNAPSACK_PRO_RSPEC_SPLIT_BY_TEST_EXAMPLES: 'true'
        KNAPSACK_PRO_TEST_FILE_PATTERN: 'spec/requests/**/*_spec.rb'
        KNAPSACK_PRO_COMMIT_HASH: ${{ github.sha }}
        KNAPSACK_PRO_BRANCH: ${{ github.ref_name }}
        KNAPSACK_PRO_CI_NODE_BUILD_ID: ${{ github.run_id }}
        KNAPSACK_PRO_CI_NODE_RETRY_COUNT: ${{ github.run_attempt }}
        KNAPSACK_PRO_PROJECT_DIR: /app
        KNAPSACK_PRO_FIXED_QUEUE_SPLIT: 'true'
        CI: 'true'
        IN_DOCKER: 'true'
      run: |
        # Create local directory for artifacts
        mkdir -p ./capybara-artifacts
        chmod 777 ./capybara-artifacts
        # Run tests with volume mount to capture capybara artifacts
        docker run --rm --network ${{ env.COMPOSE_PROJECT_NAME }}_default \
          -v "$(pwd)/capybara-artifacts:/app/tmp/capybara" \
          -e RAILS_MASTER_KEY \
          -e RUBY_YJIT_ENABLE \
          -e KNAPSACK_PRO_TEST_SUITE_TOKEN_RSPEC \
          -e KNAPSACK_PRO_CI_NODE_TOTAL \
          -e KNAPSACK_PRO_CI_NODE_INDEX \
          -e KNAPSACK_PRO_LOG_LEVEL \
          -e KNAPSACK_PRO_RSPEC_SPLIT_BY_TEST_EXAMPLES \
          -e KNAPSACK_PRO_TEST_FILE_PATTERN \
          -e KNAPSACK_PRO_COMMIT_HASH \
          -e KNAPSACK_PRO_BRANCH \
          -e KNAPSACK_PRO_CI_NODE_BUILD_ID \
          -e KNAPSACK_PRO_CI_NODE_RETRY_COUNT \
          -e KNAPSACK_PRO_PROJECT_DIR \
          -e KNAPSACK_PRO_FIXED_QUEUE_SPLIT \
          -e CI \
          -e IN_DOCKER \
          $WEB_REPO:test-${{ github.sha }} \
          /usr/local/bin/gosu app bundle exec rake "knapsack_pro:queue:rspec[--format RSpec::Github::Formatter --tag ~skip --format progress]"
      timeout-minutes: 20
    - name: Archive capybara artifacts
      # Only on failure; a node that produced no screenshots is not an error.
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: capybara-screenshots-slow-${{ matrix.ci_node_index }}-attempt-${{ github.run_attempt }}
        path: ./capybara-artifacts/*.png
        retention-days: 7
        if-no-files-found: ignore
    - name: Clean up
      # Best-effort teardown: runs even after a failure, and each command's
      # own failure is ignored.
      if: always()
      run: |
        docker compose -f docker/docker-compose-test-and-ci.yml down -v 2>/dev/null || true
        docker rm -f $(docker ps -aq -f name=${{ env.COMPOSE_PROJECT_NAME }}) 2>/dev/null || true
        docker volume rm $(docker volume ls -q -f name=${{ env.COMPOSE_PROJECT_NAME }}) 2>/dev/null || true
        docker network rm ${{ env.COMPOSE_PROJECT_NAME }}_default 2>/dev/null || true
# After both test jobs succeed on main, finds the blocked Buildkite build for
# this commit and unblocks its "require-approval" step through the Buildkite
# REST API.
unblock_deployment_from_buildkite:
  name: Unblock deployment from Buildkite
  runs-on: ubicloud-standard-4
  needs: [test_fast, test_slow]
  if: success() && github.ref == 'refs/heads/main'
  steps:
    - name: Unblock corresponding Buildkite build
      uses: nick-fields/retry@v3
      env:
        BUILDKITE_API_TOKEN: ${{ secrets.BUILDKITE_API_TOKEN }}
        COMMIT_SHA: ${{ github.sha }}
        ORG_SLUG: gumroad-inc
        PIPELINE_SLUG: gumroad
      with:
        # Retry settings passed to the action: 20 s timeout, 300 s wait
        # between retries, at most 3 attempts.
        timeout_seconds: 20
        retry_wait_seconds: 300
        max_attempts: 3
        command: |
          # Shared pieces of every Buildkite API call below.
          PIPELINE_API="https://api.buildkite.com/v2/organizations/$ORG_SLUG/pipelines/$PIPELINE_SLUG"
          AUTH_HEADER="Authorization: Bearer $BUILDKITE_API_TOKEN"

          echo "Looking for Buildkite build for commit $COMMIT_SHA on pipeline $ORG_SLUG/$PIPELINE_SLUG..."

          # Step 1: list the builds for this commit (retried jobs included).
          BUILD_DATA=$(curl -s -H "$AUTH_HEADER" \
            "$PIPELINE_API/builds?commit=$COMMIT_SHA&include_retried_jobs=true")

          # Step 2: among the builds that are blocked, take the most recently
          # created one. An empty result fails the attempt.
          BUILD_NUMBER=$(echo "$BUILD_DATA" | jq -r --arg commit "$COMMIT_SHA" '[.[] | select(.commit == $commit and .blocked == true)] | sort_by(.created_at) | last | .number // empty')
          if [ -z "$BUILD_NUMBER" ]; then
            echo "No blocked Buildkite build found for commit $COMMIT_SHA on pipeline $ORG_SLUG/$PIPELINE_SLUG."
            exit 1
          fi
          echo "Found blocked Buildkite build number: $BUILD_NUMBER. Fetching job details..."

          # Step 3: load that build and pick the manual job whose step key is
          # "require-approval".
          BUILD_DETAIL_DATA=$(curl -s -H "$AUTH_HEADER" \
            "$PIPELINE_API/builds/$BUILD_NUMBER")
          BLOCK_JOB_ID=$(echo "$BUILD_DETAIL_DATA" | jq -r '.jobs[] | select(.type == "manual" and .step_key == "require-approval") | .id // empty')
          if [ -z "$BLOCK_JOB_ID" ]; then
            echo "No block job with key 'require-approval' found in build $BUILD_NUMBER."
            exit 1
          fi
          echo "Found block job ID: $BLOCK_JOB_ID for build $BUILD_NUMBER. Attempting to unblock job..."

          # Step 4: PUT to the job's unblock endpoint, keeping only the HTTP
          # status code of the response.
          JOB_URL="$PIPELINE_API/builds/$BUILD_NUMBER/jobs/$BLOCK_JOB_ID"
          RESPONSE_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X PUT \
            -H "$AUTH_HEADER" \
            -H "Content-Type: application/json" \
            -d '{}' \
            "$JOB_URL/unblock")

          # Step 5: any 2xx status is success; anything else logs the job
          # resource for diagnosis and fails the attempt.
          if [ "$RESPONSE_CODE" -ge 200 ] && [ "$RESPONSE_CODE" -lt 300 ]; then
            echo "Successfully sent unblock request for job $BLOCK_JOB_ID in build $BUILD_NUMBER. Response code: $RESPONSE_CODE"
          else
            echo "Error sending unblock request for job $BLOCK_JOB_ID in build $BUILD_NUMBER. Response code: $RESPONSE_CODE"
            ERROR_DETAILS=$(curl -s -H "$AUTH_HEADER" "$JOB_URL")
            echo "API Response details for failed unblock: $ERROR_DETAILS"
            exit 1
          fi