Skip to content

feat(i18n): translate onboarding to German (CAT-2326) #59254

feat(i18n): translate onboarding to German (CAT-2326)

feat(i18n): translate onboarding to German (CAT-2326) #59254

# Triggered manually, on pushes to master/release branches, on pull requests, and on published releases.
name: Docker Build, Scan, Test
on:
# Manual runs may override the smoke-test compose profile and the Playwright shard count.
workflow_dispatch:
inputs:
profileName:
description: "Profile name for the smoke-test. Defaults to quickstart-consumers if not specified"
required: false
default: "quickstart-consumers"
type: string
playwright_shard_count:
description: "Number of parallel shards for Playwright E2E tests"
required: false
default: "5"
type: string
push:
branches:
- master
- releases/**
pull_request:
types: [opened, synchronize, reopened]
branches:
- "**"
# Changes under .github/ are ignored, except the paths re-included below with a leading "!".
paths-ignore:
- ".github/**"
- "!.github/workflows/docker-unified.yml"
- "!.github/scripts/send_failed_tests_to_posthog.py"
- "!.github/scripts/docker_helpers.sh"
- "!.github/actions/ci-optimization"
- "!.github/actions/restore-dependency-caches"
- "!.github/scripts/check_python_package.py"
- "!.github/scripts/parse_failed_cypress_tests.py"
- "!.github/scripts/parse_failed_pytest_tests.py"
- "!.github/scripts/docker_logs.sh"
- "!.github/actions/smoke-test-retry"
release:
types: [published]
concurrency:
# The group is keyed by PR number when there is one, otherwise by `github.run_id`
# (a unique value) rather than `github.ref`.
# Effect: a new run for the same PR cancels that PR's in-progress run, while non-PR
# runs (e.g. on master) each get their own group and are not cancelled, which makes
# reproducing issues easier.
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
cancel-in-progress: true
# Workflow-level environment shared by every job.
env:
DOCKER_REGISTRY: "acryldata"
# Compose profile for smoke tests: the workflow_dispatch input when given, else 'quickstart-consumers'.
PROFILE_NAME: "${{ github.event.inputs.profileName || 'quickstart-consumers' }}"
DOCKER_CACHE: "DEPOT"
DEPOT_PROJECT_ID: "${{ vars.DEPOT_PROJECT_ID }}"
# True only for pull requests carrying the 'depot' label.
HAS_DEPOT_LABEL: ${{ github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'depot') }}
# True only for pull requests whose head repository differs from this repository.
IS_FORK: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository }}
DEPOT_TOKEN: "${{ secrets.DEPOT_TOKEN }}"
# Default shard count, used when no playwright_shard_count input is supplied.
PLAYWRIGHT_SHARD_COUNT: "5"
# NOTE(review): id-token: write is presumably needed for OIDC-based authentication — confirm which step relies on it.
permissions:
contents: read
id-token: write
jobs:
# Computes the values (tags, publish flags, change-detection flags, runner types,
# cache keys, Playwright matrix, smoke profile) that downstream jobs read via needs.setup.outputs.
setup:
runs-on: depot-ubuntu-24.04-small
outputs:
# TODO: Many of the vars below should not be required anymore.
tag: ${{ steps.tag.outputs.tag }}
slim_tag: ${{ steps.tag.outputs.slim_tag }}
full_tag: ${{ steps.tag.outputs.full_tag }}
short_sha: ${{ steps.tag.outputs.short_sha }} # needed for auto-deploy
unique_tag: ${{ steps.tag.outputs.unique_tag }}
unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
docker-login: ${{ steps.docker-login.outputs.docker-login }}
publish: ${{ steps.publish.outputs.publish }}
pr-publish: ${{ steps.pr-publish.outputs.publish }}
python_release_version: ${{ steps.tag.outputs.python_release_version }}
branch_name: ${{ steps.tag.outputs.branch_name }}
repository_name: ${{ steps.tag.outputs.repository_name }}
# Flags OR-ed with `github.event_name != 'pull_request'` are always true for non-PR events
# (push, release, workflow_dispatch); the remaining flags only reflect the ci-optimize result.
frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' || github.event_name != 'pull_request' }}
actions_change: ${{ steps.ci-optimize.outputs.actions-change == 'true' || github.event_name != 'pull_request'}}
ingestion_change: ${{ steps.ci-optimize.outputs.ingestion-change == 'true' || github.event_name != 'pull_request' }}
ingestion_base_change: ${{ steps.ci-optimize.outputs.ingestion-base-change == 'true' }}
backend_change: ${{ steps.ci-optimize.outputs.backend-change == 'true' || github.event_name != 'pull_request' }}
frontend_only: ${{ steps.ci-optimize.outputs.frontend-only == 'true' }}
ingestion_only: ${{ steps.ci-optimize.outputs.ingestion-only == 'true' }}
connector_source_only: ${{ steps.ci-optimize.outputs.connector-source-only == 'true' }}
backend_only: ${{ steps.ci-optimize.outputs.backend-only == 'true' }}
kafka_setup_change: ${{ steps.ci-optimize.outputs.kafka-setup-change == 'true' }}
smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
java_client_change: ${{ steps.ci-optimize.outputs.java-client-change == 'true' || github.event_name != 'pull_request' }}
# Hard-coded to "false" in this workflow.
integrations_service_change: "false"
datahub_executor_change: "false"
build_runner_type: ${{ steps.set-runner.outputs.build_runner_type }}
test_runner_type: ${{ steps.set-runner.outputs.test_runner_type }}
test_runner_type_small: ${{ steps.set-runner.outputs.test_runner_type_small }}
use_depot_cache: ${{ steps.set-runner.outputs.use_depot_cache }}
uv_cache_key: ${{ steps.uv-cache-key.outputs.uv_cache_key }}
uv_cache_key_prefix: ${{ steps.uv-cache-key.outputs.uv_cache_key_prefix }}
yarn_cache_key: ${{ steps.yarn-cache-key.outputs.yarn_cache_key }}
yarn_cache_key_prefix: ${{ steps.yarn-cache-key.outputs.yarn_cache_key_prefix }}
playwright_yarn_cache_key: ${{ steps.playwright-yarn-cache-key.outputs.playwright_yarn_cache_key }}
playwright_yarn_cache_key_prefix: ${{ steps.playwright-yarn-cache-key.outputs.playwright_yarn_cache_key_prefix }}
playwright_matrix: ${{ steps.set-playwright-matrix.outputs.matrix }}
playwright_change: ${{ steps.ci-optimize.outputs.playwright-change == 'true' }}
# Empty unless the PR carries a `smoke:` label (the smoke-profile step only runs on pull requests).
smoke_build_task: ${{ steps.smoke-profile.outputs.smoke_build_task }}
smoke_profile_name: ${{ steps.smoke-profile.outputs.smoke_profile_name }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7
# Explicitly checkout head commit, as tag computation depends on it
with:
checkout-head-only: true
# Sources docker_helpers.sh and writes the tag-related values it provides, plus the branch
# and repository names, to the step outputs.
- name: Compute Tag
id: tag
env:
# For release events the ref is rebuilt from the release tag name; otherwise github.ref is used.
GITHUB_REF_FALLBACK: ${{ github.event_name == 'release' && format('refs/tags/{0}', github.event.release.tag_name) || github.ref}}
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
run: |
source .github/scripts/docker_helpers.sh
{
echo "short_sha=${SHORT_SHA}"
echo "tag=$(get_tag)"
echo "slim_tag=$(get_tag_slim)"
echo "full_tag=$(get_tag_full)"
echo "unique_tag=$(get_unique_tag)"
echo "unique_slim_tag=$(get_unique_tag_slim)"
echo "unique_full_tag=$(get_unique_tag_full)"
echo "python_release_version=$(get_python_docker_release_v)"
echo "branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}"
echo "repository_name=${GITHUB_REPOSITORY#*/}"
} >> "$GITHUB_OUTPUT"
# Docker login is considered possible only when the ACRYL_DOCKER_PASSWORD secret is non-empty.
- name: Check whether docker login is possible
id: docker-login
env:
ENABLE_DOCKER_LOGIN: ${{ secrets.ACRYL_DOCKER_PASSWORD != '' }}
run: |
echo "Enable Docker Login: ${{ env.ENABLE_DOCKER_LOGIN }}"
echo "docker-login=${{ env.ENABLE_DOCKER_LOGIN }}" >> "$GITHUB_OUTPUT"
# Publishing is enabled for release events, or for workflow_dispatch/push on master,
# and only when the Docker password secret is available.
- name: Check whether publishing enabled
id: publish
env:
ENABLE_PUBLISH: >-
${{
(github.event_name == 'release' || ((github.event_name == 'workflow_dispatch' || github.event_name == 'push') && github.ref == 'refs/heads/master'))
&& ( secrets.ACRYL_DOCKER_PASSWORD != '' )
}}
run: |
echo "Enable publish: ${{ env.ENABLE_PUBLISH }}"
echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"
# PR publishing is enabled for pull requests labelled 'publish' or 'publish-docker',
# and only when the Docker password secret is available.
- name: Check whether PR publishing enabled
id: pr-publish
env:
ENABLE_PUBLISH: >-
${{
(github.event_name == 'pull_request' && (contains(github.event.pull_request.labels.*.name, 'publish') || contains(github.event.pull_request.labels.*.name, 'publish-docker')))
&& ( secrets.ACRYL_DOCKER_PASSWORD != '' )
}}
run: |
echo "Enable PR publish: ${{ env.ENABLE_PUBLISH }}"
echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"
# Resolves the quickstart compose profile and the image build task from a `smoke:<configKey>`
# PR label. When no such label exists the step exits successfully without setting any outputs,
# so consumers fall back to their own defaults.
- name: Resolve smoke test profile from PR label
  id: smoke-profile
  if: ${{ github.event_name == 'pull_request' }}
  env:
    PR_LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }}
  run: |
    CONFIG_KEY=$(echo "$PR_LABELS" | jq -r '[.[] | select(startswith("smoke:"))] | first // empty' | sed 's/^smoke://')
    if [[ -z "$CONFIG_KEY" ]]; then
      echo "No smoke: label found, using defaults"
      exit 0
    fi
    # The key is appended to a Gradle task name that other jobs paste into shell commands,
    # so reject anything that is not a plain identifier before it is propagated.
    if [[ ! "$CONFIG_KEY" =~ ^[A-Za-z0-9_-]+$ ]]; then
      echo "::error::Invalid smoke label config key: $CONFIG_KEY"
      exit 1
    fi
    echo "Found smoke label config key: $CONFIG_KEY"
    # Check the Gradle exit status explicitly. Piping straight into `tail -1` hides a failed
    # resolution (the default shell has no pipefail) and would publish the last line of the
    # error output as the profile name.
    if ! RESOLVE_OUTPUT=$(./gradlew :docker:resolveQuickstartProfile -PconfigKey="$CONFIG_KEY" -q 2>&1); then
      printf '%s\n' "$RESOLVE_OUTPUT"
      echo "::error::Failed to resolve profile for config key: $CONFIG_KEY"
      exit 1
    fi
    PROFILE_NAME=$(printf '%s\n' "$RESOLVE_OUTPUT" | tail -1)
    if [[ -z "$PROFILE_NAME" ]]; then
      echo "::error::Failed to resolve profile for config key: $CONFIG_KEY"
      exit 1
    fi
    echo "Resolved compose profile: $PROFILE_NAME"
    {
      echo "smoke_build_task=:docker:buildImages${CONFIG_KEY}"
      echo "smoke_profile_name=${PROFILE_NAME}"
    } >> "$GITHUB_OUTPUT"
# Local action whose outputs (frontend-change, backend-change, ...) feed the job outputs above.
- uses: ./.github/actions/ci-optimization
id: ci-optimize
- name: Determine runner type
id: set-runner
# This needs to handle two scenarios:
# 1. Running on a PR from a fork. We use GitHub runners, unless the "depot" label exists -- in which case the
#    tests run on Depot. Note: concurrency is lower when using GitHub runners, queue times can be longer, and
#    test time is longer due to fewer parallel jobs.
# 2. Running on a PR from a branch in the datahub-project org and push/schedule events on master.
#    Depot is used here for remote container builds in base_build and also for all runners. Depot runners
#    support unlimited concurrency and hence short queue times and higher parallelism of smoke tests.
run: |
if [[ "${{ env.DOCKER_CACHE }}" == "DEPOT" && "${{ env.IS_FORK }}" == "false" ]]; then
{
echo "build_runner_type=depot-ubuntu-24.04-4"
echo "test_runner_type=depot-ubuntu-24.04-4"
echo "test_runner_type_small=depot-ubuntu-24.04-small"
echo "use_depot_cache=true"
} >> "$GITHUB_OUTPUT"
else
echo "build_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
if [[ "${{ env.HAS_DEPOT_LABEL }}" == "true" ]]; then
echo "test_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
else
echo "test_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
fi
{
echo "test_runner_type_small=ubuntu-latest"
echo "use_depot_cache=false"
} >> "$GITHUB_OUTPUT"
# publishing is currently only supported via depot
fi
# Each cache-key step emits a full key (prefix + hash of the listed files) and the bare prefix.
- name: Compute UV Cache Key
id: uv-cache-key
run: |
echo "uv_cache_key=docker-unified-${{ runner.os }}-uv-${{ hashFiles(
'./datahub-actions/pyproject.toml',
'./datahub-actions/setup.py',
'./smoke-test/requirements.txt',
'./smoke-test/pyproject.toml',
'./metadata-ingestion/pyproject.toml',
'./metadata-ingestion/setup.py') }}" >> "$GITHUB_OUTPUT"
echo "uv_cache_key_prefix=docker-unified-${{ runner.os }}-uv-" >> "$GITHUB_OUTPUT"
- name: Compute Yarn Cache Key
id: yarn-cache-key
run: |
echo "yarn_cache_key=docker-unified-${{ runner.os }}-yarn-${{ hashFiles('./smoke-test/tests/cypress/yarn.lock', './datahub-web-react/yarn.lock') }}" >> "$GITHUB_OUTPUT"
echo "yarn_cache_key_prefix=docker-unified-${{ runner.os }}-yarn-" >> "$GITHUB_OUTPUT"
# NOTE(review): this key hashes package.json, whereas the yarn key above hashes yarn.lock files — confirm that is intended.
- name: Compute Playwright Yarn Cache Key
id: playwright-yarn-cache-key
run: |
echo "playwright_yarn_cache_key=docker-unified-playwright-${{ runner.os }}-yarn-${{ hashFiles('./e2e-test/ui/playwright/package.json') }}" >> "$GITHUB_OUTPUT"
echo "playwright_yarn_cache_key_prefix=docker-unified-playwright-${{ runner.os }}-yarn-" >> "$GITHUB_OUTPUT"
# Emits the Playwright job matrix: one {"shard","shard_count"} entry per shard, numbered from 1.
# The shard count is the workflow_dispatch input when provided, else env.PLAYWRIGHT_SHARD_COUNT.
- name: Build Playwright shard matrix
  id: set-playwright-matrix
  if: ${{ steps.publish.outputs.publish == 'true' || steps.ci-optimize.outputs.backend-change == 'true' || steps.ci-optimize.outputs.frontend-change == 'true' || steps.ci-optimize.outputs.playwright-change == 'true' }}
  env:
    # Passed through the environment instead of being interpolated into the script, so a
    # user-supplied input can never be evaluated as shell code.
    REQUESTED_SHARD_COUNT: ${{ github.event.inputs.playwright_shard_count || env.PLAYWRIGHT_SHARD_COUNT }}
  run: |
    # A count of zero or a non-number would produce an empty or broken matrix downstream.
    if [[ ! "$REQUESTED_SHARD_COUNT" =~ ^[1-9][0-9]*$ ]]; then
      echo "::error::playwright_shard_count must be a positive integer, got: '$REQUESTED_SHARD_COUNT'"
      exit 1
    fi
    shard_count="$REQUESTED_SHARD_COUNT"
    matrix=''
    for i in $(seq 1 "$shard_count"); do
      if [[ -n "$matrix" ]]; then
        matrix="$matrix,"
      fi
      matrix="${matrix}{\"shard\":\"$i\",\"shard_count\":\"$shard_count\"}"
    done
    echo "matrix={\"include\":[$matrix]}" >> "$GITHUB_OUTPUT"
    {
      echo "## Playwright Test Matrix"
      echo ""
      echo "| Strategy | Total Shards |"
      echo "|----------|--------------|"
      echo "| playwright | $shard_count |"
    } >> "$GITHUB_STEP_SUMMARY"
# Builds the images once on the build runner; test jobs later pull them by the captured build id.
base_build:
name: Build all images
runs-on: ${{ needs.setup.outputs.build_runner_type }}
needs: setup
timeout-minutes: 60
if: ${{ needs.setup.outputs.use_depot_cache == 'true' }} # On fork, smoke test job does the build since depot cache is not available
outputs:
build_id: ${{ steps.capture-build-id.outputs.build_id }}
matrix: ${{ steps.capture-build-id.outputs.matrix }}
steps:
- name: Set up JDK 21
uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
with:
distribution: "zulu"
java-version: 21
- name: Check out the repo
uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7
# Restores dependency caches using the keys computed by the setup job.
- uses: ./.github/actions/restore-dependency-caches
with:
uv_cache_key: ${{ needs.setup.outputs.uv_cache_key }}
uv_cache_key_prefix: ${{ needs.setup.outputs.uv_cache_key_prefix }}
yarn_cache_key: ${{ needs.setup.outputs.yarn_cache_key }}
yarn_cache_key_prefix: ${{ needs.setup.outputs.yarn_cache_key_prefix }}
restore_gradle: true
- name: Set up Depot CLI
if: ${{ env.DOCKER_CACHE == 'DEPOT' }}
uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
with:
python-version: "3.11"
cache: "pip"
# Only attempted when the setup job found the Docker password secret.
- name: Login to DockerHub
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
if: ${{ needs.setup.outputs.docker-login == 'true' }}
with:
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
# Non-publishing build. Task selection: workflow_dispatch builds everything; otherwise the
# task resolved from a `smoke:` PR label is used when present, else the quickstart images.
- name: Build all Images (For Smoke tests)
  if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' }}
  env:
    # Derived from a PR label, so it is handed over via the environment rather than being
    # interpolated into the script text.
    SMOKE_BUILD_TASK: ${{ needs.setup.outputs.smoke_build_task }}
  run: |
    if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
      BUILD_TASK=":docker:buildImagesAll"
    elif [[ -n "$SMOKE_BUILD_TASK" ]]; then
      BUILD_TASK="$SMOKE_BUILD_TASK"
    else
      BUILD_TASK=":docker:buildImagesQuickstart"
    fi
    echo "Using build task: $BUILD_TASK"
    ./gradlew "$BUILD_TASK" -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }}
# Publishing build: runs instead of the smoke-test build when publish or pr-publish is enabled,
# and passes -PdockerPush=true together with the unique tag as -PshaTag.
- name: Build all Images (Publish)
if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
# Push immutable tags (sha-*, pr*, release v*) to the registry during the coordinated depot bake.
# The floating quickstart tag is applied only after smoke tests pass (see publish_images).
run: |
./gradlew :docker:buildImagesAll -PmatrixBuild=true -Ptag=${{ needs.setup.outputs.tag }} -PshaTag=${{ needs.setup.outputs.unique_tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }} -PdockerPush=true
# Reads the depot build id and target list from build/build-metadata.json and exposes them as
# the `build_id` and `matrix` step outputs.
# The former `pip install jq` was dropped: that package provides Python bindings only, while
# this script calls the `jq` command-line tool, which must already be on the runner.
- name: Capture build Id
  id: capture-build-id
  env:
    BUILD_METADATA: ${{ github.workspace }}/build/build-metadata.json
  run: |
    # Fail with an explicit message instead of a bare jq error when the build wrote no metadata.
    if [[ ! -f "$BUILD_METADATA" ]]; then
      echo "::error::Build metadata not found at $BUILD_METADATA"
      exit 1
    fi
    DEPOT_BUILD_ID=$(jq -r '.["depot.build"]?.buildID' "$BUILD_METADATA")
    echo "build_id=${DEPOT_BUILD_ID}" >> "$GITHUB_OUTPUT"
    echo "matrix=$(jq -c '{"target":.["depot.build"].targets}' "$BUILD_METADATA")" >> "$GITHUB_OUTPUT"
# The build metadata artifact is uploaded only for publishing builds.
- name: Save build Metadata
if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: build-metadata-${{ needs.setup.outputs.tag }}
path: |
${{ github.workspace }}/build/build-metadata.json
${{ github.workspace }}/build/bake-spec-allImages.json
# The three cache saves below run only for the master ref.
- uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
if: ${{ github.ref == 'refs/heads/master' }}
with:
path: |
~/.cache/uv
key: ${{ needs.setup.outputs.uv_cache_key }}
- uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
if: ${{ github.ref == 'refs/heads/master' }}
with:
path: |
~/.cache/yarn
key: ${{ needs.setup.outputs.yarn_cache_key }}
# NOTE(review): this key is static. Cache entries are presumably immutable per key, so later
# saves may be skipped once an entry exists — confirm this is the intended refresh policy.
- uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
if: ${{ github.ref == 'refs/heads/master' }}
with:
path: |
~/.gradle/wrapper
~/.gradle/caches/modules-2
~/.gradle/caches/jars-*
~/.gradle/caches/transforms-*
key: gradle-plugins-cache
# Posts (or updates) a single PR comment describing the image tag published for the PR.
# Runs only for pull requests for which PR publishing is enabled, after base_build.
comment_pr_images:
name: Comment PR image tags
runs-on: ubuntu-latest
needs: [setup, base_build]
if: ${{ github.event_name == 'pull_request' && needs.setup.outputs.pr-publish == 'true' }}
# Job-level permissions: only pull-requests: write is requested here.
permissions:
pull-requests: write
steps:
# The script looks for an existing Bot-authored comment containing the hidden marker and
# updates it; otherwise it creates a new comment.
- name: Upsert PR image comment
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
env:
PR_TAG: ${{ needs.setup.outputs.tag }}
SHORT_SHA: ${{ needs.setup.outputs.short_sha }}
REGISTRY: ${{ env.DOCKER_REGISTRY }}
with:
script: |
const marker = '<!-- pr-docker-images -->';
const { PR_TAG, SHORT_SHA, REGISTRY } = process.env;
const body = [
marker,
'## 🐳 Docker Images Published for Testing',
'',
`Images for this PR have been published to the [\`${REGISTRY}\`](https://hub.docker.com/u/${REGISTRY}) Docker Hub registry, tagged \`${PR_TAG}\`.`,
'',
'Run DataHub quickstart with these images:',
'',
'```bash',
`DATAHUB_VERSION=${PR_TAG} datahub docker quickstart`,
'```',
'',
'Or pull an individual image, e.g.:',
'',
'```bash',
`docker pull ${REGISTRY}/datahub-gms:${PR_TAG}`,
'```',
'',
`Pinned to commit \`${SHORT_SHA}\`.`,
].join('\n');
const { owner, repo } = context.repo;
const issue_number = context.payload.pull_request.number;
const comments = await github.paginate(github.rest.issues.listComments, {
owner,
repo,
issue_number,
});
const existing = comments.find(
(c) => c.user?.type === 'Bot' && c.body?.includes(marker),
);
if (existing) {
await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
} else {
await github.rest.issues.createComment({ owner, repo, issue_number, body });
}
# Produces the pytest and cypress batch matrices. A matrix with an empty "include" list
# is emitted for a test type that should not run.
smoke_test_matrix:
runs-on: ${{ needs.setup.outputs.test_runner_type_small }}
needs: setup
outputs:
cypress_matrix: ${{ steps.set-matrix.outputs.cypress_matrix }}
pytest_matrix: ${{ steps.set-matrix.outputs.pytest_matrix }}
steps:
- id: set-batch-count
# Tests are split to produce the configured number of batches for parallelization. The counts may need to be
# increased as new tests are added and durations grow, when an additional parallel batch would help.
# python_batch_count is used to split pytests in the smoke-test (batches of actual test functions)
# cypress_batch_count is used to split the collection of cypress test specs into batches.
# Fork PRs use fewer batches (5 cypress / 3 pytest) than non-fork runs (8 cypress / 7 pytest).
run: |
if [[ "${{ env.IS_FORK }}" == "true" ]]; then
echo "cypress_batch_count=5" >> "$GITHUB_OUTPUT"
echo "python_batch_count=3" >> "$GITHUB_OUTPUT"
else
echo "cypress_batch_count=8" >> "$GITHUB_OUTPUT"
echo "python_batch_count=7" >> "$GITHUB_OUTPUT"
fi
# Builds one {"batch","batch_count"} entry per batch (batches numbered from 0), then decides
# which matrices to publish: both for backend/smoke-test changes or publishing runs,
# cypress only for frontend-only changes, pytest only for ingestion-only changes,
# and neither for connector-source-only changes.
- id: set-matrix
run: |
python_batch_count=${{ steps.set-batch-count.outputs.python_batch_count }}
pytest_items='{"batch":"0","batch_count":"'"$python_batch_count"'"}'
for ((i=1;i<python_batch_count;i++)); do
pytest_items="$pytest_items"',{"batch_count":"'"$python_batch_count"'","batch":"'"$i"'"}'
done
cypress_batch_count=${{ steps.set-batch-count.outputs.cypress_batch_count }}
cypress_items='{"batch":"0","batch_count":"'"$cypress_batch_count"'"}'
for ((i=1;i<cypress_batch_count;i++)); do
cypress_items="$cypress_items"',{"batch_count":"'"$cypress_batch_count"'","batch":"'"$i"'"}'
done
run_both=false
run_pytest=false
run_cypress=false
if [[ "${{ needs.setup.outputs.backend_change }}" == 'true' || "${{ needs.setup.outputs.smoke_test_change }}" == 'true' || "${{ needs.setup.outputs.publish }}" == 'true' ]]; then
run_both=true
elif [[ "${{ needs.setup.outputs.frontend_only }}" == 'true' ]]; then
run_cypress=true
elif [[ "${{ needs.setup.outputs.connector_source_only }}" == 'true' ]]; then
# Connector-only changes (source implementations, their tests, docs) don't affect
# smoke tests. Smoke tests exercise the platform via CLI/SDK/APIs, not individual
# connectors. Skip to save ~140 min of compute.
: # both remain false → empty matrices → both jobs skipped
elif [[ "${{ needs.setup.outputs.ingestion_only }}" == 'true' ]]; then
run_pytest=true
fi
if [[ "$run_both" == 'true' || "$run_pytest" == 'true' ]]; then
echo "pytest_matrix={\"include\":[$pytest_items]}" >> "$GITHUB_OUTPUT"
else
echo "pytest_matrix={\"include\":[]}" >> "$GITHUB_OUTPUT"
fi
if [[ "$run_both" == 'true' || "$run_cypress" == 'true' ]]; then
echo "cypress_matrix={\"include\":[$cypress_items]}" >> "$GITHUB_OUTPUT"
else
echo "cypress_matrix={\"include\":[]}" >> "$GITHUB_OUTPUT"
fi
# Runs the Java SDK V2 integration tests against a locally started quickstart deployment.
java_integration_tests:
name: Java SDK V2 Integration Tests
runs-on: ${{ needs.setup.outputs.test_runner_type }}
needs: [setup, base_build]
# always() lets the job be evaluated even when base_build was skipped (forks); it still
# requires that nothing failed or was cancelled, and that backend or Java client code changed.
if: ${{ always() && !failure() && !cancelled() && (needs.setup.outputs.backend_change == 'true' || needs.setup.outputs.java_client_change == 'true') }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7
- uses: ./.github/actions/restore-dependency-caches
with:
uv_cache_key: ${{ needs.setup.outputs.uv_cache_key }}
uv_cache_key_prefix: ${{ needs.setup.outputs.uv_cache_key_prefix }}
yarn_cache_key: ${{ needs.setup.outputs.yarn_cache_key }}
yarn_cache_key_prefix: ${{ needs.setup.outputs.yarn_cache_key_prefix }}
- name: Set up JDK 21
uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
with:
distribution: "zulu"
java-version: 21
- name: Free up disk space
uses: ./.github/actions/free-disk-space
# Depot CLI is only needed when images are pulled from depot.
- name: Set up Depot CLI
if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
with:
python-version: "3.11"
cache: "pip"
# Gradle setup is only needed when this job builds the images itself (no depot cache).
- uses: gradle/actions/setup-gradle@0723195856401067f7a2779048b490ace7a47d7c # v5.0.2
if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
- name: Login to DockerHub
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
if: ${{ needs.setup.outputs.docker-login == 'true' }}
with:
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
# Disk usage snapshot taken before images are built or pulled.
- name: Disk Space Analysis
run: |
echo "=== Disk Usage Overview ==="
df -h
echo -e "\n=== Docker Disk Usage ==="
docker system df -v
# Local image build, used only when the depot cache is unavailable. Builds the task resolved
# from a `smoke:` PR label when present, else the quickstart debug-consumers images.
- name: Build images
  timeout-minutes: 60
  if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
  env:
    DOCKER_CACHE: GITHUB
    # Label-derived value: handed over via the environment rather than being interpolated
    # into the script text.
    BUILD_TASK: ${{ needs.setup.outputs.smoke_build_task || ':docker:buildImagesQuickstartDebugConsumers' }}
  run: |
    echo "Using build task: $BUILD_TASK"
    ./gradlew "$BUILD_TASK" -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }}
    docker images
# Pulls the images produced by base_build, identified by its captured build id.
- name: Pull images from depot
if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
run: |
depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}"
docker images
# Disk usage snapshot taken after images are available.
- name: Disk Space Analysis
run: |
echo "=== Disk Usage Overview ==="
df -h
echo -e "\n=== Docker Disk Usage ==="
docker system df -v
- name: Run quickstart
env:
DATAHUB_TELEMETRY_ENABLED: false
DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
# NOTE(review): DATAHUB_ACTIONS_IMAGE is not defined in the workflow-level env block, so this
# presumably evaluates to an empty string — confirm whether that is intended.
DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_ACTIONS_IMAGE }}
ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
# The profile resolved from a `smoke:` PR label wins over the workflow-level PROFILE_NAME.
PROFILE_NAME: ${{ needs.setup.outputs.smoke_profile_name || env.PROFILE_NAME }}
run: |
echo "Using compose profile: $PROFILE_NAME"
./smoke-test/run-quickstart.sh
- name: Disk Check
run: df -h . && docker images
# Turns off the disk-based shard allocation threshold on the cluster at localhost:9200.
- name: Disable ES Disk Threshold
run: |
curl -XPUT "http://localhost:9200/_cluster/settings" \
-H 'Content-Type: application/json' -d'{
"persistent": {
"cluster": {
"routing": {
"allocation.disk.threshold_enabled": false
}
}
}
}'
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Build datahub cli
run: ./gradlew :metadata-ingestion:install
# Runs only the datahub-client tests whose names match "*Integration*".
- name: Java SDK V2 Integration Tests
env:
DATAHUB_SERVER: http://localhost:8080
ADMIN_USERNAME: datahub
ADMIN_PASSWORD: datahub
run: |
echo "Running Java SDK V2 integration tests against running DataHub instance..."
./gradlew :metadata-integration:java:datahub-client:test --tests "*Integration*"
# Coverage upload runs regardless of earlier step results; upload errors do not fail CI.
- name: Upload Java SDK V2 coverage to Codecov
if: ${{ always() }}
uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
with:
token: ${{ secrets.CODECOV_TOKEN }}
directory: ./build/coverage-reports/metadata-integration/java/datahub-client/
flags: metadata-integration
name: java-sdk-v2-integration
fail_ci_if_error: false
verbose: true
override_branch: ${{ github.head_ref || github.ref_name }}
# Test results are reported unless the run was cancelled.
- name: Report test results
if: (!cancelled())
uses: ./.github/actions/report-test-results
with:
artifact-name: Test Results (java integration tests)
test-results-paths: |
**/build/test-results/**/*.xml
!**/binary/**
junit-file-globs: |
**/build/test-results/**/*.xml
- name: Upload test results to Codecov
if: ${{ !cancelled() }}
uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
with:
token: ${{ secrets.CODECOV_TOKEN }}
report_type: test_results
override_branch: ${{ github.head_ref || github.ref_name }}
# On failure, container logs are collected by docker_logs.sh and uploaded (kept for 5 days).
- name: Store logs
if: failure()
run: |
docker ps -a
TEST_STRATEGY="-java-integration"
source .github/scripts/docker_logs.sh
- name: Upload logs
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
if: failure()
with:
name: docker-logs-java-integration
path: "docker_logs/*.log"
retention-days: 5
# Runs the pytest smoke tests, one matrix job per batch produced by smoke_test_matrix.
pytest_tests:
name: Pytest Smoke Tests (Batch ${{ matrix.batch }}/${{ matrix.batch_count }})
runs-on: ${{ needs.setup.outputs.test_runner_type }}
needs: [setup, smoke_test_matrix, base_build]
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.smoke_test_matrix.outputs.pytest_matrix || '{"include":[]}') }}
# Skipped when the pytest matrix is missing or has an empty "include" list.
if: ${{ needs.smoke_test_matrix.outputs.pytest_matrix != '' && needs.smoke_test_matrix.outputs.pytest_matrix != '{"include":[]}' }}
env:
MIXPANEL_API_SECRET: ${{ secrets.MIXPANEL_API_SECRET }}
MIXPANEL_PROJECT_ID: ${{ secrets.MIXPANEL_PROJECT_ID }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7
- uses: ./.github/actions/restore-dependency-caches
with:
uv_cache_key: ${{ needs.setup.outputs.uv_cache_key }}
uv_cache_key_prefix: ${{ needs.setup.outputs.uv_cache_key_prefix }}
yarn_cache_key: ${{ needs.setup.outputs.yarn_cache_key }}
yarn_cache_key_prefix: ${{ needs.setup.outputs.yarn_cache_key_prefix }}
- name: Set up JDK 21
uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
with:
distribution: "zulu"
java-version: 21
- name: Free up disk space
uses: ./.github/actions/free-disk-space
- name: Set up Depot CLI
if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
with:
python-version: "3.11"
cache: "pip"
# Only on re-runs (run_attempt > 1). Later steps are skipped when this step reports
# parse_result == 'all_passed'; on a first attempt the output is unset, so they all run.
- name: Parse previous results for retry
id: retry-check
if: github.run_attempt > 1
uses: ./.github/actions/smoke-test-retry
with:
test_strategy: pytests
batch: ${{ matrix.batch }}
run_id: ${{ github.run_id }}
github_token: ${{ github.token }}
- uses: gradle/actions/setup-gradle@0723195856401067f7a2779048b490ace7a47d7c # v5.0.2
if: ${{ needs.setup.outputs.use_depot_cache != 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
- name: Login to DockerHub
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
if: ${{ needs.setup.outputs.docker-login == 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
with:
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
# Disk usage snapshot taken before images are built or pulled.
- name: Disk Space Analysis
if: steps.retry-check.outputs.parse_result != 'all_passed'
run: |
echo "=== Disk Usage Overview ==="
df -h
echo -e "\n=== Docker Disk Usage ==="
docker system df -v
# Local image build, used only when the depot cache is unavailable and the retry check did not
# report that everything already passed. Builds the task resolved from a `smoke:` PR label when
# present, else the quickstart debug-consumers images.
- name: Build images
  timeout-minutes: 60
  if: ${{ needs.setup.outputs.use_depot_cache != 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
  env:
    DOCKER_CACHE: GITHUB
    # Label-derived value: handed over via the environment rather than being interpolated
    # into the script text.
    BUILD_TASK: ${{ needs.setup.outputs.smoke_build_task || ':docker:buildImagesQuickstartDebugConsumers' }}
  run: |
    echo "Using build task: $BUILD_TASK"
    ./gradlew "$BUILD_TASK" -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }}
    docker images
# Pulls the images produced by base_build, identified by its captured build id.
- name: Pull images from depot
if: ${{ needs.setup.outputs.use_depot_cache == 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
run: |
depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}"
docker images
# Disk usage snapshot taken after images are available.
- name: Disk Space Analysis
if: steps.retry-check.outputs.parse_result != 'all_passed'
run: |
echo "=== Disk Usage Overview ==="
df -h
echo -e "\n=== Docker Disk Usage ==="
docker system df -v
- name: Run quickstart
if: steps.retry-check.outputs.parse_result != 'all_passed'
env:
DATAHUB_TELEMETRY_ENABLED: false
DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
# NOTE(review): DATAHUB_ACTIONS_IMAGE is not defined in the workflow-level env block, so this
# presumably evaluates to an empty string — confirm whether that is intended.
DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_ACTIONS_IMAGE }}
ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
# The profile resolved from a `smoke:` PR label wins over the workflow-level PROFILE_NAME.
PROFILE_NAME: ${{ needs.setup.outputs.smoke_profile_name || env.PROFILE_NAME }}
run: |
echo "Using compose profile: $PROFILE_NAME"
./smoke-test/run-quickstart.sh
- name: Disk Check
if: steps.retry-check.outputs.parse_result != 'all_passed'
run: df -h . && docker images
# Turns off the disk-based shard allocation threshold on the cluster at localhost:9200.
- name: Disable ES Disk Threshold
if: steps.retry-check.outputs.parse_result != 'all_passed'
run: |
curl -XPUT "http://localhost:9200/_cluster/settings" \
-H 'Content-Type: application/json' -d'{
"persistent": {
"cluster": {
"routing": {
"allocation.disk.threshold_enabled": false
}
}
}
}'
- name: Install dependencies
if: steps.retry-check.outputs.parse_result != 'all_passed'
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Build datahub cli
if: steps.retry-check.outputs.parse_result != 'all_passed'
run: ./gradlew :metadata-ingestion:install
# Runs smoke.sh for this batch. On a re-run, FILTERED_TESTS points at the file from the retry
# check when it produced one; the script only prints which mode applies before running.
- name: Pytest smoke tests
if: steps.retry-check.outputs.parse_result != 'all_passed'
env:
RUN_QUICKSTART: false
DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
CLEANUP_DATA: "false"
TEST_STRATEGY: pytests
BATCH_COUNT: ${{ matrix.batch_count }}
BATCH_NUMBER: ${{ matrix.batch }}
FILTERED_TESTS: ${{ steps.retry-check.outputs.filtered_tests_file || '' }}
run: |
if [[ -n "$FILTERED_TESTS" && -f "$FILTERED_TESTS" ]]; then
echo "=========================================="
echo "RETRY MODE: Running only failed pytest modules"
echo "=========================================="
echo "Failed modules to retry:"
cat "$FILTERED_TESTS"
echo "=========================================="
elif (( ${{ github.run_attempt }} > 1 )); then
echo "RETRY MODE: Running all tests (fallback)"
fi
echo "$DATAHUB_VERSION"
./gradlew --stop
./smoke-test/smoke.sh
# Unlike the earlier disk check, this one has no retry-check condition.
- name: Disk Check
run: df -h . && docker images
# On failure, container logs are collected by docker_logs.sh and uploaded (kept for 5 days).
- name: Store logs
if: failure()
run: |
docker ps -a
TEST_STRATEGY="-pytests-${{ matrix.batch }}"
source .github/scripts/docker_logs.sh
- name: Upload logs
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
if: failure()
with:
name: docker-logs-pytests-${{ matrix.batch }}
path: "docker_logs/*.log"
retention-days: 5
# Test results are reported unless the run was cancelled.
- name: Report test results
if: (!cancelled())
uses: ./.github/actions/report-test-results
with:
artifact-name: Test Results (smoke tests) pytests ${{ matrix.batch }}
test-results-paths: |
**/junit.*.xml
!**/binary/**
junit-file-globs: |
**/junit.*.xml
# Best-effort metrics: runs only on failure, never fails the job (continue-on-error), and
# exits early when POSTHOG_API_KEY is not configured.
- name: Send failed test metrics to PostHog
if: failure()
continue-on-error: true
env:
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
GH_HEAD_REF: ${{ github.head_ref || github.ref_name }}
run: |
if [ -z "$POSTHOG_API_KEY" ]; then
echo "⚠️ POSTHOG_API_KEY not configured, skipping test failure metrics"
exit 0
fi
TEMP_DIR=$(mktemp -d)
mkdir -p "$TEMP_DIR/test-results"
find . -name "junit.*.xml" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
python3 .github/scripts/send_failed_tests_to_posthog.py \
--input-dir "$TEMP_DIR/test-results" \
--posthog-api-key "$POSTHOG_API_KEY" \
--posthog-host "${POSTHOG_HOST:-https://app.posthog.com}" \
--repository "${{ github.repository }}" \
--workflow-name "${{ github.workflow }}" \
--branch "${GH_HEAD_REF}" \
--run-id "${{ github.run_id }}" \
--run-attempt "${{ github.run_attempt }}" \
--batch "${{ matrix.batch }}" \
--batch-count "${{ strategy.job-total }}" \
--test-strategy "pytests"
rm -rf "$TEMP_DIR"
- name: Upload test results to Codecov
if: ${{ !cancelled() }}
uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
with:
token: ${{ secrets.CODECOV_TOKEN }}
report_type: test_results
override_branch: ${{ github.head_ref || github.ref_name }}
# Caches are saved only on master, and only by batch 0 so a single matrix job writes them.
- uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
with:
path: ~/.cache/uv
key: ${{ needs.setup.outputs.uv_cache_key }}
- uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
with:
path: ~/.cache/yarn
key: ${{ needs.setup.outputs.yarn_cache_key }}
cypress_tests:
  # Runs one batch of the Cypress smoke tests per matrix entry produced by the
  # smoke_test_matrix job. On a re-run (run_attempt > 1) the smoke-test-retry
  # action is consulted first; when it reports `all_passed`, every build,
  # deploy and test step below is skipped via its `if:` guard.
  name: Cypress Smoke Tests (Batch ${{ matrix.batch }}/${{ matrix.batch_count }})
  runs-on: ${{ needs.setup.outputs.test_runner_type }}
  needs: [setup, smoke_test_matrix, base_build]
  strategy:
    fail-fast: false
    # Fall back to an empty matrix so fromJson never receives an empty string.
    matrix: ${{ fromJson(needs.smoke_test_matrix.outputs.cypress_matrix || '{"include":[]}') }}
  if: ${{ needs.smoke_test_matrix.outputs.cypress_matrix != '' && needs.smoke_test_matrix.outputs.cypress_matrix != '{"include":[]}' }}
  env:
    MIXPANEL_API_SECRET: ${{ secrets.MIXPANEL_API_SECRET }}
    MIXPANEL_PROJECT_ID: ${{ secrets.MIXPANEL_PROJECT_ID }}
  steps:
    - name: Check out the repo
      uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7
    - uses: ./.github/actions/restore-dependency-caches
      with:
        uv_cache_key: ${{ needs.setup.outputs.uv_cache_key }}
        uv_cache_key_prefix: ${{ needs.setup.outputs.uv_cache_key_prefix }}
        yarn_cache_key: ${{ needs.setup.outputs.yarn_cache_key }}
        yarn_cache_key_prefix: ${{ needs.setup.outputs.yarn_cache_key_prefix }}
    - name: Set up JDK 21
      uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
      with:
        distribution: "zulu"
        java-version: 21
    - name: Free up disk space
      uses: ./.github/actions/free-disk-space
    - name: Set up Depot CLI
      if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
      uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1
    - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
      with:
        python-version: "3.11"
        cache: "pip"
    # Only meaningful on re-runs: its outputs (parse_result, filtered_tests_file)
    # are empty on the first attempt, so the `!= 'all_passed'` guards below pass.
    - name: Parse previous results for retry
      id: retry-check
      if: github.run_attempt > 1
      uses: ./.github/actions/smoke-test-retry
      with:
        test_strategy: cypress
        batch: ${{ matrix.batch }}
        run_id: ${{ github.run_id }}
        github_token: ${{ github.token }}
    - uses: gradle/actions/setup-gradle@0723195856401067f7a2779048b490ace7a47d7c # v5.0.2
      if: ${{ needs.setup.outputs.use_depot_cache != 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
    - name: Login to DockerHub
      uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
      if: ${{ needs.setup.outputs.docker-login == 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
      with:
        username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
        password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
    # Snapshot of disk usage before images are built or pulled.
    - name: Disk Space Analysis
      if: steps.retry-check.outputs.parse_result != 'all_passed'
      run: |
        echo "=== Disk Usage Overview ==="
        df -h
        echo -e "\n=== Docker Disk Usage ==="
        docker system df -v
    # Local Gradle build, used when the depot cache is not available.
    - name: Build images
      timeout-minutes: 60
      if: ${{ needs.setup.outputs.use_depot_cache != 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
      env:
        DOCKER_CACHE: GITHUB
      run: |
        BUILD_TASK="${{ needs.setup.outputs.smoke_build_task || ':docker:buildImagesQuickstartDebugConsumers' }}"
        echo "Using build task: $BUILD_TASK"
        ./gradlew $BUILD_TASK -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }}
        docker images
    # Otherwise reuse the images produced by the base_build job.
    - name: Pull images from depot
      if: ${{ needs.setup.outputs.use_depot_cache == 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
      run: |
        depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}"
        docker images
    # Second snapshot, taken after images are present locally.
    - name: Disk Space Analysis
      if: steps.retry-check.outputs.parse_result != 'all_passed'
      run: |
        echo "=== Disk Usage Overview ==="
        df -h
        echo -e "\n=== Docker Disk Usage ==="
        docker system df -v
    - name: Run quickstart
      if: steps.retry-check.outputs.parse_result != 'all_passed'
      env:
        DATAHUB_TELEMETRY_ENABLED: false
        DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
        DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_ACTIONS_IMAGE }}
        ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
        ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
        # The profile chosen by the setup job wins over the workflow-level default.
        PROFILE_NAME: ${{ needs.setup.outputs.smoke_profile_name || env.PROFILE_NAME }}
      run: |
        echo "Using compose profile: $PROFILE_NAME"
        ./smoke-test/run-quickstart.sh
    - name: Disk Check
      if: steps.retry-check.outputs.parse_result != 'all_passed'
      run: df -h . && docker images
    - name: Disable ES Disk Threshold
      if: steps.retry-check.outputs.parse_result != 'all_passed'
      run: |
        curl -XPUT "http://localhost:9200/_cluster/settings" \
          -H 'Content-Type: application/json' -d'{
            "persistent": {
              "cluster": {
                "routing": {
                  "allocation.disk.threshold_enabled": false
                }
              }
            }
          }'
    - name: Install dependencies
      if: steps.retry-check.outputs.parse_result != 'all_passed'
      run: ./metadata-ingestion/scripts/install_deps.sh
    - name: Build datahub cli
      if: steps.retry-check.outputs.parse_result != 'all_passed'
      run: ./gradlew :metadata-ingestion:install
    - name: Cypress smoke tests
      if: steps.retry-check.outputs.parse_result != 'all_passed'
      env:
        RUN_QUICKSTART: false
        DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
        CYPRESS_RECORD_KEY: ${{ secrets.CYPRESS_RECORD_KEY }}
        CLEANUP_DATA: "false"
        TEST_STRATEGY: cypress
        BATCH_COUNT: ${{ matrix.batch_count }}
        BATCH_NUMBER: ${{ matrix.batch }}
        # Path to the list of previously failed tests; empty on a first attempt.
        FILTERED_TESTS: ${{ steps.retry-check.outputs.filtered_tests_file || '' }}
      run: |
        # Log which retry mode applies; the test run itself is always smoke.sh.
        if [[ -n "$FILTERED_TESTS" && -f "$FILTERED_TESTS" ]]; then
          echo "=========================================="
          echo "RETRY MODE: Running only failed Cypress tests"
          echo "=========================================="
          echo "Failed tests to retry:"
          cat "$FILTERED_TESTS"
          echo "=========================================="
        elif (( ${{ github.run_attempt }} > 1 )); then
          echo "RETRY MODE: Running all tests (fallback)"
        fi
        echo "$DATAHUB_VERSION"
        ./gradlew --stop
        ./smoke-test/smoke.sh
    - name: Disk Check
      run: df -h . && docker images
    - name: Store logs
      if: failure()
      run: |
        docker ps -a
        # docker_logs.sh is sourced, so it sees TEST_STRATEGY set just above.
        TEST_STRATEGY="-cypress-${{ matrix.batch }}"
        source .github/scripts/docker_logs.sh
    - name: Upload logs
      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      if: failure()
      with:
        name: docker-logs-cypress-${{ matrix.batch }}
        path: "docker_logs/*.log"
        retention-days: 5
    - name: Stage Cypress screenshots for artifact upload
      # Copy screenshots alongside the mochawesome JSONs so the post-matrix
      # cypress_html_report job can embed them into the unified HTML report.
      if: always()
      run: |
        if [ -d smoke-test/tests/cypress/cypress/screenshots ]; then
          # This step runs under always(), so the report directory may not exist
          # yet; create it so cp cannot fail on a missing parent directory.
          mkdir -p smoke-test/tests/cypress/build/mochawesome-report
          cp -r smoke-test/tests/cypress/cypress/screenshots \
            smoke-test/tests/cypress/build/mochawesome-report/screenshots
        fi
    - name: Upload Cypress mochawesome JSON results
      # cypress-mochawesome-reporter writes one JSON per spec to .jsons/ (a hidden subdir)
      # via the Cypress after:spec hook — survives Electron crashes. include-hidden-files is
      # required so upload-artifact picks up that dot-prefixed directory.
      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      if: always()
      with:
        name: cypress-mochawesome-json-${{ matrix.batch }}
        path: smoke-test/tests/cypress/build/mochawesome-report/
        include-hidden-files: true
        retention-days: 1
    - name: Upload screenshots
      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      if: failure()
      with:
        name: cypress-snapshots-cypress-${{ matrix.batch }}
        path: smoke-test/tests/cypress/cypress/screenshots/
    - name: Report test results
      if: (!cancelled())
      uses: ./.github/actions/report-test-results
      with:
        artifact-name: Test Results (smoke tests) cypress ${{ matrix.batch }}
        test-results-paths: |
          **/smoke-test-results/cypress-test-*.xml
          !**/binary/**
        junit-file-globs: |
          **/smoke-test-results/cypress-test-*.xml
    # Best-effort telemetry: never fails the job, and is a no-op without an API key.
    - name: Send failed test metrics to PostHog
      if: failure()
      continue-on-error: true
      env:
        POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
        POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
        GH_HEAD_REF: ${{ github.head_ref || github.ref_name }}
      run: |
        if [ -z "$POSTHOG_API_KEY" ]; then
          echo "⚠️ POSTHOG_API_KEY not configured, skipping test failure metrics"
          exit 0
        fi
        # Gather every Cypress JUnit file into one scratch directory for the script.
        TEMP_DIR=$(mktemp -d)
        mkdir -p "$TEMP_DIR/test-results"
        find . -name "cypress-test-*.xml" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
        python3 .github/scripts/send_failed_tests_to_posthog.py \
          --input-dir "$TEMP_DIR/test-results" \
          --posthog-api-key "$POSTHOG_API_KEY" \
          --posthog-host "${POSTHOG_HOST:-https://app.posthog.com}" \
          --repository "${{ github.repository }}" \
          --workflow-name "${{ github.workflow }}" \
          --branch "${GH_HEAD_REF}" \
          --run-id "${{ github.run_id }}" \
          --run-attempt "${{ github.run_attempt }}" \
          --batch "${{ matrix.batch }}" \
          --batch-count "${{ strategy.job-total }}" \
          --test-strategy "cypress"
        rm -rf "$TEMP_DIR"
    - name: Upload test results to Codecov
      if: ${{ !cancelled() }}
      uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        report_type: test_results
        override_branch: ${{ github.head_ref || github.ref_name }}
    # Caches are written only from master and only by batch 0. The pin matches
    # the actions/cache revision used by the other jobs in this workflow.
    - uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
      if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
      with:
        path: ~/.cache/uv
        key: ${{ needs.setup.outputs.uv_cache_key }}
    - uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
      if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
      with:
        path: ~/.cache/yarn
        key: ${{ needs.setup.outputs.yarn_cache_key }}
cypress_html_report:
  # Post-matrix job: gathers the per-batch mochawesome artifacts uploaded by
  # cypress_tests and hands them to generate-cypress-report.js.
  name: Generate unified Cypress HTML report
  needs:
    - cypress_tests
  if: ${{ !cancelled() && needs.cypress_tests.result != 'skipped' }}
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    # merge-multiple is deliberately NOT set. Every batch emits identically
    # named files (mochawesome.json, mochawesome_001.json, ...), so merging
    # would let later batches clobber earlier ones. Left unset, each artifact
    # unpacks into a directory of its own:
    #   mochawesome-results/cypress-mochawesome-json-0/.jsons/mochawesome.json
    #   mochawesome-results/cypress-mochawesome-json-1/.jsons/mochawesome.json
    #   ...
    - name: Download all Cypress mochawesome JSON artifacts
      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
      with:
        path: mochawesome-results
        pattern: cypress-mochawesome-json-*
    - name: Generate HTML report
      run: |
        # Per-spec JSONs sit one level below each artifact directory:
        #   mochawesome-results/cypress-mochawesome-json-<N>/.jsons/mochawesome_NNN.json
        mapfile -t JSON_FILES < <(find mochawesome-results -path "*/.jsons/*.json" 2>/dev/null)

        # Nothing to merge: report it and finish without an error.
        if (( ${#JSON_FILES[@]} == 0 )); then
          echo "No mochawesome JSON files found – all Cypress batches may have been skipped or crashed before any spec completed"
          exit 0
        fi

        # Flatten the screenshots of every batch into a single directory;
        # enhanceReport resolves image paths relative to it.
        mkdir -p combined-screenshots
        find mochawesome-results -mindepth 1 -maxdepth 1 -type d | while read -r batch_dir; do
          if [ -d "$batch_dir/screenshots" ]; then
            cp -rn "$batch_dir/screenshots/." combined-screenshots/
          fi
        done

        # One JSON path per line, written for the report script that follows.
        printf '%s\n' "${JSON_FILES[@]}" > json-file-list.txt
        npm install --no-save \
          mochawesome-merge \
          mochawesome-report-generator \
          cypress-mochawesome-reporter \
          fs-extra
        node .github/scripts/generate-cypress-report.js
    - name: Upload unified Cypress HTML report
      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      with:
        name: cypress-html-report
        retention-days: 5
        path: cypress-html-report/
playwright_test:
  # Runs one shard of the Playwright E2E suite against a local quickstart
  # deployment. Shards come from the matrix emitted by the setup job.
  name: Playwright E2E Tests (Shard ${{ matrix.shard }}/${{ matrix.shard_count }})
  needs:
    - setup
    - base_build
  if: ${{ always() && !failure() && !cancelled() && needs.setup.outputs.playwright_matrix != '' && needs.setup.outputs.playwright_matrix != '{"include":[]}' }}
  runs-on: ${{ needs.setup.outputs.test_runner_type }}
  # NOTE(review): this job-level limit is lower than the 60-minute limit on the
  # "Build images (fork — no depot cache)" step below — confirm that is intended.
  timeout-minutes: 20
  strategy:
    # An empty matrix is substituted when setup produced no shard list.
    matrix: ${{ fromJson(needs.setup.outputs.playwright_matrix || '{"include":[]}') }}
    fail-fast: false
  steps:
    - uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
      with:
        key: ${{ needs.setup.outputs.playwright_yarn_cache_key }}
        restore-keys: ${{ needs.setup.outputs.playwright_yarn_cache_key_prefix }}
        path: ~/.cache/yarn
    - name: Check out the repo
      uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7
    - name: Free up disk space
      uses: ./.github/actions/free-disk-space
    - name: Set up JDK 21
      uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
      with:
        java-version: 21
        distribution: "zulu"
    - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
      with:
        cache: "pip"
        python-version: "3.11"
    - name: Set up Node.js
      uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
      with:
        node-version: "22"
    - name: Set up Depot CLI
      if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
      uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1
    - name: Login to DockerHub
      if: ${{ needs.setup.outputs.docker-login == 'true' }}
      uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
      with:
        username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
        password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
    # Exactly one of the next two steps runs, selected by use_depot_cache.
    - name: Build images (fork — no depot cache)
      if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
      timeout-minutes: 60
      env:
        DOCKER_CACHE: GITHUB
      run: |
        BUILD_TASK="${{ needs.setup.outputs.smoke_build_task || ':docker:buildImagesQuickstartDebugConsumers' }}"
        echo "Using build task: $BUILD_TASK"
        ./gradlew $BUILD_TASK -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }}
        docker images
    - name: Pull images from depot
      if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
      run: |
        depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}"
        docker images
    - name: Run quickstart
      env:
        # Keep in sync with smoke_test and base_build (e.g. smoke:quickstartPg → quickstart-postgres).
        # The default quickstart-consumers profile needs mae/mce images, which pg-only profiles do not build.
        PROFILE_NAME: ${{ needs.setup.outputs.smoke_profile_name || env.PROFILE_NAME }}
        DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
        DATAHUB_TELEMETRY_ENABLED: false
        SKIP_INSTALL_DEV: "true"
        ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
        ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
      run: |
        echo "Using compose profile: $PROFILE_NAME"
        ./smoke-test/run-quickstart.sh
    - name: Disable ES Disk Threshold
      run: |
        curl -XPUT "http://localhost:9200/_cluster/settings" \
          -H 'Content-Type: application/json' -d'{
            "persistent": {
              "cluster": {
                "routing": {
                  "allocation.disk.threshold_enabled": false
                }
              }
            }
          }'
    - name: Install Playwright dependencies
      working-directory: e2e-test/ui/playwright
      run: |
        yarn install --frozen-lockfile
        npx playwright install --with-deps chromium
    - name: Run Playwright tests
      working-directory: e2e-test/ui/playwright
      env:
        PLAYWRIGHT_REUSE_SERVER: "1"
        BASE_URL: "http://localhost:9002"
      run: npx playwright test --shard=${{ matrix.shard }}/${{ matrix.shard_count }}
    # Blob reports are uploaded on every outcome; playwright_report downloads
    # them by the playwright-blob-report-* name pattern.
    - name: Upload Playwright blob report
      if: always()
      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      with:
        name: playwright-blob-report-${{ matrix.shard }}
        retention-days: 1
        path: e2e-test/ui/playwright/blob-report/
    - name: Upload Playwright JUnit results
      if: always()
      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      with:
        name: playwright-junit-${{ matrix.shard }}
        retention-days: 5
        path: e2e-test/ui/playwright/test-results/junit.xml
    - name: Store logs on failure
      if: failure()
      run: |
        docker ps -a
        # docker_logs.sh is sourced, so it sees TEST_STRATEGY set just above.
        TEST_STRATEGY="-playwright-${{ matrix.shard }}"
        source .github/scripts/docker_logs.sh
    - name: Upload Docker logs on failure
      if: failure()
      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      with:
        name: docker-logs-playwright-${{ matrix.shard }}
        retention-days: 5
        path: "docker_logs/*.log"
    - name: Upload Playwright traces and screenshots on failure
      if: failure()
      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      with:
        name: playwright-traces-${{ matrix.shard }}
        retention-days: 5
        path: e2e-test/ui/playwright/test-results/
    - name: Report test results
      if: ${{ !cancelled() }}
      uses: ./.github/actions/report-test-results
      with:
        artifact-name: "Test Results (playwright) shard-${{ matrix.shard }}"
        junit-file-globs: |
          **/e2e-test/ui/playwright/test-results/junit.xml
        test-results-paths: |
          **/e2e-test/ui/playwright/test-results/junit.xml
    - name: Upload test results to Codecov
      if: ${{ !cancelled() }}
      uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
      with:
        report_type: test_results
        token: ${{ secrets.CODECOV_TOKEN }}
        override_branch: ${{ github.head_ref || github.ref_name }}
    # The yarn cache is written only from master, and only by shard 1.
    - if: ${{ github.ref == 'refs/heads/master' && matrix.shard == '1' }}
      uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
      with:
        key: ${{ needs.setup.outputs.playwright_yarn_cache_key }}
        path: ~/.cache/yarn
playwright_report:
  # Post-matrix job: merges the blob reports of all Playwright shards into one
  # HTML report, uploads it as an artifact and, best effort, publishes it to S3.
  name: Merge Playwright HTML report
  needs:
    - setup
    - playwright_test
  if: ${{ !cancelled() && needs.playwright_test.result != 'skipped' }}
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    - name: Set up Node.js
      uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
      with:
        node-version: "22"
    - uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
      with:
        key: ${{ needs.setup.outputs.playwright_yarn_cache_key }}
        restore-keys: ${{ needs.setup.outputs.playwright_yarn_cache_key_prefix }}
        path: ~/.cache/yarn
    - name: Install Playwright dependencies
      working-directory: e2e-test/ui/playwright
      run: yarn install --frozen-lockfile
    - name: Download all Playwright blob reports
      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
      with:
        path: playwright-blob-reports
        pattern: playwright-blob-report-*
    - name: Merge blob reports into unified HTML report
      run: |
        # Since download-artifact v4 every artifact is unpacked into its own
        # subfolder of playwright-blob-reports/. Copy all .zip files into one
        # flat directory so `playwright merge-reports` can find them.
        mkdir -p playwright-blobs-flat
        # Blob filenames carry the shard index, so copies cannot collide.
        find playwright-blob-reports -name "*.zip" -exec cp {} playwright-blobs-flat/ \;
        if compgen -G "playwright-blobs-flat/*.zip" > /dev/null 2>&1; then
          ./e2e-test/ui/playwright/node_modules/.bin/playwright merge-reports --reporter html ./playwright-blobs-flat
        else
          echo "No blob reports found — all shards may have been skipped or crashed"
          exit 1
        fi
    - name: Upload merged Playwright HTML report
      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      with:
        name: playwright-html-report
        retention-days: 5
        path: playwright-report/
    # The two S3 steps run on every outcome and never fail the job.
    - name: Configure AWS credentials for S3 report upload
      if: always()
      continue-on-error: true
      uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5
      with:
        aws-region: us-west-2
        role-to-assume: ${{ secrets.PLAYWRIGHT_REPORTS_OSS_ROLE_ARN }}
    - name: Publish Playwright report to S3
      id: publish-s3-report
      if: always()
      continue-on-error: true
      env:
        GH_WORKFLOW: ${{ github.workflow }}
        GH_REPOSITORY_OWNER: ${{ github.repository_owner }}
        GH_EVENT_REPOSITORY_NAME: ${{ github.event.repository.name }}
        GH_EVENT_NAME: ${{ github.event_name }}
        GH_RUN_ID: ${{ github.run_id }}
        GH_RUN_ATTEMPT: ${{ github.run_attempt }}
      run: |
        # A missing or empty report directory is not an error here.
        if [[ ! -d playwright-report ]] || [[ -z "$(ls -A playwright-report 2>/dev/null)" ]]; then
          echo "No playwright-report directory — skipping S3 upload"
          exit 0
        fi
        BUCKET="datahub-oss-ci-build-artifacts.dev.dh-int.zone"
        # Slugify the workflow name: lowercase, non-alphanumerics to '-',
        # collapse runs of '-', then trim a leading and a trailing '-'.
        WORKFLOW_NAME=$(echo "$GH_WORKFLOW" \
          | tr '[:upper:]' '[:lower:]' \
          | sed -E -e 's/[^a-z0-9]/-/g' -e 's/-+/-/g' -e 's/^-//' -e 's/-$//')
        S3_PATH="${GH_REPOSITORY_OWNER}/${GH_EVENT_REPOSITORY_NAME}/workflows/${GH_EVENT_NAME}/${GH_RUN_ID}_${GH_RUN_ATTEMPT}/${WORKFLOW_NAME}/playwright-report"
        aws s3 sync playwright-report/ "s3://${BUCKET}/${S3_PATH}/" \
          --cache-control "no-cache, no-store, must-revalidate" \
          --sse AES256 \
          --delete
        REPORT_URL="https://${BUCKET}/${S3_PATH}/index.html"
        # Expose the URL as a step output for the summary step below.
        echo "report_url=${REPORT_URL}" >> "$GITHUB_OUTPUT"
        echo "Published Playwright report to: ${REPORT_URL}"
    # Runs only when the publish step actually produced a URL.
    - name: Add report link to GitHub Summary
      if: ${{ always() && steps.publish-s3-report.outputs.report_url != '' }}
      run: |
        {
          echo "## Playwright HTML Report"
          echo ""
          echo "| Resource | Link |"
          echo "|----------|------|"
          echo "| HTML Report (incl. Traces) | ${{ steps.publish-s3-report.outputs.report_url }} |"
          echo ""
          echo "> [!NOTE]"
          echo "> Accessible from the DataHub VPN only."
        } >> "$GITHUB_STEP_SUMMARY"
publish_images:
  # Runs on master once none of the listed jobs failed or was cancelled, and
  # adds the quickstart tag to the images that already carry the SHA tag.
  name: Update quickstart tag after tests pass on master
  needs:
    - setup
    - base_build
    - java_integration_tests
    - pytest_tests
    - cypress_tests
    - playwright_test
  if: ${{ always() && !failure() && !cancelled() && needs.setup.result != 'skipped' && github.ref == 'refs/heads/master' }}
  runs-on: ${{ needs.setup.outputs.test_runner_type_small || 'ubuntu-latest' }}
  steps:
    - name: Set up Depot CLI
      if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
      uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1
    - name: Login to DockerHub
      if: ${{ needs.setup.outputs.docker-login == 'true' }}
      uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
      with:
        username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
        password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
    - name: Check out the repo
      if: ${{ needs.setup.outputs.use_depot_cache == 'true' && needs.setup.outputs.publish == 'true' }}
      uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7
      with:
        checkout-head-only: true
    # Provides build/bake-spec-allImages.json, which the tagging step reads.
    - name: Download build Metadata
      if: ${{ needs.setup.outputs.publish == 'true' }}
      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
      with:
        path: ${{ github.workspace }}/build
        name: build-metadata-${{ needs.setup.outputs.tag }}
    - name: Set up Docker Buildx
      if: ${{ needs.setup.outputs.use_depot_cache == 'true' && needs.setup.outputs.publish == 'true' }}
      uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
    - name: Tag images with quickstart (coordinated floating tag)
      if: ${{ needs.setup.outputs.use_depot_cache == 'true' && needs.setup.outputs.publish == 'true' }}
      run: |
        set -euo pipefail
        # SHA_TAG and QUICKSTART_TAG are not set in this step.
        # NOTE(review): presumably supplied by docker_helpers.sh or the environment — confirm.
        source .github/scripts/docker_helpers.sh
        # The coordinated depot bake already pushed the :sha-* images; only :quickstart is added here.
        depot bake -f "${{ github.workspace }}/build/bake-spec-allImages.json" --print | jq -cr '.target[].tags[]' | while IFS= read -r image; do
          # Ignore blank lines in the tag list.
          [[ -n "$image" ]] || continue
          # Keep only tags that end in :SHA_TAG or continue with "-<suffix>".
          [[ "$image" == *":${SHA_TAG}" || "$image" == *":${SHA_TAG}-"* ]] || continue
          quickstartImage="${image/${SHA_TAG}/${QUICKSTART_TAG}}"
          echo "Tagging $image as $quickstartImage"
          docker buildx imagetools create -t "$quickstartImage" "$image"
        done
deploy_datahub_head:
  # Sends a "git-sync" message to an SQS queue once publish_images succeeded.
  # Restricted to the datahub repository of the datahub-project owner.
  name: Deploy to Datahub HEAD
  needs:
    - setup
    - java_integration_tests
    - pytest_tests
    - cypress_tests
    - playwright_test
    - publish_images
  if: ${{ needs.publish_images.result == 'success' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }}
  runs-on: ubuntu-latest
  steps:
    - uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6.0.0
      with:
        aws-region: us-west-2
        aws-access-key-id: ${{ secrets.AWS_SQS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SQS_ACCESS_KEY }}
    # The payload identifies the repository, branch and commit to sync.
    - uses: isbang/sqs-action@7cdb8b5d1328c6af489ef4614fbafb364bf096ea # v0.2.0
      with:
        sqs-url: ${{ secrets.DATAHUB_HEAD_SYNC_QUEUE }}
        message: '{ "command": "git-sync", "args" : {"repoName": "${{ needs.setup.outputs.repository_name }}", "repoOrg": "${{ github.repository_owner }}", "repoBranch": "${{ needs.setup.outputs.branch_name }}", "repoShaShort": "${{ needs.setup.outputs.short_sha }}", "repoShaTag": "${{ needs.setup.outputs.tag }}" }}'