feat(i18n): translate onboarding to German (CAT-2326) #59254

Workflow file for this run

.github/workflows/docker-unified.yml at 5618ce2

	name: Docker Build, Scan, Test
	# Triggers: manual dispatch (with optional inputs), pushes to master and
	# release branches, pull requests, and published releases.
	on:
	  workflow_dispatch:
	    inputs:
	      profileName:
	        description: "Profile name for the smoke-test. Defaults to quickstart-consumers if not specified"
	        required: false
	        default: "quickstart-consumers"
	        type: string
	      playwright_shard_count:
	        description: "Number of parallel shards for Playwright E2E tests"
	        required: false
	        default: "5"
	        type: string
	  push:
	    branches:
	      - master
	      - releases/**
	  pull_request:
	    types: [opened, synchronize, reopened]
	    branches:
	      - "**"
	    # Changes under .github/ are ignored, except for the files and actions
	    # re-included below with a leading "!".
	    paths-ignore:
	      - ".github/**"
	      - "!.github/workflows/docker-unified.yml"
	      - "!.github/scripts/send_failed_tests_to_posthog.py"
	      - "!.github/scripts/docker_helpers.sh"
	      - "!.github/actions/ci-optimization"
	      - "!.github/actions/restore-dependency-caches"
	      - "!.github/scripts/check_python_package.py"
	      - "!.github/scripts/parse_failed_cypress_tests.py"
	      - "!.github/scripts/parse_failed_pytest_tests.py"
	      - "!.github/scripts/docker_logs.sh"
	      - "!.github/actions/smoke-test-retry"
	  release:
	    types: [published]

	concurrency:
	  # Pull requests are grouped by PR number, so a newer run for the same PR
	  # cancels the one in progress. All other events fall back to
	  # `github.run_id` (a unique value) instead of `github.ref`, because we
	  # don't want to cancel runs on master: keeping them makes reproducing
	  # issues easier.
	  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
	  cancel-in-progress: true

	# Workflow-wide environment, visible to every job and step.
	env:
	  DOCKER_REGISTRY: "acryldata"
	  # Compose profile for the smoke tests: the workflow_dispatch input when
	  # given, otherwise the quickstart-consumers default.
	  PROFILE_NAME: "${{ github.event.inputs.profileName || 'quickstart-consumers' }}"

	  DOCKER_CACHE: "DEPOT"
	  DEPOT_PROJECT_ID: "${{ vars.DEPOT_PROJECT_ID }}"
	  # True only for pull requests carrying the "depot" label.
	  HAS_DEPOT_LABEL: ${{ github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'depot') }}
	  # True only for pull requests whose head repository differs from this one.
	  IS_FORK: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository }}
	  DEPOT_TOKEN: "${{ secrets.DEPOT_TOKEN }}"
	  # Default shard count; the setup job prefers the playwright_shard_count input.
	  PLAYWRIGHT_SHARD_COUNT: "5"

	# Default GITHUB_TOKEN permissions for all jobs; individual jobs may
	# declare their own `permissions` block.
	permissions:
	  contents: read
	  id-token: write

	jobs:
	# Computes everything the downstream jobs need: image tags, publish flags,
	# change-detection flags, runner types, cache keys and the Playwright matrix.
	setup:
	  runs-on: depot-ubuntu-24.04-small
	  outputs:
	    # TODO: Many of the vars below should not be required anymore.
	    tag: ${{ steps.tag.outputs.tag }}
	    slim_tag: ${{ steps.tag.outputs.slim_tag }}
	    full_tag: ${{ steps.tag.outputs.full_tag }}
	    short_sha: ${{ steps.tag.outputs.short_sha }} # needed for auto-deploy
	    unique_tag: ${{ steps.tag.outputs.unique_tag }}
	    unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
	    unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
	    docker-login: ${{ steps.docker-login.outputs.docker-login }}
	    publish: ${{ steps.publish.outputs.publish }}
	    pr-publish: ${{ steps.pr-publish.outputs.publish }}
	    python_release_version: ${{ steps.tag.outputs.python_release_version }}
	    branch_name: ${{ steps.tag.outputs.branch_name }}
	    repository_name: ${{ steps.tag.outputs.repository_name }}
	    # The "*_change" flags combined with `github.event_name != 'pull_request'`
	    # are always true outside pull requests.
	    frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' || github.event_name != 'pull_request' }}
	    actions_change: ${{ steps.ci-optimize.outputs.actions-change == 'true' || github.event_name != 'pull_request' }}
	    ingestion_change: ${{ steps.ci-optimize.outputs.ingestion-change == 'true' || github.event_name != 'pull_request' }}
	    ingestion_base_change: ${{ steps.ci-optimize.outputs.ingestion-base-change == 'true' }}
	    backend_change: ${{ steps.ci-optimize.outputs.backend-change == 'true' || github.event_name != 'pull_request' }}
	    frontend_only: ${{ steps.ci-optimize.outputs.frontend-only == 'true' }}
	    ingestion_only: ${{ steps.ci-optimize.outputs.ingestion-only == 'true' }}
	    connector_source_only: ${{ steps.ci-optimize.outputs.connector-source-only == 'true' }}
	    backend_only: ${{ steps.ci-optimize.outputs.backend-only == 'true' }}
	    kafka_setup_change: ${{ steps.ci-optimize.outputs.kafka-setup-change == 'true' }}
	    smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
	    java_client_change: ${{ steps.ci-optimize.outputs.java-client-change == 'true' || github.event_name != 'pull_request' }}
	    integrations_service_change: "false"
	    datahub_executor_change: "false"

	    build_runner_type: ${{ steps.set-runner.outputs.build_runner_type }}
	    test_runner_type: ${{ steps.set-runner.outputs.test_runner_type }}
	    test_runner_type_small: ${{ steps.set-runner.outputs.test_runner_type_small }}
	    use_depot_cache: ${{ steps.set-runner.outputs.use_depot_cache }}
	    uv_cache_key: ${{ steps.uv-cache-key.outputs.uv_cache_key }}
	    uv_cache_key_prefix: ${{ steps.uv-cache-key.outputs.uv_cache_key_prefix }}
	    yarn_cache_key: ${{ steps.yarn-cache-key.outputs.yarn_cache_key }}
	    yarn_cache_key_prefix: ${{ steps.yarn-cache-key.outputs.yarn_cache_key_prefix }}
	    playwright_yarn_cache_key: ${{ steps.playwright-yarn-cache-key.outputs.playwright_yarn_cache_key }}
	    playwright_yarn_cache_key_prefix: ${{ steps.playwright-yarn-cache-key.outputs.playwright_yarn_cache_key_prefix }}
	    playwright_matrix: ${{ steps.set-playwright-matrix.outputs.matrix }}
	    playwright_change: ${{ steps.ci-optimize.outputs.playwright-change == 'true' }}
	    smoke_build_task: ${{ steps.smoke-profile.outputs.smoke_build_task }}
	    smoke_profile_name: ${{ steps.smoke-profile.outputs.smoke_profile_name }}
	  steps:
	    - name: Check out the repo
	      uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7
	      # Explicitly checkout head commit, as tag computation depends on it
	      with:
	        checkout-head-only: true

	    - name: Compute Tag
	      id: tag
	      env:
	        # On release events use the release tag ref; otherwise the triggering ref.
	        GITHUB_REF_FALLBACK: ${{ github.event_name == 'release' && format('refs/tags/{0}', github.event.release.tag_name) || github.ref }}
	        GITHUB_EVENT_NAME: ${{ github.event_name }}
	        GITHUB_PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
	      run: |
	        # SHORT_SHA and the get_* helpers are not defined in this file;
	        # presumably they come from the sourced script -- confirm there.
	        source .github/scripts/docker_helpers.sh
	        {
	          echo "short_sha=${SHORT_SHA}"
	          echo "tag=$(get_tag)"
	          echo "slim_tag=$(get_tag_slim)"
	          echo "full_tag=$(get_tag_full)"
	          echo "unique_tag=$(get_unique_tag)"
	          echo "unique_slim_tag=$(get_unique_tag_slim)"
	          echo "unique_full_tag=$(get_unique_tag_full)"
	          echo "python_release_version=$(get_python_docker_release_v)"
	          echo "branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}"
	          echo "repository_name=${GITHUB_REPOSITORY#*/}"
	        } >> "$GITHUB_OUTPUT"

	    - name: Check whether docker login is possible
	      id: docker-login
	      env:
	        # Secrets cannot be tested in `if:` directly, so the comparison is
	        # evaluated here and exposed as a plain true/false string.
	        ENABLE_DOCKER_LOGIN: ${{ secrets.ACRYL_DOCKER_PASSWORD != '' }}
	      run: |
	        echo "Enable Docker Login: ${{ env.ENABLE_DOCKER_LOGIN }}"
	        echo "docker-login=${{ env.ENABLE_DOCKER_LOGIN }}" >> "$GITHUB_OUTPUT"

	    - name: Check whether publishing enabled
	      id: publish
	      env:
	        # Publish on releases, or on push/workflow_dispatch against master,
	        # and only when the Docker password secret is available.
	        ENABLE_PUBLISH: >-
	          ${{
	          (github.event_name == 'release' || ((github.event_name == 'workflow_dispatch' || github.event_name == 'push') && github.ref == 'refs/heads/master'))
	          && ( secrets.ACRYL_DOCKER_PASSWORD != '' )
	          }}
	      run: |
	        echo "Enable publish: ${{ env.ENABLE_PUBLISH }}"
	        echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"

	    - name: Check whether PR publishing enabled
	      id: pr-publish
	      env:
	        # Publish PR images only when the PR carries a "publish" or
	        # "publish-docker" label and the Docker password secret is available.
	        ENABLE_PUBLISH: >-
	          ${{
	          (github.event_name == 'pull_request' && (contains(github.event.pull_request.labels.*.name, 'publish') || contains(github.event.pull_request.labels.*.name, 'publish-docker')))
	          && ( secrets.ACRYL_DOCKER_PASSWORD != '' )
	          }}
	      run: |
	        echo "Enable PR publish: ${{ env.ENABLE_PUBLISH }}"
	        echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"

	    - name: Resolve smoke test profile from PR label
	      id: smoke-profile
	      if: ${{ github.event_name == 'pull_request' }}
	      env:
	        PR_LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }}
	      run: |
	        # Take the first label starting with "smoke:" and strip that prefix.
	        CONFIG_KEY=$(echo "$PR_LABELS" | jq -r '[.[] | select(startswith("smoke:"))] | first // empty' | sed 's/^smoke://')
	        if [[ -z "$CONFIG_KEY" ]]; then
	          echo "No smoke: label found, using defaults"
	          exit 0
	        fi
	        echo "Found smoke label config key: $CONFIG_KEY"
	        # Check Gradle's exit status before reading its output. This step sets
	        # no `shell:`, so `pipefail` is not enabled; piping straight into
	        # `tail` would hide a Gradle failure and publish the last line of its
	        # error output as the profile name.
	        if ! GRADLE_OUTPUT=$(./gradlew :docker:resolveQuickstartProfile -PconfigKey="$CONFIG_KEY" -q 2>&1); then
	          echo "$GRADLE_OUTPUT"
	          echo "::error::Failed to resolve profile for config key: $CONFIG_KEY"
	          exit 1
	        fi
	        PROFILE_NAME=$(echo "$GRADLE_OUTPUT" | tail -1)
	        if [[ -z "$PROFILE_NAME" ]]; then
	          echo "::error::Failed to resolve profile for config key: $CONFIG_KEY"
	          exit 1
	        fi
	        echo "Resolved compose profile: $PROFILE_NAME"
	        {
	          echo "smoke_build_task=:docker:buildImages${CONFIG_KEY}"
	          echo "smoke_profile_name=${PROFILE_NAME}"
	        } >> "$GITHUB_OUTPUT"

	    - uses: ./.github/actions/ci-optimization
	      id: ci-optimize

	    - name: Determine runner type
	      id: set-runner
	      # This needs to handle two scenarios:
	      # 1. Running on a PR from a fork. We use github runners, unless the "depot" label exists -- in which case, we run
	      #    it on depot. Note, concurrency is lower when using github runners, queue times can be longer, test time is longer
	      #    due to fewer parallel jobs.
	      # 2. Running on a PR from a branch in the datahub-project org and push/schedule events on master.
	      #    Depot is used here for remote container builds in base_build and also for all runners. Depot runners support unlimited concurrency
	      #    and hence short queue times and higher parallelism of smoke tests
	      run: |
	        if [[ "${{ env.DOCKER_CACHE }}" == "DEPOT" && "${{ env.IS_FORK }}" == "false" ]]; then
	          {
	            echo "build_runner_type=depot-ubuntu-24.04-4"
	            echo "test_runner_type=depot-ubuntu-24.04-4"
	            echo "test_runner_type_small=depot-ubuntu-24.04-small"
	            echo "use_depot_cache=true"
	          } >> "$GITHUB_OUTPUT"
	        else
	          echo "build_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
	          if [[ "${{ env.HAS_DEPOT_LABEL }}" == "true" ]]; then
	            echo "test_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
	          else
	            echo "test_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
	          fi
	          {
	            echo "test_runner_type_small=ubuntu-latest"
	            echo "use_depot_cache=false"
	          } >> "$GITHUB_OUTPUT"
	          # publishing is currently only supported via depot
	        fi

	    - name: Compute UV Cache Key
	      id: uv-cache-key
	      run: |
	        echo "uv_cache_key=docker-unified-${{ runner.os }}-uv-${{ hashFiles(
	          './datahub-actions/pyproject.toml',
	          './datahub-actions/setup.py',
	          './smoke-test/requirements.txt',
	          './smoke-test/pyproject.toml',
	          './metadata-ingestion/pyproject.toml',
	          './metadata-ingestion/setup.py') }}" >> "$GITHUB_OUTPUT"
	        echo "uv_cache_key_prefix=docker-unified-${{ runner.os }}-uv-" >> "$GITHUB_OUTPUT"

	    - name: Compute Yarn Cache Key
	      id: yarn-cache-key
	      run: |
	        echo "yarn_cache_key=docker-unified-${{ runner.os }}-yarn-${{ hashFiles('./smoke-test/tests/cypress/yarn.lock', './datahub-web-react/yarn.lock') }}" >> "$GITHUB_OUTPUT"
	        echo "yarn_cache_key_prefix=docker-unified-${{ runner.os }}-yarn-" >> "$GITHUB_OUTPUT"

	    - name: Compute Playwright Yarn Cache Key
	      id: playwright-yarn-cache-key
	      run: |
	        echo "playwright_yarn_cache_key=docker-unified-playwright-${{ runner.os }}-yarn-${{ hashFiles('./e2e-test/ui/playwright/package.json') }}" >> "$GITHUB_OUTPUT"
	        echo "playwright_yarn_cache_key_prefix=docker-unified-playwright-${{ runner.os }}-yarn-" >> "$GITHUB_OUTPUT"

	    - name: Build Playwright shard matrix
	      id: set-playwright-matrix
	      if: ${{ steps.publish.outputs.publish == 'true' || steps.ci-optimize.outputs.backend-change == 'true' || steps.ci-optimize.outputs.frontend-change == 'true' || steps.ci-optimize.outputs.playwright-change == 'true' }}
	      env:
	        # Passed through the environment rather than interpolated into the
	        # script, so a user-supplied dispatch input is never parsed as shell.
	        SHARD_COUNT_INPUT: ${{ github.event.inputs.playwright_shard_count || env.PLAYWRIGHT_SHARD_COUNT }}
	      run: |
	        if ! [[ "$SHARD_COUNT_INPUT" =~ ^[0-9]+$ ]]; then
	          echo "::error::playwright_shard_count must be a non-negative integer, got: $SHARD_COUNT_INPUT"
	          exit 1
	        fi
	        # Force base 10 so a leading zero is not read as octal.
	        shard_count=$(( 10#$SHARD_COUNT_INPUT ))
	        # Emit one {"shard","shard_count"} entry per shard, comma separated.
	        matrix=''
	        for i in $(seq 1 "$shard_count"); do
	          if [[ -n "$matrix" ]]; then
	            matrix="$matrix,"
	          fi
	          matrix="${matrix}{\"shard\":\"$i\",\"shard_count\":\"$shard_count\"}"
	        done
	        echo "matrix={\"include\":[$matrix]}" >> "$GITHUB_OUTPUT"
	        {
	          echo "## Playwright Test Matrix"
	          echo ""
	          echo "| Strategy | Total Shards |"
	          echo "|----------|--------------|"
	          echo "| playwright | $shard_count |"
	        } >> "$GITHUB_STEP_SUMMARY"

	# Builds all images once (remote depot build) and exposes the depot build id
	# so that the test jobs can pull the images instead of rebuilding them.
	base_build:
	  name: Build all images
	  runs-on: ${{ needs.setup.outputs.build_runner_type }}
	  needs: setup
	  timeout-minutes: 60
	  # On fork, smoke test job does the build since depot cache is not available
	  if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
	  outputs:
	    build_id: ${{ steps.capture-build-id.outputs.build_id }}
	    matrix: ${{ steps.capture-build-id.outputs.matrix }}
	  steps:
	    - name: Set up JDK 21
	      uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
	      with:
	        distribution: "zulu"
	        java-version: 21

	    - name: Check out the repo
	      uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7

	    - uses: ./.github/actions/restore-dependency-caches
	      with:
	        uv_cache_key: ${{ needs.setup.outputs.uv_cache_key }}
	        uv_cache_key_prefix: ${{ needs.setup.outputs.uv_cache_key_prefix }}
	        yarn_cache_key: ${{ needs.setup.outputs.yarn_cache_key }}
	        yarn_cache_key_prefix: ${{ needs.setup.outputs.yarn_cache_key_prefix }}
	        restore_gradle: true

	    - name: Set up Depot CLI
	      if: ${{ env.DOCKER_CACHE == 'DEPOT' }}
	      uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1

	    - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
	      with:
	        python-version: "3.11"
	        cache: "pip"

	    - name: Login to DockerHub
	      uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
	      if: ${{ needs.setup.outputs.docker-login == 'true' }}
	      with:
	        username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
	        password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}

	    - name: Build all Images (For Smoke tests)
	      if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' }}
	      env:
	        # Derived from a PR label, so it is passed through the environment
	        # instead of being interpolated into the script text.
	        SMOKE_BUILD_TASK: ${{ needs.setup.outputs.smoke_build_task }}
	      run: |
	        # Manual runs build everything; a "smoke:" label picks its own task;
	        # everything else builds the quickstart images.
	        if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
	          BUILD_TASK=":docker:buildImagesAll"
	        elif [[ -n "$SMOKE_BUILD_TASK" ]]; then
	          BUILD_TASK="$SMOKE_BUILD_TASK"
	        else
	          BUILD_TASK=":docker:buildImagesQuickstart"
	        fi
	        echo "Using build task: $BUILD_TASK"
	        ./gradlew "$BUILD_TASK" -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }}

	    - name: Build all Images (Publish)
	      if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
	      # Push immutable tags (sha-, pr, release v*) to the registry during the coordinated depot bake.
	      # The floating quickstart tag is applied only after smoke tests pass (see publish_images).
	      run: |
	        ./gradlew :docker:buildImagesAll -PmatrixBuild=true -Ptag=${{ needs.setup.outputs.tag }} -PshaTag=${{ needs.setup.outputs.unique_tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }} -PdockerPush=true

	    - name: Capture build Id
	      id: capture-build-id
	      run: |
	        # NOTE(review): `pip install jq` installs the Python bindings, not the
	        # `jq` CLI used below; the CLI is presumably already on the runner.
	        # Confirm whether this line is still needed.
	        pip install jq
	        DEPOT_BUILD_ID=$(jq -r '.["depot.build"]?.buildID' "${{ github.workspace }}/build/build-metadata.json")

	        echo "build_id=${DEPOT_BUILD_ID}" >> "$GITHUB_OUTPUT"
	        echo "matrix=$(jq -c '{"target":.["depot.build"].targets}' "${{ github.workspace }}/build/build-metadata.json")" >> "$GITHUB_OUTPUT"

	    - name: Save build Metadata
	      if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
	      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	      with:
	        name: build-metadata-${{ needs.setup.outputs.tag }}
	        path: |
	          ${{ github.workspace }}/build/build-metadata.json
	          ${{ github.workspace }}/build/bake-spec-allImages.json

	    # Dependency caches are only saved from runs on master.
	    - uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
	      if: ${{ github.ref == 'refs/heads/master' }}
	      with:
	        path: |
	          ~/.cache/uv
	        key: ${{ needs.setup.outputs.uv_cache_key }}

	    - uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
	      if: ${{ github.ref == 'refs/heads/master' }}
	      with:
	        path: |
	          ~/.cache/yarn
	        key: ${{ needs.setup.outputs.yarn_cache_key }}

	    - uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
	      if: ${{ github.ref == 'refs/heads/master' }}
	      with:
	        path: |
	          ~/.gradle/wrapper
	          ~/.gradle/caches/modules-2
	          ~/.gradle/caches/jars-*
	          ~/.gradle/caches/transforms-*
	        key: gradle-plugins-cache

	# Posts (or refreshes) a single PR comment describing the images that were
	# published for the pull request.
	comment_pr_images:
	  name: Comment PR image tags
	  runs-on: ubuntu-latest
	  needs: [setup, base_build]
	  if: ${{ github.event_name == 'pull_request' && needs.setup.outputs.pr-publish == 'true' }}
	  permissions:
	    pull-requests: write
	  steps:
	    - name: Upsert PR image comment
	      uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
	      env:
	        PR_TAG: ${{ needs.setup.outputs.tag }}
	        SHORT_SHA: ${{ needs.setup.outputs.short_sha }}
	        REGISTRY: ${{ env.DOCKER_REGISTRY }}
	      with:
	        script: |
	          // Hidden HTML marker used to find the comment left by an earlier run.
	          const marker = '<!-- pr-docker-images -->';
	          const { PR_TAG, SHORT_SHA, REGISTRY } = process.env;
	          const bodyLines = [
	            marker,
	            '## 🐳 Docker Images Published for Testing',
	            '',
	            `Images for this PR have been published to the [\`${REGISTRY}\`](https://hub.docker.com/u/${REGISTRY}) Docker Hub registry, tagged \`${PR_TAG}\`.`,
	            '',
	            'Run DataHub quickstart with these images:',
	            '',
	            '```bash',
	            `DATAHUB_VERSION=${PR_TAG} datahub docker quickstart`,
	            '```',
	            '',
	            'Or pull an individual image, e.g.:',
	            '',
	            '```bash',
	            `docker pull ${REGISTRY}/datahub-gms:${PR_TAG}`,
	            '```',
	            '',
	            `Pinned to commit \`${SHORT_SHA}\`.`,
	          ];
	          const body = bodyLines.join('\n');

	          const { owner, repo } = context.repo;
	          const issue_number = context.payload.pull_request.number;
	          const allComments = await github.paginate(github.rest.issues.listComments, {
	            owner,
	            repo,
	            issue_number,
	          });
	          // Only a bot-authored comment carrying the marker counts as ours.
	          const previous = allComments.find(
	            (comment) => comment.user?.type === 'Bot' && comment.body?.includes(marker),
	          );
	          if (!previous) {
	            await github.rest.issues.createComment({ owner, repo, issue_number, body });
	          } else {
	            await github.rest.issues.updateComment({ owner, repo, comment_id: previous.id, body });
	          }

	# Produces the pytest and cypress batch matrices; an empty matrix means the
	# corresponding smoke-test job is skipped.
	smoke_test_matrix:
	  runs-on: ${{ needs.setup.outputs.test_runner_type_small }}
	  needs: setup
	  outputs:
	    cypress_matrix: ${{ steps.set-matrix.outputs.cypress_matrix }}
	    pytest_matrix: ${{ steps.set-matrix.outputs.pytest_matrix }}
	  steps:
	    - id: set-batch-count
	      # Tests are split simply to ensure the configured number of batches for parallelization. This may need
	      # increasing as new tests are added and lengthen the run, to the point where an additional parallel batch helps.
	      # python_batch_count is used to split pytests in the smoke-test (batches of actual test functions)
	      # cypress_batch_count is used to split the collection of cypress test specs into batches.
	      run: |
	        if [[ "${{ env.IS_FORK }}" == "true" ]]; then
	          echo "cypress_batch_count=5" >> "$GITHUB_OUTPUT"
	          echo "python_batch_count=3" >> "$GITHUB_OUTPUT"
	        else
	          echo "cypress_batch_count=8" >> "$GITHUB_OUTPUT"
	          echo "python_batch_count=7" >> "$GITHUB_OUTPUT"
	        fi

	    - id: set-matrix
	      run: |
	        # Build the comma-separated matrix entries, one per batch (0-based).
	        python_batch_count=${{ steps.set-batch-count.outputs.python_batch_count }}
	        pytest_items='{"batch":"0","batch_count":"'"$python_batch_count"'"}'
	        for ((i=1;i<python_batch_count;i++)); do
	          pytest_items="$pytest_items"',{"batch_count":"'"$python_batch_count"'","batch":"'"$i"'"}'
	        done

	        cypress_batch_count=${{ steps.set-batch-count.outputs.cypress_batch_count }}
	        cypress_items='{"batch":"0","batch_count":"'"$cypress_batch_count"'"}'
	        for ((i=1;i<cypress_batch_count;i++)); do
	          cypress_items="$cypress_items"',{"batch_count":"'"$cypress_batch_count"'","batch":"'"$i"'"}'
	        done

	        run_both=false
	        run_pytest=false
	        run_cypress=false

	        # Decide which suites to run; the first matching condition wins.
	        if [[ "${{ needs.setup.outputs.backend_change }}" == 'true' || "${{ needs.setup.outputs.smoke_test_change }}" == 'true' || "${{ needs.setup.outputs.publish }}" == 'true' ]]; then
	          run_both=true
	        elif [[ "${{ needs.setup.outputs.frontend_only }}" == 'true' ]]; then
	          run_cypress=true
	        elif [[ "${{ needs.setup.outputs.connector_source_only }}" == 'true' ]]; then
	          # Connector-only changes (source implementations, their tests, docs) don't affect
	          # smoke tests. Smoke tests exercise the platform via CLI/SDK/APIs, not individual
	          # connectors. Skip to save ~140 min of compute.
	          : # both remain false → empty matrices → both jobs skipped
	        elif [[ "${{ needs.setup.outputs.ingestion_only }}" == 'true' ]]; then
	          run_pytest=true
	        fi

	        if [[ "$run_both" == 'true' || "$run_pytest" == 'true' ]]; then
	          echo "pytest_matrix={\"include\":[$pytest_items]}" >> "$GITHUB_OUTPUT"
	        else
	          echo "pytest_matrix={\"include\":[]}" >> "$GITHUB_OUTPUT"
	        fi

	        if [[ "$run_both" == 'true' || "$run_cypress" == 'true' ]]; then
	          echo "cypress_matrix={\"include\":[$cypress_items]}" >> "$GITHUB_OUTPUT"
	        else
	          echo "cypress_matrix={\"include\":[]}" >> "$GITHUB_OUTPUT"
	        fi

	# Starts a quickstart DataHub instance and runs the Java client integration
	# tests against it.
	java_integration_tests:
	  name: Java SDK V2 Integration Tests
	  runs-on: ${{ needs.setup.outputs.test_runner_type }}
	  needs: [setup, base_build]
	  # `always()` lets the job be evaluated even when base_build was skipped
	  # (fork PRs); `!failure() && !cancelled()` still stops it after a failed
	  # or cancelled dependency.
	  if: ${{ always() && !failure() && !cancelled() && (needs.setup.outputs.backend_change == 'true' || needs.setup.outputs.java_client_change == 'true') }}
	  steps:
	    - name: Check out the repo
	      uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7

	    - uses: ./.github/actions/restore-dependency-caches
	      with:
	        uv_cache_key: ${{ needs.setup.outputs.uv_cache_key }}
	        uv_cache_key_prefix: ${{ needs.setup.outputs.uv_cache_key_prefix }}
	        yarn_cache_key: ${{ needs.setup.outputs.yarn_cache_key }}
	        yarn_cache_key_prefix: ${{ needs.setup.outputs.yarn_cache_key_prefix }}

	    - name: Set up JDK 21
	      uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
	      with:
	        distribution: "zulu"
	        java-version: 21

	    - name: Free up disk space
	      uses: ./.github/actions/free-disk-space

	    - name: Set up Depot CLI
	      if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
	      uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1

	    - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
	      with:
	        python-version: "3.11"
	        cache: "pip"

	    - uses: gradle/actions/setup-gradle@0723195856401067f7a2779048b490ace7a47d7c # v5.0.2
	      if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}

	    - name: Login to DockerHub
	      uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
	      if: ${{ needs.setup.outputs.docker-login == 'true' }}
	      with:
	        username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
	        password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}

	    - name: Disk Space Analysis
	      run: |
	        echo "=== Disk Usage Overview ==="
	        df -h
	        echo -e "\n=== Docker Disk Usage ==="
	        docker system df -v

	    # Without the depot cache the images are built locally in this job.
	    - name: Build images
	      timeout-minutes: 60
	      if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
	      env:
	        DOCKER_CACHE: GITHUB
	        # May derive from a PR label, so it is passed through the environment
	        # instead of being interpolated into the script text.
	        BUILD_TASK: ${{ needs.setup.outputs.smoke_build_task || ':docker:buildImagesQuickstartDebugConsumers' }}
	      run: |
	        echo "Using build task: $BUILD_TASK"
	        ./gradlew "$BUILD_TASK" -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }}
	        docker images

	    # With the depot cache the images built by base_build are pulled instead.
	    - name: Pull images from depot
	      if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
	      run: |
	        depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}"
	        docker images

	    - name: Disk Space Analysis
	      run: |
	        echo "=== Disk Usage Overview ==="
	        df -h
	        echo -e "\n=== Docker Disk Usage ==="
	        docker system df -v

	    - name: Run quickstart
	      env:
	        DATAHUB_TELEMETRY_ENABLED: false
	        DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
	        # NOTE(review): DATAHUB_ACTIONS_IMAGE is not set in the workflow-level
	        # `env` visible in this file, so this may expand to an empty string -- confirm.
	        DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_ACTIONS_IMAGE }}
	        ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
	        ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
	        # A profile resolved from a "smoke:" PR label overrides the default.
	        PROFILE_NAME: ${{ needs.setup.outputs.smoke_profile_name || env.PROFILE_NAME }}
	      run: |
	        echo "Using compose profile: $PROFILE_NAME"
	        ./smoke-test/run-quickstart.sh

	    - name: Disk Check
	      run: df -h . && docker images

	    - name: Disable ES Disk Threshold
	      run: |
	        curl -XPUT "http://localhost:9200/_cluster/settings" \
	          -H 'Content-Type: application/json' -d'{
	            "persistent": {
	              "cluster": {
	                "routing": {
	                  "allocation.disk.threshold_enabled": false
	                }
	              }
	            }
	          }'

	    - name: Install dependencies
	      run: ./metadata-ingestion/scripts/install_deps.sh

	    - name: Build datahub cli
	      run: ./gradlew :metadata-ingestion:install

	    - name: Java SDK V2 Integration Tests
	      env:
	        DATAHUB_SERVER: http://localhost:8080
	        ADMIN_USERNAME: datahub
	        ADMIN_PASSWORD: datahub
	      run: |
	        echo "Running Java SDK V2 integration tests against running DataHub instance..."
	        # The wildcards select every test class whose name contains "Integration".
	        ./gradlew :metadata-integration:java:datahub-client:test --tests "*Integration*"

	    - name: Upload Java SDK V2 coverage to Codecov
	      if: ${{ always() }}
	      uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
	      with:
	        token: ${{ secrets.CODECOV_TOKEN }}
	        directory: ./build/coverage-reports/metadata-integration/java/datahub-client/
	        flags: metadata-integration
	        name: java-sdk-v2-integration
	        fail_ci_if_error: false
	        verbose: true
	        override_branch: ${{ github.head_ref || github.ref_name }}

	    - name: Report test results
	      if: (!cancelled())
	      uses: ./.github/actions/report-test-results
	      with:
	        artifact-name: Test Results (java integration tests)
	        test-results-paths: |
	          **/build/test-results/**/*.xml
	          !**/binary/**
	        junit-file-globs: |
	          **/build/test-results/**/*.xml

	    - name: Upload test results to Codecov
	      if: ${{ !cancelled() }}
	      uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
	      with:
	        token: ${{ secrets.CODECOV_TOKEN }}
	        report_type: test_results
	        override_branch: ${{ github.head_ref || github.ref_name }}

	    - name: Store logs
	      if: failure()
	      run: |
	        docker ps -a
	        # TEST_STRATEGY is set for the sourced script -- presumably used to
	        # name the collected log files; confirm in docker_logs.sh.
	        TEST_STRATEGY="-java-integration"
	        source .github/scripts/docker_logs.sh

	    - name: Upload logs
	      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	      if: failure()
	      with:
	        name: docker-logs-java-integration
	        path: "docker_logs/*.log"
	        retention-days: 5

	pytest_tests:
	name: Pytest Smoke Tests (Batch ${{ matrix.batch }}/${{ matrix.batch_count }})
	runs-on: ${{ needs.setup.outputs.test_runner_type }}
	needs: [setup, smoke_test_matrix, base_build]
	strategy:
	fail-fast: false
	matrix: ${{ fromJson(needs.smoke_test_matrix.outputs.pytest_matrix \|\| '{"include":[]}') }}
	if: ${{ needs.smoke_test_matrix.outputs.pytest_matrix != '' && needs.smoke_test_matrix.outputs.pytest_matrix != '{"include":[]}' }}
	env:
	MIXPANEL_API_SECRET: ${{ secrets.MIXPANEL_API_SECRET }}
	MIXPANEL_PROJECT_ID: ${{ secrets.MIXPANEL_PROJECT_ID }}
	steps:
	- name: Check out the repo
	uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7

	- uses: ./.github/actions/restore-dependency-caches
	with:
	uv_cache_key: ${{ needs.setup.outputs.uv_cache_key }}
	uv_cache_key_prefix: ${{ needs.setup.outputs.uv_cache_key_prefix }}
	yarn_cache_key: ${{ needs.setup.outputs.yarn_cache_key }}
	yarn_cache_key_prefix: ${{ needs.setup.outputs.yarn_cache_key_prefix }}

	- name: Set up JDK 21
	uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
	with:
	distribution: "zulu"
	java-version: 21

	- name: Free up disk space
	uses: ./.github/actions/free-disk-space

	- name: Set up Depot CLI
	if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
	uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1

	- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
	with:
	python-version: "3.11"
	cache: "pip"

	- name: Parse previous results for retry
	id: retry-check
	if: github.run_attempt > 1
	uses: ./.github/actions/smoke-test-retry
	with:
	test_strategy: pytests
	batch: ${{ matrix.batch }}
	run_id: ${{ github.run_id }}
	github_token: ${{ github.token }}

	- uses: gradle/actions/setup-gradle@0723195856401067f7a2779048b490ace7a47d7c # v5.0.2
	if: ${{ needs.setup.outputs.use_depot_cache != 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}

	- name: Login to DockerHub
	uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
	if: ${{ needs.setup.outputs.docker-login == 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
	with:
	username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
	password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}

	- name: Disk Space Analysis
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: \|
	echo "=== Disk Usage Overview ==="
	df -h
	echo -e "\n=== Docker Disk Usage ==="
	docker system df -v

	- name: Build images
	timeout-minutes: 60
	if: ${{ needs.setup.outputs.use_depot_cache != 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
	env:
	DOCKER_CACHE: GITHUB
	run: \|
	BUILD_TASK="${{ needs.setup.outputs.smoke_build_task \|\| ':docker:buildImagesQuickstartDebugConsumers' }}"
	echo "Using build task: $BUILD_TASK"
	./gradlew $BUILD_TASK -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }}
	docker images

	- name: Pull images from depot
	if: ${{ needs.setup.outputs.use_depot_cache == 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
	run: \|
	depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}"
	docker images

	- name: Disk Space Analysis
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: \|
	echo "=== Disk Usage Overview ==="
	df -h
	echo -e "\n=== Docker Disk Usage ==="
	docker system df -v

	- name: Run quickstart
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	env:
	DATAHUB_TELEMETRY_ENABLED: false
	DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
	DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_ACTIONS_IMAGE }}
	ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
	ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
	PROFILE_NAME: ${{ needs.setup.outputs.smoke_profile_name \|\| env.PROFILE_NAME }}
	run: \|
	echo "Using compose profile: $PROFILE_NAME"
	./smoke-test/run-quickstart.sh

	- name: Disk Check
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: df -h . && docker images

	- name: Disable ES Disk Threshold
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: \|
	curl -XPUT "http://localhost:9200/_cluster/settings" \
	-H 'Content-Type: application/json' -d'{
	"persistent": {
	"cluster": {
	"routing": {
	"allocation.disk.threshold_enabled": false
	}
	}
	}
	}'

	- name: Install dependencies
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: ./metadata-ingestion/scripts/install_deps.sh

	- name: Build datahub cli
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: ./gradlew :metadata-ingestion:install

	- name: Pytest smoke tests
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	env:
	RUN_QUICKSTART: false
	DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
	CLEANUP_DATA: "false"
	TEST_STRATEGY: pytests
	BATCH_COUNT: ${{ matrix.batch_count }}
	BATCH_NUMBER: ${{ matrix.batch }}
	FILTERED_TESTS: ${{ steps.retry-check.outputs.filtered_tests_file \|\| '' }}
	run: \|
	if [[ -n "$FILTERED_TESTS" && -f "$FILTERED_TESTS" ]]; then
	echo "=========================================="
	echo "RETRY MODE: Running only failed pytest modules"
	echo "=========================================="
	echo "Failed modules to retry:"
	cat "$FILTERED_TESTS"
	echo "=========================================="
	elif (( ${{ github.run_attempt }} > 1 )); then
	echo "RETRY MODE: Running all tests (fallback)"
	fi
	echo "$DATAHUB_VERSION"
	./gradlew --stop
	./smoke-test/smoke.sh

	- name: Disk Check
	run: df -h . && docker images

	- name: Store logs
	if: failure()
	run: \|
	docker ps -a
	TEST_STRATEGY="-pytests-${{ matrix.batch }}"
	source .github/scripts/docker_logs.sh

	- name: Upload logs
	uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	if: failure()
	with:
	name: docker-logs-pytests-${{ matrix.batch }}
	path: "docker_logs/*.log"
	retention-days: 5

	- name: Report test results
	if: (!cancelled())
	uses: ./.github/actions/report-test-results
	with:
	artifact-name: Test Results (smoke tests) pytests ${{ matrix.batch }}
	test-results-paths: \|
	*/junit..xml
	!/binary/
	junit-file-globs: \|
	*/junit..xml

	# Best-effort telemetry: runs only after a failure and never fails the job
	# (continue-on-error). The script exits 0 early when POSTHOG_API_KEY is empty.
	# Otherwise it copies every junit.*.xml found under the workspace into a temp
	# directory and passes that directory, plus run/batch metadata, to
	# send_failed_tests_to_posthog.py.
	- name: Send failed test metrics to PostHog
	if: failure()
	continue-on-error: true
	env:
	POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
	POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
	# head_ref is used when set, otherwise the ref name of the run.
	GH_HEAD_REF: ${{ github.head_ref \|\| github.ref_name }}
	run: \|
	if [ -z "$POSTHOG_API_KEY" ]; then
	echo "⚠️ POSTHOG_API_KEY not configured, skipping test failure metrics"
	exit 0
	fi

	TEMP_DIR=$(mktemp -d)
	mkdir -p "$TEMP_DIR/test-results"
	find . -name "junit.*.xml" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null \|\| true

	python3 .github/scripts/send_failed_tests_to_posthog.py \
	--input-dir "$TEMP_DIR/test-results" \
	--posthog-api-key "$POSTHOG_API_KEY" \
	--posthog-host "${POSTHOG_HOST:-https://app.posthog.com}" \
	--repository "${{ github.repository }}" \
	--workflow-name "${{ github.workflow }}" \
	--branch "${GH_HEAD_REF}" \
	--run-id "${{ github.run_id }}" \
	--run-attempt "${{ github.run_attempt }}" \
	--batch "${{ matrix.batch }}" \
	--batch-count "${{ strategy.job-total }}" \
	--test-strategy "pytests"

	rm -rf "$TEMP_DIR"

	# Sends test results (report_type: test_results) to Codecov unless the run was
	# cancelled. The branch is overridden with head_ref, or the ref name when
	# head_ref is empty.
	- name: Upload test results to Codecov
	if: ${{ !cancelled() }}
	uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
	with:
	token: ${{ secrets.CODECOV_TOKEN }}
	report_type: test_results
	override_branch: ${{ github.head_ref \|\| github.ref_name }}

	# Saves the uv cache under the key computed by the setup job. Only batch '0'
	# on master saves.
	- uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
	if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
	with:
	path: ~/.cache/uv
	key: ${{ needs.setup.outputs.uv_cache_key }}

	# Same condition as above, for the yarn cache.
	- uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
	if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
	with:
	path: ~/.cache/yarn
	key: ${{ needs.setup.outputs.yarn_cache_key }}

	# Cypress smoke-test job: one matrix entry per batch, taken from the
	# cypress_matrix output of the smoke_test_matrix job.
	cypress_tests:
	name: Cypress Smoke Tests (Batch ${{ matrix.batch }}/${{ matrix.batch_count }})
	runs-on: ${{ needs.setup.outputs.test_runner_type }}
	needs: [setup, smoke_test_matrix, base_build]
	strategy:
	# A failing batch does not cancel the other batches.
	fail-fast: false
	# An unset cypress_matrix output is replaced by an empty include list before
	# being parsed with fromJson.
	matrix: ${{ fromJson(needs.smoke_test_matrix.outputs.cypress_matrix \|\| '{"include":[]}') }}
	# The whole job is skipped when the matrix output is empty or has no entries.
	if: ${{ needs.smoke_test_matrix.outputs.cypress_matrix != '' && needs.smoke_test_matrix.outputs.cypress_matrix != '{"include":[]}' }}
	# Job-level env: Mixpanel credentials are exposed to every step of this job.
	env:
	MIXPANEL_API_SECRET: ${{ secrets.MIXPANEL_API_SECRET }}
	MIXPANEL_PROJECT_ID: ${{ secrets.MIXPANEL_PROJECT_ID }}
	steps:
	# Checkout comes first because the following steps use actions stored in this repo.
	- name: Check out the repo
	uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7

	# Local composite action; receives the uv/yarn cache keys and prefixes computed
	# by the setup job.
	- uses: ./.github/actions/restore-dependency-caches
	with:
	uv_cache_key: ${{ needs.setup.outputs.uv_cache_key }}
	uv_cache_key_prefix: ${{ needs.setup.outputs.uv_cache_key_prefix }}
	yarn_cache_key: ${{ needs.setup.outputs.yarn_cache_key }}
	yarn_cache_key_prefix: ${{ needs.setup.outputs.yarn_cache_key_prefix }}

	# Installs the Zulu distribution of JDK 21.
	- name: Set up JDK 21
	uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
	with:
	distribution: "zulu"
	java-version: 21

	- name: Free up disk space
	uses: ./.github/actions/free-disk-space

	# The Depot CLI is only installed when the setup job enabled the depot cache.
	- name: Set up Depot CLI
	if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
	uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1

	# Python 3.11 with setup-python's pip caching enabled.
	- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
	with:
	python-version: "3.11"
	cache: "pip"

	# Runs only on re-run attempts (run_attempt > 1). Its outputs parse_result and
	# filtered_tests_file are read by later steps; on a first attempt the step is
	# skipped, so those outputs are empty and every
	# `parse_result != 'all_passed'` condition below evaluates to true.
	- name: Parse previous results for retry
	id: retry-check
	if: github.run_attempt > 1
	uses: ./.github/actions/smoke-test-retry
	with:
	test_strategy: cypress
	batch: ${{ matrix.batch }}
	run_id: ${{ github.run_id }}
	github_token: ${{ github.token }}

	# Gradle setup is only done when the depot cache is not in use and there is
	# still something to run.
	- uses: gradle/actions/setup-gradle@0723195856401067f7a2779048b490ace7a47d7c # v5.0.2
	if: ${{ needs.setup.outputs.use_depot_cache != 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}

	# Logs in only when the setup job set docker-login to 'true'.
	- name: Login to DockerHub
	uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
	if: ${{ needs.setup.outputs.docker-login == 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
	with:
	username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
	password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}

	# Disk and Docker usage snapshot taken before images are built or pulled.
	- name: Disk Space Analysis
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: \|
	echo "=== Disk Usage Overview ==="
	df -h
	echo -e "\n=== Docker Disk Usage ==="
	docker system df -v

	# Local Gradle image build, used only when the depot cache is disabled.
	# DOCKER_CACHE is overridden to GITHUB for this step (the workflow-level value
	# is "DEPOT"). The build task comes from the setup job, defaulting to
	# :docker:buildImagesQuickstartDebugConsumers when that output is empty.
	- name: Build images
	timeout-minutes: 60
	if: ${{ needs.setup.outputs.use_depot_cache != 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
	env:
	DOCKER_CACHE: GITHUB
	run: \|
	BUILD_TASK="${{ needs.setup.outputs.smoke_build_task \|\| ':docker:buildImagesQuickstartDebugConsumers' }}"
	echo "Using build task: $BUILD_TASK"
	./gradlew $BUILD_TASK -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }}
	docker images

	# Depot path: pulls the images of the base_build job's build id instead of
	# building them locally.
	- name: Pull images from depot
	if: ${{ needs.setup.outputs.use_depot_cache == 'true' && steps.retry-check.outputs.parse_result != 'all_passed' }}
	run: \|
	depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}"
	docker images

	# Same snapshot again, now that the images are present locally.
	- name: Disk Space Analysis
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: \|
	echo "=== Disk Usage Overview ==="
	df -h
	echo -e "\n=== Docker Disk Usage ==="
	docker system df -v

	# Starts the stack through smoke-test/run-quickstart.sh. The compose profile
	# comes from the setup job, falling back to the workflow-level PROFILE_NAME.
	# NOTE(review): DATAHUB_ACTIONS_IMAGE is read from env.DATAHUB_ACTIONS_IMAGE,
	# which is not defined in the workflow-level or job-level env visible in this
	# file - confirm where it is set, otherwise it resolves to an empty string.
	- name: Run quickstart
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	env:
	DATAHUB_TELEMETRY_ENABLED: false
	DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
	DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_ACTIONS_IMAGE }}
	ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
	ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
	PROFILE_NAME: ${{ needs.setup.outputs.smoke_profile_name \|\| env.PROFILE_NAME }}
	run: \|
	echo "Using compose profile: $PROFILE_NAME"
	./smoke-test/run-quickstart.sh

	- name: Disk Check
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: df -h . && docker images

	# PUTs a persistent cluster setting to the search cluster on localhost:9200
	# that turns off disk-threshold-based allocation.
	- name: Disable ES Disk Threshold
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: \|
	curl -XPUT "http://localhost:9200/_cluster/settings" \
	-H 'Content-Type: application/json' -d'{
	"persistent": {
	"cluster": {
	"routing": {
	"allocation.disk.threshold_enabled": false
	}
	}
	}
	}'

	- name: Install dependencies
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: ./metadata-ingestion/scripts/install_deps.sh

	# Runs the Gradle install task of the metadata-ingestion project.
	- name: Build datahub cli
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	run: ./gradlew :metadata-ingestion:install

	# Runs this batch's Cypress smoke tests through smoke-test/smoke.sh. Skipped
	# only when the retry-check step reported parse_result == 'all_passed'.
	- name: Cypress smoke tests
	if: steps.retry-check.outputs.parse_result != 'all_passed'
	env:
	RUN_QUICKSTART: false
	DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
	CYPRESS_RECORD_KEY: ${{ secrets.CYPRESS_RECORD_KEY }}
	CLEANUP_DATA: "false"
	TEST_STRATEGY: cypress
	BATCH_COUNT: ${{ matrix.batch_count }}
	BATCH_NUMBER: ${{ matrix.batch }}
	# Path emitted by the retry-check step; falls back to '' when that output is empty.
	FILTERED_TESTS: ${{ steps.retry-check.outputs.filtered_tests_file \|\| '' }}
	# The script itself only logs which retry mode applies, prints the version,
	# stops Gradle daemons and launches smoke.sh. NOTE(review): the env vars above
	# are presumably consumed inside smoke.sh - confirm there.
	run: \|
	if [[ -n "$FILTERED_TESTS" && -f "$FILTERED_TESTS" ]]; then
	echo "=========================================="
	echo "RETRY MODE: Running only failed Cypress tests"
	echo "=========================================="
	echo "Failed tests to retry:"
	cat "$FILTERED_TESTS"
	echo "=========================================="
	elif (( ${{ github.run_attempt }} > 1 )); then
	echo "RETRY MODE: Running all tests (fallback)"
	fi
	echo "$DATAHUB_VERSION"
	./gradlew --stop
	./smoke-test/smoke.sh

	# Prints free space for the workspace filesystem and lists local Docker images.
	- name: Disk Check
	run: df -h . && docker images

	# On failure: lists all containers, then sources docker_logs.sh with a
	# batch-specific TEST_STRATEGY suffix. NOTE(review): how docker_logs.sh uses
	# TEST_STRATEGY is not visible in this file.
	- name: Store logs
	if: failure()
	run: \|
	docker ps -a
	TEST_STRATEGY="-cypress-${{ matrix.batch }}"
	source .github/scripts/docker_logs.sh

	# On failure: uploads docker_logs/*.log as a per-batch artifact kept for 5 days.
	- name: Upload logs
	uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	if: failure()
	with:
	name: docker-logs-cypress-${{ matrix.batch }}
	path: "docker_logs/*.log"
	retention-days: 5

	- name: Stage Cypress screenshots for artifact upload
	# Copy screenshots alongside the mochawesome JSONs so the post-matrix
	# cypress_html_report job can embed them into the unified HTML report.
	# Runs with always(), so it must not fail when Cypress stopped before the
	# reporter created its output directory: the destination parent is created
	# first, otherwise `cp -r` aborts with "No such file or directory".
	if: always()
	run: \|
	if [ -d smoke-test/tests/cypress/cypress/screenshots ]; then
	mkdir -p smoke-test/tests/cypress/build/mochawesome-report
	cp -r smoke-test/tests/cypress/cypress/screenshots \
	smoke-test/tests/cypress/build/mochawesome-report/screenshots
	fi

	- name: Upload Cypress mochawesome JSON results
	# cypress-mochawesome-reporter writes one JSON per spec to .jsons/ (a hidden subdir)
	# via the Cypress after:spec hook — survives Electron crashes. include-hidden-files is
	# required so upload-artifact picks up that dot-prefixed directory.
	# The artifact is kept for 1 day only; the artifact name matches the
	# cypress-mochawesome-json-* pattern downloaded by the cypress_html_report job.
	uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	if: always()
	with:
	name: cypress-mochawesome-json-${{ matrix.batch }}
	path: smoke-test/tests/cypress/build/mochawesome-report/
	include-hidden-files: true
	retention-days: 1

	# On failure: uploads the raw Cypress screenshots directory as its own
	# per-batch artifact (no retention-days set, so the default applies).
	- name: Upload screenshots
	uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	if: failure()
	with:
	name: cypress-snapshots-cypress-${{ matrix.batch }}
	path: smoke-test/tests/cypress/cypress/screenshots/

	# Hands this batch's Cypress JUnit XML to the local report-test-results action.
	# Runs unless the workflow was cancelled, so failed batches are still reported.
	- name: Report test results
	if: (!cancelled())
	uses: ./.github/actions/report-test-results
	with:
	artifact-name: Test Results (smoke tests) cypress ${{ matrix.batch }}
	# Recursive globs for the cypress-test-*.xml files (the same file name pattern
	# the PostHog step searches for with `find . -name "cypress-test-*.xml"`);
	# anything under a binary/ directory is excluded.
	# NOTE(review): input semantics are defined by the local action - confirm there.
	test-results-paths: \|
	**/smoke-test-results/cypress-test-*.xml
	!**/binary/**
	junit-file-globs: \|
	**/smoke-test-results/cypress-test-*.xml

	# Best-effort telemetry: runs only after a failure and never fails the job
	# (continue-on-error). The script exits 0 early when POSTHOG_API_KEY is empty.
	# Otherwise it copies every cypress-test-*.xml found under the workspace into a
	# temp directory and passes that directory, plus run/batch metadata, to
	# send_failed_tests_to_posthog.py.
	- name: Send failed test metrics to PostHog
	if: failure()
	continue-on-error: true
	env:
	POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
	POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
	# head_ref is used when set, otherwise the ref name of the run.
	GH_HEAD_REF: ${{ github.head_ref \|\| github.ref_name }}
	run: \|
	if [ -z "$POSTHOG_API_KEY" ]; then
	echo "⚠️ POSTHOG_API_KEY not configured, skipping test failure metrics"
	exit 0
	fi

	TEMP_DIR=$(mktemp -d)
	mkdir -p "$TEMP_DIR/test-results"
	find . -name "cypress-test-*.xml" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null \|\| true

	python3 .github/scripts/send_failed_tests_to_posthog.py \
	--input-dir "$TEMP_DIR/test-results" \
	--posthog-api-key "$POSTHOG_API_KEY" \
	--posthog-host "${POSTHOG_HOST:-https://app.posthog.com}" \
	--repository "${{ github.repository }}" \
	--workflow-name "${{ github.workflow }}" \
	--branch "${GH_HEAD_REF}" \
	--run-id "${{ github.run_id }}" \
	--run-attempt "${{ github.run_attempt }}" \
	--batch "${{ matrix.batch }}" \
	--batch-count "${{ strategy.job-total }}" \
	--test-strategy "cypress"

	rm -rf "$TEMP_DIR"

	# Sends test results (report_type: test_results) to Codecov unless the run was
	# cancelled. The branch is overridden with head_ref, or the ref name when
	# head_ref is empty.
	- name: Upload test results to Codecov
	if: ${{ !cancelled() }}
	uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
	with:
	token: ${{ secrets.CODECOV_TOKEN }}
	report_type: test_results
	override_branch: ${{ github.head_ref \|\| github.ref_name }}

	# Saves the uv cache under the key computed by the setup job. Only batch '0'
	# on master saves. Pinned to the same actions/cache release (v5.0.4) as every
	# other cache save/restore step in this workflow.
	- uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
	if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
	with:
	path: ~/.cache/uv
	key: ${{ needs.setup.outputs.uv_cache_key }}

	# Same condition and pin as above, for the yarn cache.
	- uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
	if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
	with:
	path: ~/.cache/yarn
	key: ${{ needs.setup.outputs.yarn_cache_key }}

	# Post-matrix job that gathers the mochawesome output of all Cypress batches.
	# Runs unless the workflow was cancelled or cypress_tests was skipped, so it
	# also runs when Cypress batches failed.
	cypress_html_report:
	name: Generate unified Cypress HTML report
	runs-on: ubuntu-latest
	needs: [cypress_tests]
	if: ${{ !cancelled() && needs.cypress_tests.result != 'skipped' }}
	steps:
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	- name: Download all Cypress mochawesome JSON artifacts
	uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
	with:
	pattern: cypress-mochawesome-json-*
	path: mochawesome-results
	# Do NOT use merge-multiple: all batches write files with the same names
	# (mochawesome.json, mochawesome_001.json, ...). merge-multiple would cause
	# later batches to overwrite earlier ones in the same directory. Without it,
	# each artifact extracts to its own subdirectory:
	# mochawesome-results/cypress-mochawesome-json-0/.jsons/mochawesome.json
	# mochawesome-results/cypress-mochawesome-json-1/.jsons/mochawesome.json
	# ...
	# Collects every batch's per-spec mochawesome JSON and screenshots, writes the
	# JSON paths to json-file-list.txt, installs the report tooling and runs
	# .github/scripts/generate-cypress-report.js. When no JSON file is found the
	# step only logs a message and succeeds.
	- name: Generate HTML report
	run: \|
	# Each artifact extracts to its own subdirectory, so per-spec JSONs land at:
	# mochawesome-results/cypress-mochawesome-json-<N>/.jsons/mochawesome_NNN.json
	# find -path matches the pattern against the whole path, so wildcards are
	# needed on both sides of the hidden .jsons directory.
	mapfile -t JSON_FILES < <(find mochawesome-results -path "*/.jsons/*.json" 2>/dev/null)
	if [[ ${#JSON_FILES[@]} -gt 0 ]]; then
	# Collect screenshots from all batch subdirectories into a flat dir.
	# enhanceReport resolves paths relative to this directory.
	mkdir -p combined-screenshots
	find mochawesome-results -mindepth 1 -maxdepth 1 -type d \| while read -r batch_dir; do
	if [ -d "$batch_dir/screenshots" ]; then
	cp -rn "$batch_dir/screenshots/." combined-screenshots/
	fi
	done
	printf '%s\n' "${JSON_FILES[@]}" > json-file-list.txt
	npm install --no-save \
	mochawesome-merge \
	mochawesome-report-generator \
	cypress-mochawesome-reporter \
	fs-extra
	node .github/scripts/generate-cypress-report.js
	else
	echo "No mochawesome JSON files found – all Cypress batches may have been skipped or crashed before any spec completed"
	fi
	# Uploads the cypress-html-report/ directory as an artifact kept for 5 days.
	# NOTE(review): the directory is presumably produced by
	# generate-cypress-report.js - confirm in that script.
	- name: Upload unified Cypress HTML report
	uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	with:
	name: cypress-html-report
	path: cypress-html-report/
	retention-days: 5

	# Playwright E2E job: one matrix entry per shard, taken from the setup job's
	# playwright_matrix output. Each shard is limited to 20 minutes.
	playwright_test:
	name: Playwright E2E Tests (Shard ${{ matrix.shard }}/${{ matrix.shard_count }})
	runs-on: ${{ needs.setup.outputs.test_runner_type }}
	needs: [setup, base_build]
	timeout-minutes: 20
	strategy:
	# A failing shard does not cancel the other shards.
	fail-fast: false
	# An unset playwright_matrix output is replaced by an empty include list before
	# being parsed with fromJson.
	matrix: ${{ fromJson(needs.setup.outputs.playwright_matrix \|\| '{"include":[]}') }}
	# Runs when nothing upstream failed, the run was not cancelled, and the matrix
	# output is neither empty nor an empty include list.
	if: ${{ always() && !failure() && !cancelled() && needs.setup.outputs.playwright_matrix != '' && needs.setup.outputs.playwright_matrix != '{"include":[]}' }}
	steps:
	# Restores the Playwright yarn cache by exact key, with the key prefix as fallback.
	- uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
	with:
	path: ~/.cache/yarn
	key: ${{ needs.setup.outputs.playwright_yarn_cache_key }}
	restore-keys: ${{ needs.setup.outputs.playwright_yarn_cache_key_prefix }}

	- name: Check out the repo
	uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7

	- name: Free up disk space
	uses: ./.github/actions/free-disk-space

	# Installs the Zulu distribution of JDK 21.
	- name: Set up JDK 21
	uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
	with:
	distribution: "zulu"
	java-version: 21

	# Python 3.11 with setup-python's pip caching enabled.
	- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
	with:
	python-version: "3.11"
	cache: "pip"

	- name: Set up Node.js
	uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
	with:
	node-version: "22"

	# The Depot CLI is only installed when the setup job enabled the depot cache.
	- name: Set up Depot CLI
	if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
	uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1

	# Logs in only when the setup job set docker-login to 'true'.
	- name: Login to DockerHub
	uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
	if: ${{ needs.setup.outputs.docker-login == 'true' }}
	with:
	username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
	password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}

	# Local Gradle image build, used only when the depot cache is disabled.
	# DOCKER_CACHE is overridden to GITHUB for this step. The build task comes from
	# the setup job, defaulting to :docker:buildImagesQuickstartDebugConsumers when
	# that output is empty.
	- name: Build images (fork — no depot cache)
	timeout-minutes: 60
	if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
	run: \|
	BUILD_TASK="${{ needs.setup.outputs.smoke_build_task \|\| ':docker:buildImagesQuickstartDebugConsumers' }}"
	echo "Using build task: $BUILD_TASK"
	./gradlew $BUILD_TASK -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }}
	docker images
	env:
	DOCKER_CACHE: GITHUB

	# Depot path: pulls the images of the base_build job's build id instead of
	# building them locally.
	- name: Pull images from depot
	if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
	run: \|
	depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}"
	docker images

	# Starts the stack through smoke-test/run-quickstart.sh with telemetry disabled
	# and SKIP_INSTALL_DEV set to "true".
	- name: Run quickstart
	env:
	DATAHUB_TELEMETRY_ENABLED: false
	DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
	SKIP_INSTALL_DEV: "true"
	ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
	ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
	# Must match smoke_test and base_build (e.g. smoke:quickstartPg → quickstart-postgres).
	# Default quickstart-consumers requires mae/mce images that are not built for pg-only profiles.
	PROFILE_NAME: ${{ needs.setup.outputs.smoke_profile_name \|\| env.PROFILE_NAME }}
	run: \|
	echo "Using compose profile: $PROFILE_NAME"
	./smoke-test/run-quickstart.sh

	# PUTs a persistent cluster setting to the search cluster on localhost:9200
	# that turns off disk-threshold-based allocation.
	- name: Disable ES Disk Threshold
	run: \|
	curl -XPUT "http://localhost:9200/_cluster/settings" \
	-H 'Content-Type: application/json' -d'{
	"persistent": {
	"cluster": {
	"routing": {
	"allocation.disk.threshold_enabled": false
	}
	}
	}
	}'

	# Installs the locked yarn dependencies of e2e-test/ui/playwright, then the
	# Chromium browser together with its system dependencies.
	- name: Install Playwright dependencies
	working-directory: e2e-test/ui/playwright
	run: \|
	yarn install --frozen-lockfile
	npx playwright install --with-deps chromium

	# Runs only this shard's slice of the suite against the stack on localhost:9002.
	# NOTE(review): PLAYWRIGHT_REUSE_SERVER is presumably read by the Playwright
	# config to reuse the already-running server - confirm in that config.
	- name: Run Playwright tests
	working-directory: e2e-test/ui/playwright
	env:
	BASE_URL: "http://localhost:9002"
	PLAYWRIGHT_REUSE_SERVER: "1"
	run: \|
	npx playwright test \
	--shard=${{ matrix.shard }}/${{ matrix.shard_count }}

	# Always uploads this shard's blob report (kept 1 day); the artifact name
	# matches the playwright-blob-report-* pattern downloaded by playwright_report.
	- name: Upload Playwright blob report
	uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	if: always()
	with:
	name: playwright-blob-report-${{ matrix.shard }}
	path: e2e-test/ui/playwright/blob-report/
	retention-days: 1

	# Always uploads this shard's junit.xml (kept 5 days).
	- name: Upload Playwright JUnit results
	uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	if: always()
	with:
	name: playwright-junit-${{ matrix.shard }}
	path: e2e-test/ui/playwright/test-results/junit.xml
	retention-days: 5

	# On failure: lists all containers, then sources docker_logs.sh with a
	# shard-specific TEST_STRATEGY suffix. NOTE(review): how docker_logs.sh uses
	# TEST_STRATEGY is not visible in this file.
	- name: Store logs on failure
	if: failure()
	run: \|
	docker ps -a
	TEST_STRATEGY="-playwright-${{ matrix.shard }}"
	source .github/scripts/docker_logs.sh

	# On failure: uploads docker_logs/*.log as a per-shard artifact kept for 5 days.
	- name: Upload Docker logs on failure
	uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	if: failure()
	with:
	name: docker-logs-playwright-${{ matrix.shard }}
	path: "docker_logs/*.log"
	retention-days: 5

	# On failure: uploads the whole test-results/ directory (kept 5 days).
	- name: Upload Playwright traces and screenshots on failure
	uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	if: failure()
	with:
	name: playwright-traces-${{ matrix.shard }}
	path: e2e-test/ui/playwright/test-results/
	retention-days: 5

	# Hands this shard's junit.xml to the local report-test-results action. Runs
	# unless the workflow was cancelled. The condition is quoted because a plain
	# YAML scalar cannot start with '!'.
	- name: Report test results
	if: "!cancelled()"
	uses: ./.github/actions/report-test-results
	with:
	artifact-name: "Test Results (playwright) shard-${{ matrix.shard }}"
	test-results-paths: \|
	**/e2e-test/ui/playwright/test-results/junit.xml
	junit-file-globs: \|
	**/e2e-test/ui/playwright/test-results/junit.xml

	# Sends test results (report_type: test_results) to Codecov unless the run was
	# cancelled. The branch is overridden with head_ref, or the ref name when
	# head_ref is empty.
	- name: Upload test results to Codecov
	if: "!cancelled()"
	uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
	with:
	token: ${{ secrets.CODECOV_TOKEN }}
	report_type: test_results
	override_branch: ${{ github.head_ref \|\| github.ref_name }}

	# Saves the Playwright yarn cache; only shard '1' on master saves.
	- uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
	if: ${{ github.ref == 'refs/heads/master' && matrix.shard == '1' }}
	with:
	path: ~/.cache/yarn
	key: ${{ needs.setup.outputs.playwright_yarn_cache_key }}

	# Post-matrix job that merges the blob reports of all Playwright shards. Runs
	# unless the workflow was cancelled or playwright_test was skipped, so it also
	# runs when shards failed.
	playwright_report:
	name: Merge Playwright HTML report
	runs-on: ubuntu-latest
	needs: [setup, playwright_test]
	if: ${{ !cancelled() && needs.playwright_test.result != 'skipped' }}
	steps:
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

	- name: Set up Node.js
	uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
	with:
	node-version: "22"

	# Restores the Playwright yarn cache by exact key, with the key prefix as fallback.
	- uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
	with:
	path: ~/.cache/yarn
	key: ${{ needs.setup.outputs.playwright_yarn_cache_key }}
	restore-keys: ${{ needs.setup.outputs.playwright_yarn_cache_key_prefix }}

	# Provides the node_modules/.bin/playwright binary that the merge step invokes.
	- name: Install Playwright dependencies
	working-directory: e2e-test/ui/playwright
	run: yarn install --frozen-lockfile

	# Downloads every playwright-blob-report-* artifact under playwright-blob-reports.
	- name: Download all Playwright blob reports
	uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
	with:
	pattern: playwright-blob-report-*
	path: playwright-blob-reports

	# Flattens all downloaded blob .zip files into one directory and merges them
	# with `playwright merge-reports --reporter html`. Unlike the Cypress report
	# job, this step fails (exit 1) when no blob report is found.
	- name: Merge blob reports into unified HTML report
	run: \|
	# download-artifact v4+ places each artifact in its own subfolder under
	# playwright-blob-reports/. Flatten all .zip files into a single dir so
	# `playwright merge-reports` can find them.
	mkdir -p playwright-blobs-flat
	# Playwright blob filenames include the shard index, so cp overwrites are safe.
	find playwright-blob-reports -name "*.zip" -exec cp {} playwright-blobs-flat/ \;
	if compgen -G "playwright-blobs-flat/*.zip" > /dev/null 2>&1; then
	./e2e-test/ui/playwright/node_modules/.bin/playwright merge-reports --reporter html ./playwright-blobs-flat
	else
	echo "No blob reports found — all shards may have been skipped or crashed"
	exit 1
	fi

	# Uploads playwright-report/ as an artifact kept for 5 days. Has no `if:`, so
	# it is skipped when the merge step above failed.
	- name: Upload merged Playwright HTML report
	uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
	with:
	name: playwright-html-report
	path: playwright-report/
	retention-days: 5

	# Assumes the role given by the PLAYWRIGHT_REPORTS_OSS_ROLE_ARN secret. Always
	# attempted, and a failure here does not fail the job (continue-on-error).
	- name: Configure AWS credentials for S3 report upload
	uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5
	if: always()
	continue-on-error: true
	with:
	role-to-assume: ${{ secrets.PLAYWRIGHT_REPORTS_OSS_ROLE_ARN }}
	aws-region: us-west-2

	# Best-effort publish of playwright-report/ to S3. The script exits 0 when the
	# directory is missing or empty. Otherwise it builds the S3 path from
	# owner/repo/event/run id and attempt plus a slug of the workflow name
	# (lower-cased, non-alphanumerics turned into single dashes, edge dashes
	# trimmed), syncs the directory with --delete, and exposes the resulting URL
	# as the step output `report_url`.
	- name: Publish Playwright report to S3
	id: publish-s3-report
	if: always()
	continue-on-error: true
	# GitHub context values are passed through env rather than interpolated into
	# the script text.
	env:
	GH_WORKFLOW: ${{ github.workflow }}
	GH_REPOSITORY_OWNER: ${{ github.repository_owner }}
	GH_EVENT_REPOSITORY_NAME: ${{ github.event.repository.name }}
	GH_EVENT_NAME: ${{ github.event_name }}
	GH_RUN_ID: ${{ github.run_id }}
	GH_RUN_ATTEMPT: ${{ github.run_attempt }}
	run: \|
	if [[ ! -d playwright-report ]] \|\| [[ -z "$(ls -A playwright-report 2>/dev/null)" ]]; then
	echo "No playwright-report directory — skipping S3 upload"
	exit 0
	fi

	BUCKET="datahub-oss-ci-build-artifacts.dev.dh-int.zone"

	WORKFLOW_NAME=$(echo "$GH_WORKFLOW" \
	\| tr '[:upper:]' '[:lower:]' \
	\| sed 's/[^a-z0-9]/-/g' \
	\| sed -E 's/-+/-/g' \
	\| sed 's/^-//;s/-$//')
	S3_PATH="${GH_REPOSITORY_OWNER}/${GH_EVENT_REPOSITORY_NAME}/workflows/${GH_EVENT_NAME}/${GH_RUN_ID}_${GH_RUN_ATTEMPT}/${WORKFLOW_NAME}/playwright-report"

	aws s3 sync playwright-report/ "s3://${BUCKET}/${S3_PATH}/" \
	--cache-control "no-cache, no-store, must-revalidate" \
	--sse AES256 \
	--delete

	REPORT_URL="https://${BUCKET}/${S3_PATH}/index.html"
	echo "report_url=${REPORT_URL}" >> "$GITHUB_OUTPUT"
	echo "Published Playwright report to: ${REPORT_URL}"

	# Appends a small Markdown table with the published report URL to the job
	# summary. Runs only when the S3 publish step produced a non-empty report_url.
	- name: Add report link to GitHub Summary
	if: ${{ always() && steps.publish-s3-report.outputs.report_url != '' }}
	run: \|
	{
	echo "## Playwright HTML Report"
	echo ""
	echo "\| Resource \| Link \|"
	echo "\|----------\|------\|"
	echo "\| HTML Report (incl. Traces) \| ${{ steps.publish-s3-report.outputs.report_url }} \|"
	echo ""
	echo "> [!NOTE]"
	echo "> Accessible from the DataHub VPN only."
	} >> "$GITHUB_STEP_SUMMARY"

	# Adds the floating quickstart tag to the images once all test jobs are done.
	# The runner falls back to ubuntu-latest when the setup job gives no small
	# runner type.
	publish_images:
	name: Update quickstart tag after tests pass on master
	runs-on: ${{ needs.setup.outputs.test_runner_type_small \|\| 'ubuntu-latest' }}
	needs:
	[
	setup,
	base_build,
	java_integration_tests,
	pytest_tests,
	cypress_tests,
	playwright_test,
	]
	# Master only. always() keeps the job eligible when some needed jobs were
	# skipped; !failure() and !cancelled() still block it when any of them failed
	# or the run was cancelled. The setup job itself must not have been skipped.
	if: ${{ always() && !failure() && !cancelled() && needs.setup.result != 'skipped' && github.ref == 'refs/heads/master' }}
	steps:
	# The Depot CLI is only installed when the setup job enabled the depot cache.
	- name: Set up Depot CLI
	if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
	uses: depot/setup-action@15c09a5f77a0840ad4bce955686522a257853461 # v1

	# Logs in only when the setup job set docker-login to 'true'.
	- name: Login to DockerHub
	uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
	if: ${{ needs.setup.outputs.docker-login == 'true' }}
	with:
	username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
	password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}

	# Checkout is needed by the tagging step, which sources a script from the repo;
	# it shares that step's condition (depot cache in use and publish == 'true').
	- name: Check out the repo
	if: ${{ needs.setup.outputs.use_depot_cache == 'true' && needs.setup.outputs.publish == 'true' }}
	uses: acryldata/sane-checkout-action@186e92cc5948a9c3e1cc7a96eaff9f776f3fc8e3 # v7
	with:
	checkout-head-only: true

	# Downloads the build-metadata artifact for this tag into <workspace>/build;
	# the tagging step reads bake-spec-allImages.json from that directory.
	- name: Download build Metadata
	if: ${{ needs.setup.outputs.publish == 'true' }}
	uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
	with:
	name: build-metadata-${{ needs.setup.outputs.tag }}
	path: ${{ github.workspace }}/build

	# Buildx provides the `docker buildx imagetools create` command used for tagging.
	- name: Set up Docker Buildx
	if: ${{ needs.setup.outputs.use_depot_cache == 'true' && needs.setup.outputs.publish == 'true' }}
	uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3

	# Re-tags every already-pushed image as quickstart without rebuilding: reads
	# all tags from the bake spec, keeps only references whose tag is SHA_TAG or
	# starts with "SHA_TAG-", and creates the matching QUICKSTART_TAG reference
	# with `docker buildx imagetools create`.
	# NOTE(review): SHA_TAG and QUICKSTART_TAG are not set in this step; they are
	# presumably defined by docker_helpers.sh - confirm (set -u aborts if unset).
	- name: Tag images with quickstart (coordinated floating tag)
	if: ${{ needs.setup.outputs.use_depot_cache == 'true' && needs.setup.outputs.publish == 'true' }}
	run: \|
	set -euo pipefail
	source .github/scripts/docker_helpers.sh
	# :sha-* images were pushed during the coordinated depot bake; add :quickstart here only.
	depot bake -f "${{ github.workspace }}/build/bake-spec-allImages.json" --print \| jq -cr '.target[].tags[]' \| while IFS= read -r image; do
	if [[ -z "$image" ]]; then
	continue
	fi
	# $image is a full reference (repository:tag), so the patterns need a leading
	# wildcard; without it every image would be skipped and nothing tagged.
	if [[ "$image" != *":${SHA_TAG}" && "$image" != *":${SHA_TAG}-"* ]]; then
	continue
	fi
	quickstartImage="${image/${SHA_TAG}/${QUICKSTART_TAG}}"
	echo "Tagging $image as $quickstartImage"
	docker buildx imagetools create -t "$quickstartImage" "$image"
	done

	# Sends a "git-sync" message to an SQS queue after the quickstart tag has been
	# published. Runs only when publish_images succeeded and the repository is
	# datahub-project/datahub.
	deploy_datahub_head:
	name: Deploy to Datahub HEAD
	runs-on: ubuntu-latest
	needs:
	[
	setup,
	java_integration_tests,
	pytest_tests,
	cypress_tests,
	playwright_test,
	publish_images,
	]
	if: ${{ needs.publish_images.result == 'success' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }}
	steps:
	# Authenticates with static access keys taken from repository secrets.
	- uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6.0.0
	with:
	aws-access-key-id: ${{ secrets.AWS_SQS_ACCESS_KEY_ID }}
	aws-secret-access-key: ${{ secrets.AWS_SQS_ACCESS_KEY }}
	aws-region: us-west-2
	# The message is a JSON document carrying repo name/org, branch, short SHA and
	# image tag, all taken from the setup job outputs and the github context.
	- uses: isbang/sqs-action@7cdb8b5d1328c6af489ef4614fbafb364bf096ea # v0.2.0
	with:
	sqs-url: ${{ secrets.DATAHUB_HEAD_SYNC_QUEUE }}
	message: '{ "command": "git-sync", "args" : {"repoName": "${{ needs.setup.outputs.repository_name }}", "repoOrg": "${{ github.repository_owner }}", "repoBranch": "${{ needs.setup.outputs.branch_name }}", "repoShaShort": "${{ needs.setup.outputs.short_sha }}", "repoShaTag": "${{ needs.setup.outputs.tag }}" }}'

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat(i18n): translate onboarding to German (CAT-2326) #59254

Workflow file

feat(i18n): translate onboarding to German (CAT-2326) #59254

Uh oh!

Workflow file for this run