diff --git a/.github/ISSUE_TEMPLATE/bug-template.md b/.github/ISSUE_TEMPLATE/bug-template.md index d33eec3cdea0..234d9b5a3782 100644 --- a/.github/ISSUE_TEMPLATE/bug-template.md +++ b/.github/ISSUE_TEMPLATE/bug-template.md @@ -3,6 +3,7 @@ name: Bug Template about: Used for describing bugs title: '' labels: t/bug +type: Bug assignees: '' --- diff --git a/.github/ISSUE_TEMPLATE/epic-template.md b/.github/ISSUE_TEMPLATE/epic-template.md index c442f50fde18..868fd084f151 100644 --- a/.github/ISSUE_TEMPLATE/epic-template.md +++ b/.github/ISSUE_TEMPLATE/epic-template.md @@ -4,6 +4,7 @@ about: A set of related tasks contributing towards specific outcome, comprising more than 1 week of work. title: 'Epic: ' labels: t/Epic +type: Epic assignees: '' --- diff --git a/.github/actionlint.yml b/.github/actionlint.yml index ecff0cc70b22..2b96ce95da32 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -27,3 +27,4 @@ config-variables: - SLACK_ON_CALL_QA_STAGING_STREAM - DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN - SLACK_ON_CALL_STORAGE_STAGING_STREAM + - SLACK_CICD_CHANNEL_ID diff --git a/.github/actions/neon-project-create/action.yml b/.github/actions/neon-project-create/action.yml index 11f46bce8ef1..c9f6b0832eeb 100644 --- a/.github/actions/neon-project-create/action.yml +++ b/.github/actions/neon-project-create/action.yml @@ -41,7 +41,10 @@ inputs: description: 'Path to directory containing libpq library - it is caller responsibility to provision the libpq library' required: false default: '/tmp/neon/pg_install/v16/lib' - + project_settings: + description: 'A JSON object with project settings' + required: false + default: '{}' outputs: dsn: @@ -73,7 +76,7 @@ runs: \"provisioner\": \"k8s-neonvm\", \"autoscaling_limit_min_cu\": ${MIN_CU}, \"autoscaling_limit_max_cu\": ${MAX_CU}, - \"settings\": { } + \"settings\": ${PROJECT_SETTINGS} } }") @@ -92,12 +95,12 @@ runs: if [ "${SHARD_SPLIT_PROJECT}" = "true" ]; then # determine tenant ID TENANT_ID=`${PSQL} ${dsn} -t -A -c "SHOW neon.tenant_id"` - + echo "Splitting project ${project_id} with tenant_id ${TENANT_ID} into $((SHARD_COUNT)) shards with stripe size $((STRIPE_SIZE))" echo "Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split" echo "with body {\"new_shard_count\": $((SHARD_COUNT)), \"new_stripe_size\": $((STRIPE_SIZE))}" - + # we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set) curl -X PUT \ "https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split" \ @@ -118,3 +121,4 @@ runs: STRIPE_SIZE: ${{ inputs.stripe_size }} PSQL: ${{ inputs.psql_path }} LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }} + PROJECT_SETTINGS: ${{ inputs.project_settings }} diff --git a/.github/file-filters.yaml b/.github/file-filters.yaml index 886cd3919ac2..02ee383d5ed3 100644 --- a/.github/file-filters.yaml +++ b/.github/file-filters.yaml @@ -1,4 +1,5 @@ rust_code: ['**/*.rs', '**/Cargo.toml', '**/Cargo.lock'] +rust_dependencies: ['**/Cargo.lock'] v14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**'] v15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**'] diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index f97402a90b92..1dec8106b484 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -267,6 +267,26 @@ jobs: path: /tmp/neon 
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + - name: Check diesel schema + if: inputs.build-type == 'release' && inputs.arch == 'x64' + env: + DATABASE_URL: postgresql://localhost:1235/storage_controller + POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install + run: | + /tmp/neon/bin/neon_local init + /tmp/neon/bin/neon_local storage_controller start + + diesel print-schema > storage_controller/src/schema.rs + + if [ -n "$(git diff storage_controller/src/schema.rs)" ]; then + echo >&2 "Uncommitted changes in diesel schema" + + git diff . + exit 1 + fi + + /tmp/neon/bin/neon_local storage_controller stop + # XXX: keep this after the binaries.list is formed, so the coverage can properly work later - name: Merge and upload coverage data if: inputs.build-type == 'debug' diff --git a/.github/workflows/_check-codestyle-rust.yml b/.github/workflows/_check-codestyle-rust.yml index cbc47c640640..c4c76914aa64 100644 --- a/.github/workflows/_check-codestyle-rust.yml +++ b/.github/workflows/_check-codestyle-rust.yml @@ -16,6 +16,9 @@ defaults: run: shell: bash -euxo pipefail {0} +# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job. +permissions: {} + jobs: check-codestyle-rust: strategy: @@ -84,8 +87,3 @@ jobs: run: | cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack - - # https://github.com/EmbarkStudios/cargo-deny - - name: Check rust licenses/bans/advisories/sources - if: ${{ !cancelled() }} - run: cargo deny check --hide-inclusion-graph diff --git a/.github/workflows/approved-for-ci-run.yml b/.github/workflows/approved-for-ci-run.yml index 0a0898d30c1c..fc2f36c74b89 100644 --- a/.github/workflows/approved-for-ci-run.yml +++ b/.github/workflows/approved-for-ci-run.yml @@ -94,7 +94,9 @@ jobs: echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT} echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT} - - run: gh pr checkout "${PR_NUMBER}" + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} - run: git checkout -b "${BRANCH}" diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 32747d825caa..b36ac46f3525 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -319,7 +319,7 @@ jobs: { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" }, { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, { "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" }, - { "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, + { "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, { "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, { "pg_version": 17, "region_id": "'"$region_id_default"'", 
"platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }] }' @@ -340,7 +340,7 @@ jobs: ], "pg_version" : [ 16,17 - ], + ] }' if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then @@ -458,7 +458,7 @@ jobs: echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT - # we want to compare Neon project OLTP throughput and latency at scale factor 10 GB + # we want to compare Neon project OLTP throughput and latency at scale factor 10 GB # without (neonvm-captest-new) # and with (neonvm-captest-new-many-tables) many relations in the database - name: Create many relations before the run @@ -590,36 +590,20 @@ jobs: steps: - uses: actions/checkout@v4 - # until https://github.com/neondatabase/neon/issues/8275 is fixed we temporarily install postgresql-16 - # instead of using Neon artifacts containing pgbench - - name: Install postgresql-16 where pytest expects it - run: | - # Just to make it easier to test things locally on macOS (with arm64) - arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g') - - cd /home/nonroot - wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.2-1.pgdg120+1_${arch}.deb" - wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.6-1.pgdg120+1_${arch}.deb" - wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.6-1.pgdg120+1_${arch}.deb" - dpkg -x libpq5_17.2-1.pgdg120+1_${arch}.deb pg - dpkg -x postgresql-16_16.6-1.pgdg120+1_${arch}.deb pg - dpkg -x postgresql-client-16_16.6-1.pgdg120+1_${arch}.deb pg - - mkdir -p /tmp/neon/pg_install/v16/bin - mkdir -p /tmp/neon/pg_install/v17/bin - ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench - ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql - ln -s /home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu /tmp/neon/pg_install/v16/lib - ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v17/bin/pgbench - ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v17/bin/psql - ln -s /home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu /tmp/neon/pg_install/v17/lib - - LD_LIBRARY_PATH="/home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu:${LD_LIBRARY_PATH:-}" - export LD_LIBRARY_PATH - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> ${GITHUB_ENV} - - /tmp/neon/pg_install/v16/bin/pgbench --version - /tmp/neon/pg_install/v16/bin/psql --version + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 18000 # 5 hours + + - name: Download Neon artifact + uses: ./.github/actions/download + with: + name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact + path: /tmp/neon/ + prefix: latest + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Set up Connection String id: set-up-connstr @@ -642,13 +626,6 @@ jobs: echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-region: eu-central-1 - role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - role-duration-seconds: 18000 # 5 hours - - name: Benchmark pgvector hnsw indexing uses: ./.github/actions/run-python-test-set with: @@ -764,10 +741,10 @@ jobs: neonvm-captest-reuse) case "${PG_VERSION}" in 16) - CONNSTR=${{ 
secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR_V16 }} + CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }} ;; 17) - CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR_PG17 }} + CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_CONNSTR_PG17 }} ;; *) echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}" @@ -833,7 +810,7 @@ jobs: # We might change it after https://github.com/neondatabase/neon/issues/2900. # # *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB) - if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }} + # if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }} permissions: contents: write statuses: write @@ -887,7 +864,7 @@ jobs: CONNSTR_SECRET_NAME="BENCHMARK_CAPTEST_TPCH_S10_CONNSTR" ;; 17) - CONNSTR_SECRET_NAME="BENCHMARK_CAPTEST_CONNSTR_PG17" + CONNSTR_SECRET_NAME="BENCHMARK_CAPTEST_TPCH_CONNSTR_PG17" ;; *) echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}" @@ -906,7 +883,7 @@ jobs: exit 1 ;; esac - + echo "CONNSTR_SECRET_NAME=${CONNSTR_SECRET_NAME}" >> $GITHUB_ENV - name: Set up Connection String @@ -952,7 +929,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} user-examples-compare: - if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }} + # if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }} permissions: contents: write statuses: write @@ -1007,7 +984,7 @@ jobs: CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }} ;; 17) - CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR_PG17 }} + CONNSTR=${{ secrets.BENCHMARK_CAPTEST_USER_EXAMPLE_CONNSTR_PG17 }} ;; *) echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}" diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e588fc5a0e88..5a4bdecb994e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -45,6 +45,26 @@ jobs: run cancel-previous-in-concurrency-group.yml \ --field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}" + files-changed: + needs: [ check-permissions ] + runs-on: [ self-hosted, small ] + timeout-minutes: 3 + outputs: + check-rust-dependencies: ${{ steps.files-changed.outputs.rust_dependencies }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: true + + - name: Check for file changes + uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2 + id: files-changed + with: + token: ${{ secrets.GITHUB_TOKEN }} + filters: .github/file-filters.yaml + tag: needs: [ check-permissions ] runs-on: [ self-hosted, small ] @@ -170,6 +190,14 @@ jobs: archs: '["x64", "arm64"]' secrets: inherit + check-dependencies-rust: + needs: [ files-changed, build-build-tools-image ] + if: ${{ needs.files-changed.outputs.check-rust-dependencies == 'true' }} + uses: ./.github/workflows/cargo-deny.yml + with: + build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm + secrets: inherit + build-and-test-locally: needs: [ tag, build-build-tools-image ] strategy: @@ -654,7 +682,7 @@ jobs: push: true pull: true file: compute/compute-node.Dockerfile - target: neon-pg-ext-test + target: extension-tests cache-from: type=registry,ref=cache.neon.build/compute-node-${{ 
matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }} tags: | neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }} @@ -1332,6 +1360,8 @@ jobs: - build-and-test-locally - check-codestyle-python - check-codestyle-rust + - check-dependencies-rust + - files-changed - promote-images-dev - test-images - trigger-custom-extensions-build-and-wait @@ -1344,4 +1374,11 @@ jobs: if: | contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') - || contains(needs.*.result, 'skipped') + || (needs.check-dependencies-rust.result == 'skipped' && needs.files-changed.outputs.check-rust-dependencies == 'true') + || needs.build-and-test-locally.result == 'skipped' + || needs.check-codestyle-python.result == 'skipped' + || needs.check-codestyle-rust.result == 'skipped' + || needs.files-changed.result == 'skipped' + || needs.promote-images-dev.result == 'skipped' + || needs.test-images.result == 'skipped' + || needs.trigger-custom-extensions-build-and-wait.result == 'skipped' diff --git a/.github/workflows/cargo-deny.yml b/.github/workflows/cargo-deny.yml new file mode 100644 index 000000000000..433b377c327e --- /dev/null +++ b/.github/workflows/cargo-deny.yml @@ -0,0 +1,57 @@ +name: cargo deny checks + +on: + workflow_call: + inputs: + build-tools-image: + required: false + type: string + schedule: + - cron: '0 0 * * *' + +jobs: + cargo-deny: + strategy: + matrix: + ref: >- + ${{ + fromJSON( + github.event_name == 'schedule' + && '["main","release","release-proxy","release-compute"]' + || format('["{0}"]', github.sha) + ) + }} + + runs-on: [self-hosted, small] + + container: + image: ${{ inputs.build-tools-image || 'neondatabase/build-tools:pinned' }} + credentials: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + options: --init + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ matrix.ref }} + + - name: Check rust licenses/bans/advisories/sources + env: + CARGO_DENY_TARGET: >- + ${{ github.event_name == 'schedule' && 'advisories' || 'all' }} + run: cargo deny check --hide-inclusion-graph $CARGO_DENY_TARGET + + - name: Post to a Slack channel + if: ${{ github.event_name == 'schedule' && failure() }} + uses: slackapi/slack-github-action@v2 + with: + method: chat.postMessage + token: ${{ secrets.SLACK_BOT_TOKEN }} + payload: | + channel: ${{ vars.SLACK_CICD_CHANNEL_ID }} + text: | + Periodic cargo-deny on ${{ matrix.ref }}: ${{ job.status }} + <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> + Pinging @oncall-devprod. 
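For reference, the new reusable cargo-deny workflow above runs the same check that was removed from _check-codestyle-rust.yml earlier in this diff. A minimal local equivalent — assuming cargo-deny is installed at the version pinned in build-tools.Dockerfile and that the repository's existing cargo-deny configuration is picked up from the working directory — would be roughly:

```bash
# Pin matches CARGO_DENY_VERSION in build-tools.Dockerfile.
cargo install cargo-deny --locked --version 0.16.2

# What the PR-triggered job runs (licenses/bans/advisories/sources):
cargo deny check --hide-inclusion-graph all

# What the nightly scheduled job runs against main and the release branches (advisories only):
cargo deny check --hide-inclusion-graph advisories
```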
diff --git a/.github/workflows/pg-clients.yml b/.github/workflows/pg-clients.yml index 4947907eb068..abc90c7fe1a7 100644 --- a/.github/workflows/pg-clients.yml +++ b/.github/workflows/pg-clients.yml @@ -12,8 +12,8 @@ on: pull_request: paths: - '.github/workflows/pg-clients.yml' - - 'test_runner/pg_clients/**' - - 'test_runner/logical_repl/**' + - 'test_runner/pg_clients/**/*.py' + - 'test_runner/logical_repl/**/*.py' - 'poetry.lock' workflow_dispatch: @@ -104,6 +104,8 @@ jobs: with: api_key: ${{ secrets.NEON_STAGING_API_KEY }} postgres_version: ${{ env.DEFAULT_PG_VERSION }} + project_settings: >- + {"enable_logical_replication": true} - name: Run tests uses: ./.github/actions/run-python-test-set diff --git a/.github/workflows/pre-merge-checks.yml b/.github/workflows/pre-merge-checks.yml index e6dfbaeed871..c47b3fe0debb 100644 --- a/.github/workflows/pre-merge-checks.yml +++ b/.github/workflows/pre-merge-checks.yml @@ -59,7 +59,10 @@ jobs: echo "${RUST_CHANGED_FILES}" build-build-tools-image: - if: needs.get-changed-files.outputs.python-changed == 'true' + if: | + false + || needs.get-changed-files.outputs.python-changed == 'true' + || needs.get-changed-files.outputs.rust-changed == 'true' needs: [ get-changed-files ] uses: ./.github/workflows/build-build-tools-image.yml with: @@ -92,7 +95,8 @@ jobs: # - conclusion # - neon-cloud-e2e conclusion: - if: always() + # Do not run job on Pull Requests as it interferes with the `conclusion` job from the `build_and_test` workflow + if: always() && github.event_name == 'merge_group' permissions: statuses: write # for `github.repos.createCommitStatus(...)` contents: write @@ -124,6 +128,8 @@ jobs: - name: Fail the job if any of the dependencies do not succeed or skipped run: exit 1 if: | - (contains(needs.check-codestyle-python.result, 'skipped') && needs.get-changed-files.outputs.python-changed == 'true') + false + || (needs.check-codestyle-python.result == 'skipped' && needs.get-changed-files.outputs.python-changed == 'true') + || (needs.check-codestyle-rust.result == 'skipped' && needs.get-changed-files.outputs.rust-changed == 'true') || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') diff --git a/Cargo.lock b/Cargo.lock index 9ba90355df19..de1b1218cada 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -206,6 +206,16 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "async-channel" version = "1.9.0" @@ -290,9 +300,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "aws-config" -version = "1.5.10" +version = "1.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b49afaa341e8dd8577e1a2200468f98956d6eda50bcf4a53246cc00174ba924" +checksum = "dc47e70fc35d054c8fcd296d47a61711f043ac80534a10b4f741904f81e73a90" dependencies = [ "aws-credential-types", "aws-runtime", @@ -301,7 +311,7 @@ dependencies = [ "aws-sdk-sts", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json 0.60.7", + "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -332,9 +342,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.4.4" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b5ac934720fbb46206292d2c75b57e67acfc56fe7dfd34fb9a02334af08409ea" +checksum = "bee7643696e7fdd74c10f9eb42848a87fe469d35eae9c3323f80aa98f350baac" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -366,7 +376,7 @@ dependencies = [ "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json 0.61.1", + "aws-smithy-json", "aws-smithy-query", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -389,7 +399,7 @@ dependencies = [ "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json 0.61.1", + "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -414,7 +424,7 @@ dependencies = [ "aws-smithy-checksums", "aws-smithy-eventstream", "aws-smithy-http", - "aws-smithy-json 0.61.1", + "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -437,15 +447,15 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.50.0" +version = "1.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ca43a4ef210894f93096039ef1d6fa4ad3edfabb3be92b80908b9f2e4b4eab" +checksum = "c54bab121fe1881a74c338c5f723d1592bf3b53167f80268a1274f404e1acc38" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json 0.61.1", + "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -459,15 +469,15 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.51.0" +version = "1.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abaf490c2e48eed0bb8e2da2fb08405647bd7f253996e0f93b981958ea0f73b0" +checksum = "8c8234fd024f7ac61c4e44ea008029bde934250f371efe7d4a39708397b1080c" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json 0.61.1", + "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -481,15 +491,15 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.51.0" +version = "1.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b68fde0d69c8bfdc1060ea7da21df3e39f6014da316783336deff0a9ec28f4bf" +checksum = "ba60e1d519d6f23a9df712c04fdeadd7872ac911c84b2f62a8bda92e129b7962" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json 0.61.1", + "aws-smithy-json", "aws-smithy-query", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -504,9 +514,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.2.6" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2" +checksum = "690118821e46967b3c4501d67d7d52dd75106a9c54cf36cefa1985cedbe94e05" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", @@ -533,9 +543,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.1" +version = "1.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62220bc6e97f946ddd51b5f1361f78996e704677afc518a4ff66b7a72ea1378c" +checksum = "fa59d1327d8b5053c54bf2eaae63bf629ba9e904434d0835a28ed3c0ed0a614e" dependencies = [ "futures-util", "pin-project-lite", @@ -565,9 +575,9 @@ dependencies = [ [[package]] name = "aws-smithy-eventstream" -version = "0.60.5" +version = "0.60.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cef7d0a272725f87e51ba2bf89f8c21e4df61b9e49ae1ac367a6d69916ef7c90" +checksum = 
"8b18559a41e0c909b77625adf2b8c50de480a8041e5e4a3f5f7d177db70abc5a" dependencies = [ "aws-smithy-types", "bytes", @@ -576,9 +586,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.60.11" +version = "0.60.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6" +checksum = "7809c27ad8da6a6a68c454e651d4962479e81472aa19ae99e59f9aba1f9713cc" dependencies = [ "aws-smithy-eventstream", "aws-smithy-runtime-api", @@ -597,18 +607,9 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.60.7" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6" -dependencies = [ - "aws-smithy-types", -] - -[[package]] -name = "aws-smithy-json" -version = "0.61.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee4e69cc50921eb913c6b662f8d909131bb3e6ad6cb6090d3a39b66fc5c52095" +checksum = "623a51127f24c30776c8b374295f2df78d92517386f77ba30773f15a30ce1422" dependencies = [ "aws-smithy-types", ] @@ -625,9 +626,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.7.4" +version = "1.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f20685047ca9d6f17b994a07f629c813f08b5bce65523e47124879e60103d45" +checksum = "865f7050bbc7107a6c98a397a9fcd9413690c27fa718446967cf03b2d3ac517e" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -669,9 +670,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.2.9" +version = "1.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fbd94a32b3a7d55d3806fe27d98d3ad393050439dd05eb53ece36ec5e3d3510" +checksum = "a28f6feb647fb5e0d5b50f0472c19a7db9462b74e2fec01bb0b44eedcc834e97" dependencies = [ "base64-simd", "bytes", @@ -704,9 +705,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.3" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef" +checksum = "b0df5a18c4f951c645300d365fec53a61418bcf4650f604f85fe2a665bfaa0c2" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -978,7 +979,7 @@ version = "0.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.8.0", "cexpr", "clang-sys", "itertools 0.12.1", @@ -1006,9 +1007,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" [[package]] name = "block-buffer" @@ -1019,6 +1020,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "boxcar" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2721c3c5a6f0e7f7e607125d963fedeb765f545f67adc9d71ed934693881eb42" + [[package]] name = "bstr" version = "1.5.0" @@ -1225,6 +1232,20 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7" +[[package]] +name = "clashmap" +version = "1.0.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93bd59c81e2bd87a775ae2de75f070f7e2bfe97363a6ad652f46824564c23e4d" +dependencies = [ + "crossbeam-utils", + "hashbrown 0.15.2", + "lock_api", + "parking_lot_core 0.9.8", + "polonius-the-crab", + "replace_with", +] + [[package]] name = "colorchoice" version = "1.0.0" @@ -1312,7 +1333,7 @@ dependencies = [ "tar", "thiserror 1.0.69", "tokio", - "tokio-postgres 0.7.9", + "tokio-postgres", "tokio-stream", "tokio-util", "tower 0.5.2", @@ -1421,7 +1442,7 @@ dependencies = [ "storage_broker", "thiserror 1.0.69", "tokio", - "tokio-postgres 0.7.9", + "tokio-postgres", "tokio-util", "toml", "toml_edit", @@ -1561,7 +1582,7 @@ version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.8.0", "crossterm_winapi", "libc", "parking_lot 0.12.1", @@ -1792,7 +1813,7 @@ version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf1bedf64cdb9643204a36dd15b19a6ce8e7aa7f7b105868e9f1fad5ffa7d12" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.8.0", "byteorder", "chrono", "diesel_derives", @@ -1812,7 +1833,7 @@ dependencies = [ "futures-util", "scoped-futures", "tokio", - "tokio-postgres 0.7.12", + "tokio-postgres", ] [[package]] @@ -2428,6 +2449,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "gettid" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "397256552fed4a9e577850498071831ec8f18ea83368aecc114cab469dcb43e5" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "gimli" version = "0.31.1" @@ -2556,6 +2587,12 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + [[package]] name = "hashlink" version = "0.9.1" @@ -2606,6 +2643,15 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" +[[package]] +name = "higher-kinded-types" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "561985554c8b8d4808605c90a5f1979cc6c31a5d20b78465cd59501233c6678e" +dependencies = [ + "never-say-never", +] + [[package]] name = "hmac" version = "0.12.1" @@ -3084,11 +3130,11 @@ dependencies = [ [[package]] name = "inotify" -version = "0.9.6" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff" +checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.8.0", "inotify-sys", "libc", ] @@ -3265,9 +3311,9 @@ dependencies = [ [[package]] name = "kqueue" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c8fc60ba15bf51257aa9807a48a61013db043fcf3a78cb0d916e8e396dcad98" +checksum = "7447f1ca1b7b563588a205fe93dea8df60fd981423a768bc1c0ded35ed147d0c" dependencies = [ "kqueue-sys", "libc", @@ -3275,9 +3321,9 @@ dependencies = [ [[package]] name = "kqueue-sys" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8367585489f01bc55dd27404dcf56b95e6da061a256a666ab23be9ba96a2e587" +checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b" dependencies = [ "bitflags 1.3.2", "libc", @@ -3304,9 +3350,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.167" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libloading" @@ -3553,14 +3599,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.11" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -3569,6 +3615,12 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +[[package]] +name = "never-say-never" +version = "6.6.666" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf5a574dadd7941adeaa71823ecba5e28331b8313fb2e1c6a5c7e5981ea53ad6" + [[package]] name = "nix" version = "0.25.1" @@ -3600,7 +3652,7 @@ version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.8.0", "cfg-if", "libc", "memoffset 0.9.0", @@ -3618,12 +3670,11 @@ dependencies = [ [[package]] name = "notify" -version = "6.1.1" +version = "8.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d" +checksum = "2fee8403b3d66ac7b26aee6e40a897d85dc5ce26f44da36b8b73e987cc52e943" dependencies = [ - "bitflags 2.4.1", - "crossbeam-channel", + "bitflags 2.8.0", "filetime", "fsevent-sys", "inotify", @@ -3631,10 +3682,17 @@ dependencies = [ "libc", "log", "mio", + "notify-types", "walkdir", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] +[[package]] +name = "notify-types" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e0826a989adedc2a244799e823aece04662b66609d96af8dff7ac6df9a8925d" + [[package]] name = "ntapi" version = "0.4.1" @@ -4060,8 +4118,8 @@ dependencies = [ "pageserver_compaction", "pin-project-lite", "postgres", - "postgres-protocol 0.6.6", - "postgres-types 0.2.6", + "postgres-protocol", + "postgres-types", "postgres_backend", "postgres_connection", "postgres_ffi", @@ -4092,7 +4150,7 @@ dependencies = [ "tokio", "tokio-epoll-uring", "tokio-io-timeout", - "tokio-postgres 0.7.9", + "tokio-postgres", "tokio-stream", "tokio-tar", "tokio-util", @@ -4150,7 +4208,7 @@ dependencies = [ "serde", "thiserror 1.0.69", "tokio", - "tokio-postgres 0.7.9", + "tokio-postgres", "tokio-stream", "tokio-util", "utils", @@ -4180,6 +4238,16 @@ dependencies = [ "workspace_hack", ] +[[package]] +name = "papaya" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc7c76487f7eaa00a0fc1d7f88dc6b295aec478d11b0fc79f857b62c2874124c" +dependencies = [ + "equivalent", + "seize", +] + [[package]] name = "parking" version = "2.1.1" @@ -4446,48 +4514,40 @@ dependencies = [ 
"plotters-backend", ] +[[package]] +name = "polonius-the-crab" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e97ca2c89572ae41bbec1c99498251f87dd5a94e500c5ec19c382dd593dd5ce9" +dependencies = [ + "higher-kinded-types", + "never-say-never", +] + [[package]] name = "postgres" -version = "0.19.6" -source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070" +version = "0.19.7" +source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9" dependencies = [ "bytes", "fallible-iterator", "futures-util", "log", "tokio", - "tokio-postgres 0.7.9", + "tokio-postgres", ] [[package]] name = "postgres-protocol" version = "0.6.6" -source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070" -dependencies = [ - "base64 0.21.1", - "byteorder", - "bytes", - "fallible-iterator", - "hmac", - "lazy_static", - "md-5", - "memchr", - "rand 0.8.5", - "sha2", - "stringprep", -] - -[[package]] -name = "postgres-protocol" -version = "0.6.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acda0ebdebc28befa84bee35e651e4c5f09073d668c7aed4cf7e23c3cda84b23" +source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9" dependencies = [ "base64 0.22.1", "byteorder", "bytes", "fallible-iterator", "hmac", + "lazy_static", "md-5", "memchr", "rand 0.8.5", @@ -4514,23 +4574,12 @@ dependencies = [ [[package]] name = "postgres-types" version = "0.2.6" -source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070" +source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9" dependencies = [ "bytes", "chrono", "fallible-iterator", - "postgres-protocol 0.6.6", -] - -[[package]] -name = "postgres-types" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f66ea23a2d0e5734297357705193335e0a957696f34bed2f2faefacb2fec336f" -dependencies = [ - "bytes", - "fallible-iterator", - "postgres-protocol 0.6.7", + "postgres-protocol", ] [[package]] @@ -4555,7 +4604,7 @@ dependencies = [ "serde", "thiserror 1.0.69", "tokio", - "tokio-postgres 0.7.9", + "tokio-postgres", "tokio-postgres-rustls", "tokio-rustls 0.26.0", "tokio-util", @@ -4570,7 +4619,7 @@ dependencies = [ "itertools 0.10.5", "once_cell", "postgres", - "tokio-postgres 0.7.9", + "tokio-postgres", "url", ] @@ -4664,7 +4713,7 @@ dependencies = [ "byteorder", "bytes", "itertools 0.10.5", - "postgres-protocol 0.6.6", + "postgres-protocol", "rand 0.8.5", "serde", "thiserror 1.0.69", @@ -4705,7 +4754,7 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.8.0", "chrono", "flate2", "hex", @@ -4720,7 +4769,7 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.8.0", "chrono", "hex", ] @@ -4826,6 +4875,7 @@ dependencies = [ "ahash", "anyhow", "arc-swap", + "assert-json-diff", "async-compression", "async-trait", "atomic-take", @@ -4833,15 +4883,16 @@ dependencies = [ "aws-sdk-iam", "aws-sigv4", "base64 0.13.1", + 
"boxcar", "bstr", "bytes", "camino", "camino-tempfile", "chrono", "clap", + "clashmap", "compute_api", "consumption_metrics", - "dashmap 5.5.0", "ecdsa 0.16.9", "ed25519-dalek", "env_logger 0.10.2", @@ -4849,6 +4900,7 @@ dependencies = [ "flate2", "framed-websockets", "futures", + "gettid", "hashbrown 0.14.5", "hashlink", "hex", @@ -4871,7 +4923,9 @@ dependencies = [ "measured", "metrics", "once_cell", + "opentelemetry", "p256 0.13.2", + "papaya", "parking_lot 0.12.1", "parquet", "parquet_derive", @@ -4912,12 +4966,15 @@ dependencies = [ "tikv-jemalloc-ctl", "tikv-jemallocator", "tokio", - "tokio-postgres 0.7.9", + "tokio-postgres", "tokio-postgres2", "tokio-rustls 0.26.0", "tokio-tungstenite 0.21.0", "tokio-util", "tracing", + "tracing-log", + "tracing-opentelemetry", + "tracing-serde", "tracing-subscriber", "tracing-utils", "try-lock", @@ -5249,6 +5306,12 @@ dependencies = [ "utils", ] +[[package]] +name = "replace_with" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a8614ee435691de62bcffcf4a66d91b3594bf1428a5722e79103249a095690" + [[package]] name = "reqwest" version = "0.12.4" @@ -5528,7 +5591,7 @@ version = "0.38.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.8.0", "errno", "libc", "linux-raw-sys 0.4.14", @@ -5700,7 +5763,7 @@ dependencies = [ "pageserver_api", "parking_lot 0.12.1", "postgres", - "postgres-protocol 0.6.6", + "postgres-protocol", "postgres_backend", "postgres_ffi", "pprof", @@ -5724,7 +5787,7 @@ dependencies = [ "tikv-jemallocator", "tokio", "tokio-io-timeout", - "tokio-postgres 0.7.9", + "tokio-postgres", "tokio-stream", "tokio-tar", "tokio-util", @@ -5865,6 +5928,16 @@ dependencies = [ "libc", ] +[[package]] +name = "seize" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d84b0c858bdd30cb56f5597f8b3bf702ec23829e652cc636a1e5a7b9de46ae93" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "semver" version = "1.0.17" @@ -6341,6 +6414,8 @@ dependencies = [ "rand 0.8.5", "reqwest", "routerify", + "rustls 0.23.18", + "rustls-native-certs 0.8.0", "scoped-futures", "scopeguard", "serde", @@ -6349,6 +6424,8 @@ dependencies = [ "strum_macros", "thiserror 1.0.69", "tokio", + "tokio-postgres", + "tokio-postgres-rustls", "tokio-util", "tracing", "utils", @@ -6394,7 +6471,7 @@ dependencies = [ "serde_json", "storage_controller_client", "tokio", - "tokio-postgres 0.7.9", + "tokio-postgres", "tokio-postgres-rustls", "tokio-stream", "tokio-util", @@ -6591,7 +6668,7 @@ dependencies = [ "fastrand 2.2.0", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -6803,21 +6880,20 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.38.1" +version = "1.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" +checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -6848,9 +6924,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", @@ -6859,34 +6935,8 @@ dependencies = [ [[package]] name = "tokio-postgres" -version = "0.7.9" -source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070" -dependencies = [ - "async-trait", - "byteorder", - "bytes", - "fallible-iterator", - "futures-channel", - "futures-util", - "log", - "parking_lot 0.12.1", - "percent-encoding", - "phf", - "pin-project-lite", - "postgres-protocol 0.6.6", - "postgres-types 0.2.6", - "rand 0.8.5", - "socket2", - "tokio", - "tokio-util", - "whoami", -] - -[[package]] -name = "tokio-postgres" -version = "0.7.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b5d3742945bc7d7f210693b0c58ae542c6fd47b17adbbda0885f3dcb34a6bdb" +version = "0.7.10" +source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9" dependencies = [ "async-trait", "byteorder", @@ -6899,8 +6949,8 @@ dependencies = [ "percent-encoding", "phf", "pin-project-lite", - "postgres-protocol 0.6.7", - "postgres-types 0.2.8", + "postgres-protocol", + "postgres-types", "rand 0.8.5", "socket2", "tokio", @@ -6917,7 +6967,7 @@ dependencies = [ "ring", "rustls 0.23.18", "tokio", - "tokio-postgres 0.7.9", + "tokio-postgres", "tokio-rustls 0.26.0", "x509-certificate", ] @@ -7161,7 +7211,7 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.8.0", "bytes", "http 1.1.0", "http-body 1.0.0", @@ -7595,7 +7645,7 @@ dependencies = [ "serde_json", "sysinfo", "tokio", - "tokio-postgres 0.7.9", + "tokio-postgres", "tokio-util", "tracing", "tracing-subscriber", @@ -7658,9 +7708,9 @@ dependencies = [ [[package]] name = "walkdir" -version = "2.3.3" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -7912,6 +7962,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-targets" version = "0.48.0" @@ -8140,6 +8199,7 @@ dependencies = [ "tower 0.4.13", "tracing", "tracing-core", + "tracing-log", "url", "zerocopy", "zeroize", diff --git a/Cargo.toml b/Cargo.toml index 9ccdb45f6d82..76b54ae1d877 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,6 +54,7 @@ async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] } atomic-take = "1.1.0" backtrace = "0.3.74" flate2 = "1.0.26" +assert-json-diff = "2" async-stream = "0.3" async-trait = "0.1" aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] } @@ -77,10 +78,10 @@ camino = "1.1.6" cfg-if = "1.0.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } clap = { version = "4.0", features = ["derive", "env"] } +clashmap = { version = "1.0", features = 
["raw-api"] } comfy-table = "7.1" const_format = "0.2" crc32c = "0.6" -dashmap = { version = "5.5.0", features = ["raw-api"] } diatomic-waker = { version = "0.2.3" } either = "1.8" enum-map = "2.4.2" @@ -123,7 +124,7 @@ measured = { version = "0.0.22", features=["lasso"] } measured-process = { version = "0.0.22" } memoffset = "0.9" nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] } -notify = "6.0.0" +notify = "8.0.0" num_cpus = "1.15" num-traits = "0.2.15" once_cell = "1.13" @@ -177,7 +178,7 @@ test-context = "0.3" thiserror = "1.0" tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] } tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] } -tokio = { version = "1.17", features = ["macros"] } +tokio = { version = "1.41", features = ["macros"] } tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" } tokio-io-timeout = "1.2.0" tokio-postgres-rustls = "0.12.0" @@ -193,7 +194,9 @@ tower-http = { version = "0.6.2", features = ["request-id", "trace"] } tower-service = "0.3.3" tracing = "0.1" tracing-error = "0.2" +tracing-log = "0.2" tracing-opentelemetry = "0.28" +tracing-serde = "0.2.0" tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] } try-lock = "0.2.5" twox-hash = { version = "1.6.3", default-features = false } diff --git a/build-tools.Dockerfile b/build-tools.Dockerfile index 9c13e480c125..3ade57b175b3 100644 --- a/build-tools.Dockerfile +++ b/build-tools.Dockerfile @@ -3,8 +3,13 @@ ARG DEBIAN_VERSION=bookworm FROM debian:bookworm-slim AS pgcopydb_builder ARG DEBIAN_VERSION +# Use strict mode for bash to catch errors early +SHELL ["/bin/bash", "-euo", "pipefail", "-c"] + +# By default, /bin/sh used in debian images will treat '\n' as eol, +# but as we use bash as SHELL, and built-in echo in bash requires '-e' flag for that. 
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \ - echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc \ + echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \ echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \ @@ -55,7 +60,8 @@ ARG DEBIAN_VERSION # Add nonroot user RUN useradd -ms /bin/bash nonroot -b /home -SHELL ["/bin/bash", "-c"] +# Use strict mode for bash to catch errors early +SHELL ["/bin/bash", "-euo", "pipefail", "-c"] RUN mkdir -p /pgcopydb/bin && \ mkdir -p /pgcopydb/lib && \ @@ -66,7 +72,7 @@ COPY --from=pgcopydb_builder /usr/lib/postgresql/16/bin/pgcopydb /pgcopydb/bin/p COPY --from=pgcopydb_builder /pgcopydb/lib/libpq.so.5 /pgcopydb/lib/libpq.so.5 RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \ - echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc \ + echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \ echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc # System deps @@ -190,8 +196,14 @@ RUN set -e \ # It includes several bug fixes on top on v2.0 release (https://github.com/linux-test-project/lcov/compare/v2.0...master) # And patches from us: # - Generates json file with code coverage summary (https://github.com/neondatabase/lcov/commit/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz) -RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JSON::XS Memory::Process Time::HiRes JSON; do yes | perl -MCPAN -e "CPAN::Shell->notest('install', '$package')"; done \ - && wget https://github.com/neondatabase/lcov/archive/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz -O lcov.tar.gz \ +RUN set +o pipefail && \ + for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JSON::XS Memory::Process Time::HiRes JSON; do \ + yes | perl -MCPAN -e "CPAN::Shell->notest('install', '$package')";\ + done && \ + set -o pipefail +# Split into separate step to debug flaky failures here +RUN wget https://github.com/neondatabase/lcov/archive/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz -O lcov.tar.gz \ + && ls -laht lcov.tar.gz && sha256sum lcov.tar.gz \ && echo "61a22a62e20908b8b9e27d890bd0ea31f567a7b9668065589266371dcbca0992 lcov.tar.gz" | sha256sum --check \ && mkdir -p lcov && tar -xzf lcov.tar.gz -C lcov --strip-components=1 \ && cd lcov \ @@ -253,7 +265,7 @@ WORKDIR /home/nonroot # Rust # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`) -ENV RUSTC_VERSION=1.84.0 +ENV RUSTC_VERSION=1.84.1 ENV RUSTUP_HOME="/home/nonroot/.rustup" ENV PATH="/home/nonroot/.cargo/bin:${PATH}" ARG RUSTFILT_VERSION=0.2.1 @@ -261,6 +273,7 @@ ARG CARGO_HAKARI_VERSION=0.9.33 ARG CARGO_DENY_VERSION=0.16.2 ARG CARGO_HACK_VERSION=0.6.33 ARG CARGO_NEXTEST_VERSION=0.9.85 +ARG CARGO_DIESEL_CLI_VERSION=2.2.6 RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \ chmod +x rustup-init && \ ./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \ @@ -274,6 +287,8 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \ cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \ cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \ + cargo install diesel_cli --version 
${CARGO_DIESEL_CLI_VERSION} \ + --features postgres-bundled --no-default-features && \ rm -rf /home/nonroot/.cargo/registry && \ rm -rf /home/nonroot/.cargo/git diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index a428c61f3458..43910f2622b4 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -1,3 +1,81 @@ +# +# This Dockerfile builds the compute image. It is built multiple times to produce +# different images for each PostgreSQL major version. +# +# We use Debian as the base for all the steps. The production images use Debian bookworm +# for v17, and Debian bullseye for older PostgreSQL versions. +# +# ## Intermediary layers +# +# build-tools: This contains Rust compiler toolchain and other tools needed at compile +# time. This is also used for the storage builds. This image is defined in +# build-tools.Dockerfile. +# +# build-deps: Contains C compiler, other build tools, and compile-time dependencies +# needed to compile PostgreSQL and most extensions. (Some extensions need +# extra tools and libraries that are not included in this image. They are +# installed in the extension-specific build stages.) +# +# pg-build: Result of compiling PostgreSQL. The PostgreSQL binaries are copied from +# this to the final image. This is also used as the base for compiling all +# the extensions. +# +# compute-tools: This contains compute_ctl, the launcher program that starts Postgres +# in Neon. It also contains a few other tools that are built from the +# sources from this repository and used in compute VMs: 'fast_import' and +# 'local_proxy' +# +# ## Extensions +# +# By convention, the build of each extension consists of two layers: +# +# {extension}-src: Contains the source tarball, possible neon-specific patches, and +# the extracted tarball with the patches applied. All of these are +# under the /ext-src/ directory. +# +# {extension}-build: Contains the installed extension files, under /usr/local/pgsql +# (in addition to the PostgreSQL binaries inherited from the pg-build +# image). A few extensions need extra libraries or other files +# installed elsewhere in the filesystem. They are installed by ONBUILD +# directives. +# +# These are merged together into two layers: +# +# all-extensions: All the extension -build layers merged together +# +# extension-tests: All the extension -src layers merged together. This is used by the +# extension tests. The tests are executed against the compiled image, +# but the tests need test scripts, expected result files etc. from the +# original sources, which are not included in the binary image. +# +# ## Extra components +# +# These are extra included in the compute image, but are not directly used by PostgreSQL +# itself. +# +# pgbouncer: pgbouncer and its configuration +# +# sql_exporter: Metrics exporter daemon. +# +# postgres_exporter: Another metrics exporter daemon, for different sets of metrics. +# +# The configuration files for the metrics exporters are under etc/ directory. We use +# a templating system to handle variations between different PostgreSQL versions, +# building slightly different config files for each PostgreSQL version. +# +# +# ## Final image +# +# The final image puts together the PostgreSQL binaries (pg-build), the compute tools +# (compute-tools), all the extensions (all-extensions) and the extra components into +# one image. +# +# VM image: The final image built by this dockerfile isn't actually the final image that +# we use in computes VMs. 
There's an extra step that adds some files and makes other +# small adjustments, and builds the QCOV2 filesystem image suitable for using in a VM. +# That step is done by the 'vm-builder' tool. See the vm-compute-node-image job in the +# build_and_test.yml github workflow for how that's done. + ARG PG_VERSION ARG REPOSITORY=neondatabase ARG IMAGE=build-tools @@ -7,6 +85,10 @@ ARG DEBIAN_VERSION=bookworm ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim ARG ALPINE_CURL_VERSION=8.11.1 +# By default, build all PostgreSQL extensions. For quick local testing when you don't +# care about the extensions, pass EXTENSIONS=none or EXTENSIONS=minimal +ARG EXTENSIONS=all + ######################################################################################### # # Layer "build-deps" @@ -18,8 +100,10 @@ ARG DEBIAN_VERSION # Use strict mode for bash to catch errors early SHELL ["/bin/bash", "-euo", "pipefail", "-c"] +# By default, /bin/sh used in debian images will treat '\n' as eol, +# but as we use bash as SHELL, and built-in echo in bash requires '-e' flag for that. RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \ - echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc \ + echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \ echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc RUN case $DEBIAN_VERSION in \ @@ -122,17 +206,9 @@ ENV PATH="/usr/local/pgsql/bin:$PATH" # Build PostGIS from the upstream PostGIS mirror. # ######################################################################################### -FROM pg-build AS postgis-build +FROM build-deps AS postgis-src ARG DEBIAN_VERSION ARG PG_VERSION -RUN apt update && \ - apt install --no-install-recommends --no-install-suggests -y \ - gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \ - libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \ - libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \ - protobuf-c-compiler xsltproc \ - && apt clean && rm -rf /var/lib/apt/lists/* - # Postgis 3.5.0 requires SFCGAL 1.4+ # @@ -141,6 +217,7 @@ RUN apt update && \ # and also we must check backward compatibility with older versions of PostGIS. # # Use new version only for v17 +WORKDIR /ext-src RUN case "${DEBIAN_VERSION}" in \ "bookworm") \ export SFCGAL_VERSION=1.4.1 \ @@ -154,15 +231,12 @@ RUN case "${DEBIAN_VERSION}" in \ echo "unexpected PostgreSQL version" && exit 1 \ ;; \ esac && \ - mkdir -p /sfcgal && \ wget https://gitlab.com/sfcgal/SFCGAL/-/archive/v${SFCGAL_VERSION}/SFCGAL-v${SFCGAL_VERSION}.tar.gz -O SFCGAL.tar.gz && \ echo "${SFCGAL_CHECKSUM} SFCGAL.tar.gz" | sha256sum --check && \ - mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \ - cmake -DCMAKE_BUILD_TYPE=Release -GNinja . && ninja -j $(getconf _NPROCESSORS_ONLN) && \ - DESTDIR=/sfcgal ninja install -j $(getconf _NPROCESSORS_ONLN) && \ - ninja clean && cp -R /sfcgal/* / + mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . 
# Postgis 3.5.0 supports v17 +WORKDIR /ext-src RUN case "${PG_VERSION}" in \ "v17") \ export POSTGIS_VERSION=3.5.0 \ @@ -178,8 +252,27 @@ RUN case "${PG_VERSION}" in \ esac && \ wget https://download.osgeo.org/postgis/source/postgis-${POSTGIS_VERSION}.tar.gz -O postgis.tar.gz && \ echo "${POSTGIS_CHECKSUM} postgis.tar.gz" | sha256sum --check && \ - mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \ - ./autogen.sh && \ + mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . + +# This is reused for pgrouting +FROM pg-build AS postgis-build-deps +RUN apt update && \ + apt install --no-install-recommends --no-install-suggests -y \ + gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \ + libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \ + libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \ + protobuf-c-compiler xsltproc \ + && apt clean && rm -rf /var/lib/apt/lists/* + +FROM postgis-build-deps AS postgis-build +COPY --from=postgis-src /ext-src/ /ext-src/ +WORKDIR /ext-src/sfcgal-src +RUN cmake -DCMAKE_BUILD_TYPE=Release -GNinja . && ninja -j $(getconf _NPROCESSORS_ONLN) && \ + DESTDIR=/sfcgal ninja install -j $(getconf _NPROCESSORS_ONLN) && \ + ninja clean && cp -R /sfcgal/* / + +WORKDIR /ext-src/postgis-src +RUN ./autogen.sh && \ ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \ make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ @@ -202,12 +295,23 @@ RUN case "${PG_VERSION}" in \ cp /usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \ cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis +######################################################################################### +# +# Layer "pgrouting-build" +# Build pgrouting. Note: This depends on the postgis-build-deps layer built above +# +######################################################################################### + # Uses versioned libraries, i.e. libpgrouting-3.4 # and may introduce function signature changes between releases # i.e. release 3.5.0 has new signature for pg_dijkstra function # # Use new version only for v17 # last release v3.6.2 - Mar 30, 2024 +FROM build-deps AS pgrouting-src +ARG DEBIAN_VERSION +ARG PG_VERSION +WORKDIR /ext-src RUN case "${PG_VERSION}" in \ "v17") \ export PGROUTING_VERSION=3.6.2 \ @@ -223,8 +327,12 @@ RUN case "${PG_VERSION}" in \ esac && \ wget https://github.com/pgRouting/pgrouting/archive/v${PGROUTING_VERSION}.tar.gz -O pgrouting.tar.gz && \ echo "${PGROUTING_CHECKSUM} pgrouting.tar.gz" | sha256sum --check && \ - mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \ - mkdir build && cd build && \ + mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . + +FROM postgis-build-deps AS pgrouting-build +COPY --from=pgrouting-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pgrouting-src +RUN mkdir build && cd build && \ cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. 
&& \ ninja -j $(getconf _NPROCESSORS_ONLN) && \ ninja -j $(getconf _NPROCESSORS_ONLN) install && \ @@ -236,15 +344,11 @@ RUN case "${PG_VERSION}" in \ # Build plv8 # ######################################################################################### -FROM pg-build AS plv8-build +FROM build-deps AS plv8-src ARG PG_VERSION +WORKDIR /ext-src -COPY compute/patches/plv8-3.1.10.patch /plv8-3.1.10.patch - -RUN apt update && \ - apt install --no-install-recommends --no-install-suggests -y \ - ninja-build python3-dev libncurses5 binutils clang \ - && apt clean && rm -rf /var/lib/apt/lists/* +COPY compute/patches/plv8-3.1.10.patch . # plv8 3.2.3 supports v17 # last release v3.2.3 - Sep 7, 2024 @@ -268,9 +372,20 @@ RUN case "${PG_VERSION}" in \ git clone --recurse-submodules --depth 1 --branch ${PLV8_TAG} https://github.com/plv8/plv8.git plv8-src && \ tar -czf plv8.tar.gz --exclude .git plv8-src && \ cd plv8-src && \ - if [[ "${PG_VERSION}" < "v17" ]]; then patch -p1 < /plv8-3.1.10.patch; fi && \ + if [[ "${PG_VERSION}" < "v17" ]]; then patch -p1 < /ext-src/plv8-3.1.10.patch; fi + +FROM pg-build AS plv8-build +ARG PG_VERSION +RUN apt update && \ + apt install --no-install-recommends --no-install-suggests -y \ + ninja-build python3-dev libncurses5 binutils clang \ + && apt clean && rm -rf /var/lib/apt/lists/* + +COPY --from=plv8-src /ext-src/ /ext-src/ +WORKDIR /ext-src/plv8-src +RUN \ # generate and copy upgrade scripts - mkdir -p upgrade && ./generate_upgrade.sh ${PLV8_TAG#v} && \ + make generate_upgrades && \ cp upgrade/* /usr/local/pgsql/share/extension/ && \ make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \ rm -rf /plv8-* && \ @@ -298,16 +413,28 @@ RUN case "${PG_VERSION}" in \ # Build h3_pg # ######################################################################################### -FROM pg-build AS h3-pg-build +FROM build-deps AS h3-pg-src ARG PG_VERSION +WORKDIR /ext-src # not version-specific # last release v4.1.0 - Jan 18, 2023 RUN mkdir -p /h3/usr/ && \ wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \ echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \ - mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \ - mkdir build && cd build && \ + mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . + +# not version-specific +# last release v4.1.3 - Jul 26, 2023 +WORKDIR /ext-src +RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \ + echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \ + mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . + +FROM pg-build AS h3-pg-build +COPY --from=h3-pg-src /ext-src/ /ext-src/ +WORKDIR /ext-src/h3-src +RUN mkdir build && cd build && \ cmake .. -GNinja -DBUILD_BENCHMARKS=0 -DCMAKE_BUILD_TYPE=Release \ -DBUILD_FUZZERS=0 -DBUILD_FILTERS=0 -DBUILD_GENERATORS=0 -DBUILD_TESTING=0 \ && ninja -j $(getconf _NPROCESSORS_ONLN) && \ @@ -315,11 +442,8 @@ RUN mkdir -p /h3/usr/ && \ cp -R /h3/usr / && \ rm -rf build -# not version-specific -# last release v4.1.3 - Jul 26, 2023 -RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \ - echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \ - mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . 
&& \ +WORKDIR /ext-src/h3-pg-src +RUN ls -l && \ make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \ @@ -327,19 +451,24 @@ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3 ######################################################################################### # -# Layer "unit-pg-build" +# Layer "postgresql-unit-build" # compile unit extension # ######################################################################################### -FROM pg-build AS unit-pg-build +FROM build-deps AS postgresql-unit-src ARG PG_VERSION # not version-specific # last release 7.9 - Sep 15, 2024 +WORKDIR /ext-src RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.9.tar.gz -O postgresql-unit.tar.gz && \ echo "e46de6245dcc8b2c2ecf29873dbd43b2b346773f31dd5ce4b8315895a052b456 postgresql-unit.tar.gz" | sha256sum --check && \ - mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . + +FROM pg-build AS postgresql-unit-build +COPY --from=postgresql-unit-src /ext-src/ /ext-src/ +WORKDIR /ext-src/postgresql-unit-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ # unit extension's "create extension" script relies on absolute install path to fill some reference tables. # We move the extension from '/usr/local/pgsql/' to '/usr/local/' after it is build. So we need to adjust the path. @@ -350,14 +479,15 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.9.tar.gz - ######################################################################################### # -# Layer "vector-pg-build" +# Layer "pgvector-build" # compile pgvector extension # ######################################################################################### -FROM pg-build AS vector-pg-build +FROM build-deps AS pgvector-src ARG PG_VERSION -COPY compute/patches/pgvector.patch /pgvector.patch +WORKDIR /ext-src +COPY compute/patches/pgvector.patch . # By default, pgvector Makefile uses `-march=native`. We don't want that, # because we build the images on different machines than where we run them. @@ -370,74 +500,94 @@ RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.8.0.tar.gz -O mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . 
&& \ wget https://github.com/pgvector/pgvector/raw/refs/tags/v0.7.4/sql/vector.sql -O ./sql/vector--0.7.4.sql && \ echo "10218d05dc02299562252a9484775178b14a1d8edb92a2d1672ef488530f7778 ./sql/vector--0.7.4.sql" | sha256sum --check && \ - patch -p1 < /pgvector.patch && \ - make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" && \ + patch -p1 < /ext-src/pgvector.patch + +FROM pg-build AS pgvector-build +COPY --from=pgvector-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pgvector-src +RUN make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" && \ make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control ######################################################################################### # -# Layer "pgjwt-pg-build" +# Layer "pgjwt-build" # compile pgjwt extension # ######################################################################################### -FROM pg-build AS pgjwt-pg-build +FROM build-deps AS pgjwt-src ARG PG_VERSION # not version-specific # doesn't use releases, last commit f3d82fd - Mar 2, 2023 +WORKDIR /ext-src RUN wget https://github.com/michelp/pgjwt/archive/f3d82fd30151e754e19ce5d6a06c71c20689ce3d.tar.gz -O pgjwt.tar.gz && \ echo "dae8ed99eebb7593b43013f6532d772b12dfecd55548d2673f2dfd0163f6d2b9 pgjwt.tar.gz" | sha256sum --check && \ - mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) install && \ + mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . + +FROM pg-build AS pgjwt-build +COPY --from=pgjwt-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pgjwt-src +RUN make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control ######################################################################################### # -# Layer "hypopg-pg-build" +# Layer "hypopg-build" # compile hypopg extension # ######################################################################################### -FROM pg-build AS hypopg-pg-build +FROM build-deps AS hypopg-src ARG PG_VERSION # HypoPG 1.4.1 supports v17 # last release 1.4.1 - Apr 28, 2024 +WORKDIR /ext-src RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.1.tar.gz -O hypopg.tar.gz && \ echo "9afe6357fd389d8d33fad81703038ce520b09275ec00153c6c89282bcdedd6bc hypopg.tar.gz" | sha256sum --check && \ - mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . 
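The pgvector stage above builds with OPTFLAGS="" because pgvector's Makefile otherwise adds -march=native, tying the shared library to the CPU of the build machine rather than the machines the image actually runs on. A rough portability spot-check is sketched below; it assumes objdump is available in the build container and uses an illustrative AVX-512 pattern list, neither of which is part of this Dockerfile.

```bash
# Hedged sanity check: AVX-512 mnemonics or zmm registers in the disassembly
# would suggest a host-tuned build that could SIGILL on older runner CPUs.
if objdump -d /usr/local/pgsql/lib/vector.so | grep -Eq 'zmm|vpternlog|vpermi2'; then
    echo "warning: vector.so appears to use AVX-512 instructions"
else
    echo "vector.so looks portable (no AVX-512 instructions found)"
fi
```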
+ +FROM pg-build AS hypopg-build +COPY --from=hypopg-src /ext-src/ /ext-src/ +WORKDIR /ext-src/hypopg-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control ######################################################################################### # -# Layer "pg-hashids-pg-build" +# Layer "pg_hashids-build" # compile pg_hashids extension # ######################################################################################### -FROM pg-build AS pg-hashids-pg-build +FROM build-deps AS pg_hashids-src ARG PG_VERSION # not version-specific # last release v1.2.1 -Jan 12, 2018 +WORKDIR /ext-src RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \ echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \ - mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \ + mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . + +FROM pg-build AS pg_hashids-build +COPY --from=pg_hashids-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_hashids-src +RUN make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install USE_PGXS=1 && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control ######################################################################################### # -# Layer "rum-pg-build" +# Layer "rum-build" # compile rum extension # ######################################################################################### -FROM pg-build AS rum-pg-build +FROM build-deps AS rum-src ARG PG_VERSION -COPY compute/patches/rum.patch /rum.patch +WORKDIR /ext-src +COPY compute/patches/rum.patch . # supports v17 since https://github.com/postgrespro/rum/commit/cb1edffc57736cd2a4455f8d0feab0d69928da25 # doesn't use releases since 1.3.13 - Sep 19, 2022 @@ -445,110 +595,140 @@ COPY compute/patches/rum.patch /rum.patch RUN wget https://github.com/postgrespro/rum/archive/cb1edffc57736cd2a4455f8d0feab0d69928da25.tar.gz -O rum.tar.gz && \ echo "65e0a752e99f4c3226400c9b899f997049e93503db8bf5c8072efa136d32fd83 rum.tar.gz" | sha256sum --check && \ mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . 
&& \ - patch -p1 < /rum.patch && \ - make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \ + patch -p1 < /ext-src/rum.patch + +FROM pg-build AS rum-build +COPY --from=rum-src /ext-src/ /ext-src/ +WORKDIR /ext-src/rum-src +RUN make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install USE_PGXS=1 && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control ######################################################################################### # -# Layer "pgtap-pg-build" +# Layer "pgtap-build" # compile pgTAP extension # ######################################################################################### -FROM pg-build AS pgtap-pg-build +FROM build-deps AS pgtap-src ARG PG_VERSION # pgtap 1.3.3 supports v17 # last release v1.3.3 - Apr 8, 2024 +WORKDIR /ext-src RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.3.3.tar.gz -O pgtap.tar.gz && \ echo "325ea79d0d2515bce96bce43f6823dcd3effbd6c54cb2a4d6c2384fffa3a14c7 pgtap.tar.gz" | sha256sum --check && \ - mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . + +FROM pg-build AS pgtap-build +COPY --from=pgtap-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pgtap-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control ######################################################################################### # -# Layer "ip4r-pg-build" +# Layer "ip4r-build" # compile ip4r extension # ######################################################################################### -FROM pg-build AS ip4r-pg-build +FROM build-deps AS ip4r-src ARG PG_VERSION # not version-specific # last release v2.4.2 - Jul 29, 2023 +WORKDIR /ext-src RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \ echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \ - mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . + +FROM pg-build AS ip4r-build +COPY --from=ip4r-src /ext-src/ /ext-src/ +WORKDIR /ext-src/ip4r-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control ######################################################################################### # -# Layer "prefix-pg-build" +# Layer "prefix-build" # compile Prefix extension # ######################################################################################### -FROM pg-build AS prefix-pg-build +FROM build-deps AS prefix-src ARG PG_VERSION # not version-specific # last release v1.2.10 - Jul 5, 2023 +WORKDIR /ext-src RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \ echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \ - mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . 
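The hypopg, pg_hashids, rum, pgtap, ip4r, and prefix stages around this point all repeat the {extension}-src / {extension}-build split described in the header comment. For reference, wiring in a new PGXS-style extension would typically look like the sketch below; "myext", its URL, and its checksum are placeholders, not parts of this file.

```dockerfile
# Illustrative only: "myext" is a hypothetical extension following the
# {extension}-src / {extension}-build convention used throughout this Dockerfile.
FROM build-deps AS myext-src
ARG PG_VERSION
WORKDIR /ext-src
RUN wget https://example.com/myext-1.0.tar.gz -O myext.tar.gz && \
    echo "<sha256-of-tarball> myext.tar.gz" | sha256sum --check && \
    mkdir myext-src && cd myext-src && tar xzf ../myext.tar.gz --strip-components=1 -C .

FROM pg-build AS myext-build
COPY --from=myext-src /ext-src/ /ext-src/
WORKDIR /ext-src/myext-src
RUN make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/myext.control
```

A matching COPY --from=myext-build line in the extensions-all stage further down would then pull the installed files into the bundle.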
+ +FROM pg-build AS prefix-build +COPY --from=prefix-src /ext-src/ /ext-src/ +WORKDIR /ext-src/prefix-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control ######################################################################################### # -# Layer "hll-pg-build" +# Layer "hll-build" # compile hll extension # ######################################################################################### -FROM pg-build AS hll-pg-build +FROM build-deps AS hll-src ARG PG_VERSION # not version-specific # last release v2.18 - Aug 29, 2023 +WORKDIR /ext-src RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \ echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \ - mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . + +FROM pg-build AS hll-build +COPY --from=hll-src /ext-src/ /ext-src/ +WORKDIR /ext-src/hll-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control ######################################################################################### # -# Layer "plpgsql-check-pg-build" +# Layer "plpgsql_check-build" # compile plpgsql_check extension # ######################################################################################### -FROM pg-build AS plpgsql-check-pg-build +FROM build-deps AS plpgsql_check-src ARG PG_VERSION # plpgsql_check v2.7.11 supports v17 # last release v2.7.11 - Sep 16, 2024 +WORKDIR /ext-src RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.7.11.tar.gz -O plpgsql_check.tar.gz && \ echo "208933f8dbe8e0d2628eb3851e9f52e6892b8e280c63700c0f1ce7883625d172 plpgsql_check.tar.gz" | sha256sum --check && \ - mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \ + mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . + +FROM pg-build AS plpgsql_check-build +COPY --from=plpgsql_check-src /ext-src/ /ext-src/ +WORKDIR /ext-src/plpgsql_check-src +RUN make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install USE_PGXS=1 && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control ######################################################################################### # -# Layer "timescaledb-pg-build" +# Layer "timescaledb-build" # compile timescaledb extension # ######################################################################################### -FROM pg-build AS timescaledb-pg-build +FROM build-deps AS timescaledb-src ARG PG_VERSION +WORKDIR /ext-src RUN case "${PG_VERSION}" in \ "v14" | "v15") \ export TIMESCALEDB_VERSION=2.10.1 \ @@ -565,8 +745,12 @@ RUN case "${PG_VERSION}" in \ esac && \ wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \ echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \ - mkdir timescaledb-src && cd timescaledb-src && tar xzf ../timescaledb.tar.gz --strip-components=1 -C . 
&& \ - ./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \ + mkdir timescaledb-src && cd timescaledb-src && tar xzf ../timescaledb.tar.gz --strip-components=1 -C . + +FROM pg-build AS timescaledb-build +COPY --from=timescaledb-src /ext-src/ /ext-src/ +WORKDIR /ext-src/timescaledb-src +RUN ./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \ cd build && \ make -j $(getconf _NPROCESSORS_ONLN) && \ make install -j $(getconf _NPROCESSORS_ONLN) && \ @@ -574,14 +758,15 @@ RUN case "${PG_VERSION}" in \ ######################################################################################### # -# Layer "pg-hint-plan-pg-build" +# Layer "pg_hint_plan-build" # compile pg_hint_plan extension # ######################################################################################### -FROM pg-build AS pg-hint-plan-pg-build +FROM build-deps AS pg_hint_plan-src ARG PG_VERSION # version-specific, has separate releases for each version +WORKDIR /ext-src RUN case "${PG_VERSION}" in \ "v14") \ export PG_HINT_PLAN_VERSION=14_1_4_1 \ @@ -605,50 +790,51 @@ RUN case "${PG_VERSION}" in \ esac && \ wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \ echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \ - mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xzf ../pg_hint_plan.tar.gz --strip-components=1 -C . + +FROM pg-build AS pg_hint_plan-build +COPY --from=pg_hint_plan-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_hint_plan-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make install -j $(getconf _NPROCESSORS_ONLN) && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control ######################################################################################### # -# Layer "pg-cron-pg-build" +# Layer "pg_cron-build" # compile pg_cron extension # ######################################################################################### -FROM pg-build AS pg-cron-pg-build +FROM build-deps AS pg_cron-src ARG PG_VERSION # This is an experimental extension that we do not support on prod yet. # !Do not remove! # We set it in shared_preload_libraries and computes will fail to start if library is not found. +WORKDIR /ext-src +COPY compute/patches/pg_cron.patch . RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.4.tar.gz -O pg_cron.tar.gz && \ echo "52d1850ee7beb85a4cb7185731ef4e5a90d1de216709d8988324b0d02e76af61 pg_cron.tar.gz" | sha256sum --check && \ mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . 
&& \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + patch < /ext-src/pg_cron.patch + +FROM pg-build AS pg_cron-build +COPY --from=pg_cron-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_cron-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control ######################################################################################### # -# Layer "rdkit-pg-build" +# Layer "rdkit-build" # compile rdkit extension # ######################################################################################### -FROM pg-build AS rdkit-pg-build +FROM build-deps AS rdkit-src ARG PG_VERSION -RUN apt update && \ - apt install --no-install-recommends --no-install-suggests -y \ - libboost-iostreams1.74-dev \ - libboost-regex1.74-dev \ - libboost-serialization1.74-dev \ - libboost-system1.74-dev \ - libeigen3-dev \ - libboost-all-dev \ - && apt clean && rm -rf /var/lib/apt/lists/* - # rdkit Release_2024_09_1 supports v17 # last release Release_2024_09_1 - Sep 27, 2024 # @@ -656,12 +842,7 @@ RUN apt update && \ # because Release_2024_09_1 has some backward incompatible changes # https://github.com/rdkit/rdkit/releases/tag/Release_2024_09_1 -# XXX: /usr/local/pgsql/bin is already in PATH, and that should be enough to find -# pg_config. For some reason the rdkit cmake script doesn't work with just that, -# however. By also adding /usr/local/pgsql, it works, which is weird because there -# are no executables in that directory. -ENV PATH="/usr/local/pgsql:$PATH" - +WORKDIR /ext-src RUN case "${PG_VERSION}" in \ "v17") \ export RDKIT_VERSION=Release_2024_09_1 \ @@ -677,8 +858,28 @@ RUN case "${PG_VERSION}" in \ esac && \ wget https://github.com/rdkit/rdkit/archive/refs/tags/${RDKIT_VERSION}.tar.gz -O rdkit.tar.gz && \ echo "${RDKIT_CHECKSUM} rdkit.tar.gz" | sha256sum --check && \ - mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \ - cmake \ + mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . + +FROM pg-build AS rdkit-build +RUN apt update && \ + apt install --no-install-recommends --no-install-suggests -y \ + libboost-iostreams1.74-dev \ + libboost-regex1.74-dev \ + libboost-serialization1.74-dev \ + libboost-system1.74-dev \ + libeigen3-dev \ + libboost-all-dev \ + && apt clean && rm -rf /var/lib/apt/lists/* + +COPY --from=rdkit-src /ext-src/ /ext-src/ +WORKDIR /ext-src/rdkit-src + +# XXX: /usr/local/pgsql/bin is already in PATH, and that should be enough to find +# pg_config. For some reason the rdkit cmake script doesn't work with just that, +# however. By also adding /usr/local/pgsql, it works, which is weird because there +# are no executables in that directory. 
+ENV PATH="/usr/local/pgsql:$PATH" +RUN cmake \ -D RDK_BUILD_CAIRO_SUPPORT=OFF \ -D RDK_BUILD_INCHI_SUPPORT=ON \ -D RDK_BUILD_AVALON_SUPPORT=ON \ @@ -710,47 +911,57 @@ RUN case "${PG_VERSION}" in \ ######################################################################################### # -# Layer "pg-uuidv7-pg-build" +# Layer "pg_uuidv7-build" # compile pg_uuidv7 extension # ######################################################################################### -FROM pg-build AS pg-uuidv7-pg-build +FROM build-deps AS pg_uuidv7-src ARG PG_VERSION # not version-specific # last release v1.6.0 - Oct 9, 2024 +WORKDIR /ext-src RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.6.0.tar.gz -O pg_uuidv7.tar.gz && \ echo "0fa6c710929d003f6ce276a7de7a864e9d1667b2d78be3dc2c07f2409eb55867 pg_uuidv7.tar.gz" | sha256sum --check && \ - mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . + +FROM pg-build AS pg_uuidv7-build +COPY --from=pg_uuidv7-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_uuidv7-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control ######################################################################################### # -# Layer "pg-roaringbitmap-pg-build" +# Layer "pg_roaringbitmap-build" # compile pg_roaringbitmap extension # ######################################################################################### -FROM pg-build AS pg-roaringbitmap-pg-build +FROM build-deps AS pg_roaringbitmap-src ARG PG_VERSION # not version-specific # last release v0.5.4 - Jun 28, 2022 +WORKDIR /ext-src RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \ echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \ - mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . 
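Most of the stages here append trusted = true to the extension's control file so that, once the files land in the final image, the extension can be created by a non-superuser database owner. A quick way to verify and exercise the flag against a built image is sketched below; the connection string is a placeholder.

```bash
# Confirm the build stage appended the flag (path matches the install prefix
# used throughout this Dockerfile).
grep 'trusted' /usr/local/pgsql/share/extension/roaringbitmap.control

# With trusted = true, a non-superuser owner of the database can install it;
# "$DSN" is a placeholder connection string.
psql "$DSN" -c 'CREATE EXTENSION IF NOT EXISTS roaringbitmap;'
```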
+ +FROM pg-build AS pg_roaringbitmap-build +COPY --from=pg_roaringbitmap-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_roaringbitmap-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control ######################################################################################### # -# Layer "pg-semver-pg-build" +# Layer "pg_semver-build" # compile pg_semver extension # ######################################################################################### -FROM pg-build AS pg-semver-pg-build +FROM build-deps AS pg_semver-src ARG PG_VERSION # Release 0.40.0 breaks backward compatibility with previous versions @@ -758,6 +969,7 @@ ARG PG_VERSION # Use new version only for v17 # # last release v0.40.0 - Jul 22, 2024 +WORKDIR /ext-src RUN case "${PG_VERSION}" in \ "v17") \ export SEMVER_VERSION=0.40.0 \ @@ -773,22 +985,27 @@ RUN case "${PG_VERSION}" in \ esac && \ wget https://github.com/theory/pg-semver/archive/refs/tags/v${SEMVER_VERSION}.tar.gz -O pg_semver.tar.gz && \ echo "${SEMVER_CHECKSUM} pg_semver.tar.gz" | sha256sum --check && \ - mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . + +FROM pg-build AS pg_semver-build +COPY --from=pg_semver-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_semver-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/semver.control ######################################################################################### # -# Layer "pg-embedding-pg-build" +# Layer "pg_embedding-build" # compile pg_embedding extension # ######################################################################################### -FROM pg-build AS pg-embedding-pg-build +FROM build-deps AS pg_embedding-src +ARG PG_VERSION # This is our extension, support stopped in favor of pgvector # TODO: deprecate it -ARG PG_VERSION +WORKDIR /ext-src RUN case "${PG_VERSION}" in \ "v14" | "v15") \ export PG_EMBEDDING_VERSION=0.3.5 \ @@ -799,37 +1016,52 @@ RUN case "${PG_VERSION}" in \ esac && \ wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/${PG_EMBEDDING_VERSION}.tar.gz -O pg_embedding.tar.gz && \ echo "${PG_EMBEDDING_CHECKSUM} pg_embedding.tar.gz" | sha256sum --check && \ - mkdir pg_embedding-src && cd pg_embedding-src && tar xzf ../pg_embedding.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ - make -j $(getconf _NPROCESSORS_ONLN) install + mkdir pg_embedding-src && cd pg_embedding-src && tar xzf ../pg_embedding.tar.gz --strip-components=1 -C . + +FROM pg-build AS pg_embedding-build +COPY --from=pg_embedding-src /ext-src/ /ext-src/ +WORKDIR /ext-src/ +RUN if [ -d pg_embedding-src ]; then \ + cd pg_embedding-src && \ + make -j $(getconf _NPROCESSORS_ONLN) && \ + make -j $(getconf _NPROCESSORS_ONLN) install; \ + fi ######################################################################################### # -# Layer "pg-anon-pg-build" +# Layer "pg_anon-build" # compile anon extension # ######################################################################################### -FROM pg-build AS pg-anon-pg-build +FROM build-deps AS pg_anon-src ARG PG_VERSION # This is an experimental extension, never got to real production. 
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found. +WORKDIR /ext-src RUN case "${PG_VERSION}" in "v17") \ echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \ esac && \ wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \ echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \ - mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) install && \ - echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control + mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . + +FROM pg-build AS pg_anon-build +COPY --from=pg_anon-src /ext-src/ /ext-src/ +WORKDIR /ext-src +RUN if [ -d pg_anon-src ]; then \ + cd pg_anon-src && \ + make -j $(getconf _NPROCESSORS_ONLN) install && \ + echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control; \ + fi ######################################################################################### # -# Layer "rust extensions" -# This layer is used to build `pgrx` deps +# Layer "pg build with nonroot user and cargo installed" +# This layer is base and common for layers with `pgrx` # ######################################################################################### -FROM pg-build AS rust-extensions-build +FROM pg-build AS pg-build-nonroot-with-cargo ARG PG_VERSION RUN apt update && \ @@ -842,13 +1074,24 @@ ENV PATH="/home/nonroot/.cargo/bin:$PATH" USER nonroot WORKDIR /home/nonroot +# See comment on the top of the file regading `echo` and `\n` RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /home/nonroot/.curlrc RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \ chmod +x rustup-init && \ ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \ - rm rustup-init && \ - case "${PG_VERSION}" in \ + rm rustup-init + +######################################################################################### +# +# Layer "rust extensions" +# This layer is used to build `pgrx` deps +# +######################################################################################### +FROM pg-build-nonroot-with-cargo AS rust-extensions-build +ARG PG_VERSION + +RUN case "${PG_VERSION}" in \ 'v17') \ echo 'v17 is not supported yet by pgrx. 
Quit' && exit 0;; \ esac && \ @@ -867,76 +1110,67 @@ USER root # and eventually get merged with `rust-extensions-build` # ######################################################################################### -FROM pg-build AS rust-extensions-build-pgrx12 +FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx12 ARG PG_VERSION -RUN apt update && \ - apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \ - apt clean && rm -rf /var/lib/apt/lists/* && \ - useradd -ms /bin/bash nonroot -b /home - -ENV HOME=/home/nonroot -ENV PATH="/home/nonroot/.cargo/bin:$PATH" -USER nonroot -WORKDIR /home/nonroot - -RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /home/nonroot/.curlrc - -RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \ - chmod +x rustup-init && \ - ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \ - rm rustup-init && \ - cargo install --locked --version 0.12.9 cargo-pgrx && \ +RUN cargo install --locked --version 0.12.9 cargo-pgrx && \ /bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config' USER root ######################################################################################### # -# Layers "pg-onnx-build" and "pgrag-pg-build" +# Layers "pg-onnx-build" and "pgrag-build" # Compile "pgrag" extensions # ######################################################################################### -FROM rust-extensions-build-pgrx12 AS pg-onnx-build +FROM build-deps AS pgrag-src +ARG PG_VERSION + +WORKDIR /ext-src +RUN wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O onnxruntime.tar.gz && \ + mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \ + echo "#nothing to test here" > neon-test.sh + +RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz && \ + echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \ + mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C . + +FROM rust-extensions-build-pgrx12 AS pgrag-build +COPY --from=pgrag-src /ext-src/ /ext-src/ +# Install build-time dependencies # cmake 3.26 or higher is required, so installing it using pip (bullseye-backports has cmake 3.25). # Install it using virtual environment, because Python 3.11 (the default version on Debian 12 (Bookworm)) complains otherwise +WORKDIR /ext-src/onnxruntime-src RUN apt update && apt install --no-install-recommends --no-install-suggests -y \ - python3 python3-pip python3-venv && \ + python3 python3-pip python3-venv protobuf-compiler && \ apt clean && rm -rf /var/lib/apt/lists/* && \ python3 -m venv venv && \ . venv/bin/activate && \ - python3 -m pip install cmake==3.30.5 && \ - wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O onnxruntime.tar.gz && \ - mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \ + python3 -m pip install cmake==3.30.5 + +RUN . 
venv/bin/activate && \ ./build.sh --config Release --parallel --cmake_generator Ninja \ --skip_submodule_sync --skip_tests --allow_running_as_root - -FROM pg-onnx-build AS pgrag-pg-build - -RUN apt update && apt install --no-install-recommends --no-install-suggests -y protobuf-compiler \ - && apt clean && rm -rf /var/lib/apt/lists/* && \ - wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz && \ - echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \ - mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C . && \ - \ - cd exts/rag && \ +WORKDIR /ext-src/pgrag-src +RUN cd exts/rag && \ sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ cargo pgrx install --release && \ - echo "trusted = true" >> /usr/local/pgsql/share/extension/rag.control && \ - \ - cd ../rag_bge_small_en_v15 && \ + echo "trusted = true" >> /usr/local/pgsql/share/extension/rag.control + +RUN cd exts/rag_bge_small_en_v15 && \ sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ - ORT_LIB_LOCATION=/home/nonroot/onnxruntime-src/build/Linux \ + ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \ REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \ cargo pgrx install --release --features remote_onnx && \ - echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control && \ - \ - cd ../rag_jina_reranker_v1_tiny_en && \ + echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control + +RUN cd exts/rag_jina_reranker_v1_tiny_en && \ sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ - ORT_LIB_LOCATION=/home/nonroot/onnxruntime-src/build/Linux \ + ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \ REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \ cargo pgrx install --release --features remote_onnx && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_jina_reranker_v1_tiny_en.control @@ -944,17 +1178,23 @@ RUN apt update && apt install --no-install-recommends --no-install-suggests -y p ######################################################################################### # -# Layer "pg-jsonschema-pg-build" +# Layer "pg_jsonschema-build" # Compile "pg_jsonschema" extension # ######################################################################################### -FROM rust-extensions-build-pgrx12 AS pg-jsonschema-pg-build +FROM build-deps AS pg_jsonschema-src ARG PG_VERSION # last release v0.3.3 - Oct 16, 2024 +WORKDIR /ext-src RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.3.tar.gz -O pg_jsonschema.tar.gz && \ echo "40c2cffab4187e0233cb8c3bde013be92218c282f95f4469c5282f6b30d64eac pg_jsonschema.tar.gz" | sha256sum --check && \ - mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \ + mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . + +FROM rust-extensions-build-pgrx12 AS pg_jsonschema-build +COPY --from=pg_jsonschema-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_jsonschema-src +RUN \ # see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8 # `unsafe-postgres` feature allows to build pgx extensions # against postgres forks that decided to change their ABI name (like us). 
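The sed edits in these pgrx-based stages all do the same thing: pin the pgrx crates to the version matching the cargo-pgrx installed in rust-extensions-build-pgrx12, and enable the unsafe-postgres feature mentioned in the comment above so the crate accepts Neon's patched Postgres. A small pre-build check along those lines is sketched below; it is a suggested convention, not something this Dockerfile runs.

```bash
# Hedged sketch: verify the toolchain and the rewritten Cargo.toml agree before
# running `cargo pgrx install --release` in a new extension stage.
cargo install --list | grep -A1 cargo-pgrx   # expect cargo-pgrx v0.12.9 here
grep -n 'pgrx' Cargo.toml                    # expect version "=0.12.9" with the
                                             # "unsafe-postgres" feature enabled
```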
@@ -967,55 +1207,69 @@ RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.3.tar. ######################################################################################### # -# Layer "pg-graphql-pg-build" +# Layer "pg_graphql-build" # Compile "pg_graphql" extension # ######################################################################################### -FROM rust-extensions-build-pgrx12 AS pg-graphql-pg-build +FROM build-deps AS pg_graphql-src ARG PG_VERSION # last release v1.5.9 - Oct 16, 2024 +WORKDIR /ext-src +COPY compute/patches/pg_graphql.patch . RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.9.tar.gz -O pg_graphql.tar.gz && \ echo "cf768385a41278be1333472204fc0328118644ae443182cf52f7b9b23277e497 pg_graphql.tar.gz" | sha256sum --check && \ mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \ - sed -i 's/pgrx = "=0.12.6"/pgrx = { version = "=0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ + sed -i 's/pgrx = "=0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ sed -i 's/pgrx-tests = "=0.12.6"/pgrx-tests = "=0.12.9"/g' Cargo.toml && \ - cargo pgrx install --release && \ + patch -p1 < /ext-src/pg_graphql.patch + + +FROM rust-extensions-build-pgrx12 AS pg_graphql-build +COPY --from=pg_graphql-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_graphql-src +RUN cargo pgrx install --release && \ # it's needed to enable extension because it uses untrusted C language sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_graphql.control ######################################################################################### # -# Layer "pg-tiktoken-build" +# Layer "pg_tiktoken-build" # Compile "pg_tiktoken" extension # ######################################################################################### -FROM rust-extensions-build-pgrx12 AS pg-tiktoken-pg-build +FROM build-deps AS pg_tiktoken-src ARG PG_VERSION # doesn't use releases # 9118dd4549b7d8c0bbc98e04322499f7bf2fa6f7 - on Oct 29, 2024 +WORKDIR /ext-src RUN wget https://github.com/kelvich/pg_tiktoken/archive/9118dd4549b7d8c0bbc98e04322499f7bf2fa6f7.tar.gz -O pg_tiktoken.tar.gz && \ echo "a5bc447e7920ee149d3c064b8b9f0086c0e83939499753178f7d35788416f628 pg_tiktoken.tar.gz" | sha256sum --check && \ mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . 
&& \ sed -i 's/pgrx = { version = "=0.12.6",/pgrx = { version = "0.12.9",/g' Cargo.toml && \ - sed -i 's/pgrx-tests = "=0.12.6"/pgrx-tests = "0.12.9"/g' Cargo.toml && \ - cargo pgrx install --release && \ + sed -i 's/pgrx-tests = "=0.12.6"/pgrx-tests = "0.12.9"/g' Cargo.toml + +FROM rust-extensions-build-pgrx12 AS pg_tiktoken-build +COPY --from=pg_tiktoken-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_tiktoken-src +RUN cargo pgrx install --release && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control ######################################################################################### # -# Layer "pg-pgx-ulid-build" +# Layer "pgx_ulid-build" # Compile "pgx_ulid" extension for v16 and below # ######################################################################################### -FROM rust-extensions-build AS pg-pgx-ulid-build +FROM build-deps AS pgx_ulid-src ARG PG_VERSION +WORKDIR /ext-src RUN case "${PG_VERSION}" in \ "v14" | "v15" | "v16") \ ;; \ @@ -1026,20 +1280,28 @@ RUN case "${PG_VERSION}" in \ wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \ echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \ mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \ - sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ - cargo pgrx install --release && \ - echo 'trusted = true' >> /usr/local/pgsql/share/extension/ulid.control + sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml + +FROM rust-extensions-build AS pgx_ulid-build +COPY --from=pgx_ulid-src /ext-src/ /ext-src/ +WORKDIR /ext-src/ +RUN if [ -d pgx_ulid-src ]; then \ + cd pgx_ulid-src && \ + cargo pgrx install --release && \ + echo 'trusted = true' >> /usr/local/pgsql/share/extension/ulid.control; \ + fi ######################################################################################### # -# Layer "pg-pgx-ulid-pgrx12-build" +# Layer "pgx_ulid-pgrx12-build" # Compile "pgx_ulid" extension for v17 and up # ######################################################################################### -FROM rust-extensions-build-pgrx12 AS pg-pgx-ulid-pgrx12-build +FROM build-deps AS pgx_ulid-pgrx12-src ARG PG_VERSION +WORKDIR /ext-src RUN case "${PG_VERSION}" in \ "v17") \ ;; \ @@ -1050,23 +1312,32 @@ RUN case "${PG_VERSION}" in \ wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.2.0.tar.gz -O pgx_ulid.tar.gz && \ echo "cef6a9a2e5e7bd1a10a18989286586ee9e6c1c06005a4055cff190de41bf3e9f pgx_ulid.tar.gz" | sha256sum --check && \ mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . 
&& \ - sed -i 's/pgrx = "^0.12.7"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ - cargo pgrx install --release && \ - echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgx_ulid.control + sed -i 's/pgrx = "^0.12.7"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml + +FROM rust-extensions-build-pgrx12 AS pgx_ulid-pgrx12-build +ARG PG_VERSION +WORKDIR /ext-src +COPY --from=pgx_ulid-pgrx12-src /ext-src/ /ext-src/ +RUN if [ -d pgx_ulid-src ]; then \ + cd pgx_ulid-src && \ + cargo pgrx install --release && \ + echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgx_ulid.control; \ + fi ######################################################################################### # -# Layer "pg-session-jwt-build" +# Layer "pg_session_jwt-build" # Compile "pg_session_jwt" extension # ######################################################################################### -FROM rust-extensions-build-pgrx12 AS pg-session-jwt-build +FROM build-deps AS pg_session_jwt-src ARG PG_VERSION # NOTE: local_proxy depends on the version of pg_session_jwt # Do not update without approve from proxy team # Make sure the version is reflected in proxy/src/serverless/local_conn_pool.rs +WORKDIR /ext-src RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.2.0.tar.gz -O pg_session_jwt.tar.gz && \ echo "5ace028e591f2e000ca10afa5b1ca62203ebff014c2907c0ec3b29c36f28a1bb pg_session_jwt.tar.gz" | sha256sum --check && \ mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C . && \ @@ -1074,8 +1345,12 @@ RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.2.0 sed -i 's/version = "0.12.6"/version = "0.12.9"/g' pgrx-tests/Cargo.toml && \ sed -i 's/pgrx = "=0.12.6"/pgrx = { version = "=0.12.9", features = [ "unsafe-postgres" ] }/g' pgrx-tests/Cargo.toml && \ sed -i 's/pgrx-macros = "=0.12.6"/pgrx-macros = "=0.12.9"/g' pgrx-tests/Cargo.toml && \ - sed -i 's/pgrx-pg-config = "=0.12.6"/pgrx-pg-config = "=0.12.9"/g' pgrx-tests/Cargo.toml && \ - cargo pgrx install --release + sed -i 's/pgrx-pg-config = "=0.12.6"/pgrx-pg-config = "=0.12.9"/g' pgrx-tests/Cargo.toml + +FROM rust-extensions-build-pgrx12 AS pg_session_jwt-build +COPY --from=pg_session_jwt-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_session_jwt-src +RUN cargo pgrx install --release ######################################################################################### # @@ -1084,15 +1359,20 @@ RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.2.0 # ######################################################################################### -FROM pg-build AS wal2json-pg-build +FROM build-deps AS wal2json-src ARG PG_VERSION # wal2json wal2json_2_6 supports v17 # last release wal2json_2_6 - Apr 25, 2024 +WORKDIR /ext-src RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_6.tar.gz -O wal2json.tar.gz && \ echo "18b4bdec28c74a8fc98a11c72de38378a760327ef8e5e42e975b0029eb96ba0d wal2json.tar.gz" | sha256sum --check && \ - mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json.tar.gz --strip-components=1 -C . 
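Unlike most stages in this file, wal2json installs a logical decoding output plugin rather than a CREATE EXTENSION-style extension, which is why the build stage that follows runs a plain make install and never touches a control file. A minimal usage sketch against a running compute (assuming wal_level=logical; "$DSN" is a placeholder connection string) is:

```bash
# Create a logical replication slot that emits JSON via the wal2json plugin,
# peek at pending changes, then clean up the slot.
psql "$DSN" -c "SELECT pg_create_logical_replication_slot('wal2json_demo', 'wal2json');"
psql "$DSN" -c "SELECT data FROM pg_logical_slot_peek_changes('wal2json_demo', NULL, NULL);"
psql "$DSN" -c "SELECT pg_drop_replication_slot('wal2json_demo');"
```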
+ +FROM pg-build AS wal2json-build +COPY --from=wal2json-src /ext-src/ /ext-src/ +WORKDIR /ext-src/wal2json-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install ######################################################################################### @@ -1101,15 +1381,20 @@ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_6.tar. # compile pg_ivm extension # ######################################################################################### -FROM pg-build AS pg-ivm-build +FROM build-deps AS pg_ivm-src ARG PG_VERSION # pg_ivm v1.9 supports v17 # last release v1.9 - Jul 31 +WORKDIR /ext-src RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.9.tar.gz -O pg_ivm.tar.gz && \ echo "59e15722939f274650abf637f315dd723c87073496ca77236b044cb205270d8b pg_ivm.tar.gz" | sha256sum --check && \ - mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . + +FROM pg-build AS pg_ivm-build +COPY --from=pg_ivm-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_ivm-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_ivm.control @@ -1119,15 +1404,20 @@ RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.9.tar.gz -O pg_iv # compile pg_partman extension # ######################################################################################### -FROM pg-build AS pg-partman-build +FROM build-deps AS pg_partman-src ARG PG_VERSION # should support v17 https://github.com/pgpartman/pg_partman/discussions/693 # last release 5.1.0 Apr 2, 2024 +WORKDIR /ext-src RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.1.0.tar.gz -O pg_partman.tar.gz && \ echo "3e3a27d7ff827295d5c55ef72f07a49062d6204b3cb0b9a048645d6db9f3cb9f pg_partman.tar.gz" | sha256sum --check && \ - mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . + +FROM pg-build AS pg_partman-build +COPY --from=pg_partman-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_partman-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control @@ -1137,13 +1427,19 @@ RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.1.0.tar.gz # compile pg_mooncake extension # ######################################################################################### -FROM rust-extensions-build AS pg-mooncake-build +FROM build-deps AS pg_mooncake-src ARG PG_VERSION - -RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.0/pg_mooncake-0.1.0.tar.gz -O pg_mooncake.tar.gz && \ - echo "eafd059b77f541f11525eb8affcd66a176968cbd8fe7c0d436e733f2aa4da59f pg_mooncake.tar.gz" | sha256sum --check && \ +WORKDIR /ext-src +RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.1/pg_mooncake-0.1.1.tar.gz -O pg_mooncake.tar.gz && \ + echo "a2d16eff7948dde64f072609ca5d2962d6b4d07cb89d45952add473529c55f55 pg_mooncake.tar.gz" | sha256sum --check && \ mkdir pg_mooncake-src && cd pg_mooncake-src && tar xzf ../pg_mooncake.tar.gz --strip-components=1 -C . 
&& \ - make release -j $(getconf _NPROCESSORS_ONLN) && \ + echo "make -f pg_mooncake-src/Makefile.build installcheck TEST_DIR=./test SQL_DIR=./sql SRC_DIR=./src" > neon-test.sh && \ + chmod a+x neon-test.sh + +FROM rust-extensions-build AS pg_mooncake-build +COPY --from=pg_mooncake-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_mooncake-src +RUN make release -j $(getconf _NPROCESSORS_ONLN) && \ make install -j $(getconf _NPROCESSORS_ONLN) && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control @@ -1154,81 +1450,122 @@ RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.0/p # ######################################################################################### -FROM pg-build AS pg-repack-build +FROM build-deps AS pg_repack-src ARG PG_VERSION - +WORKDIR /ext-src RUN wget https://github.com/reorg/pg_repack/archive/refs/tags/ver_1.5.2.tar.gz -O pg_repack.tar.gz && \ echo '4516cad42251ed3ad53ff619733004db47d5755acac83f75924cd94d1c4fb681 pg_repack.tar.gz' | sha256sum --check && \ - mkdir pg_repack-src && cd pg_repack-src && tar xzf ../pg_repack.tar.gz --strip-components=1 -C . && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ + mkdir pg_repack-src && cd pg_repack-src && tar xzf ../pg_repack.tar.gz --strip-components=1 -C . + +FROM rust-extensions-build AS pg_repack-build +COPY --from=pg_repack-src /ext-src/ /ext-src/ +WORKDIR /ext-src/pg_repack-src +RUN make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install ######################################################################################### # -# Layer "neon-pg-ext-build" +# Layer "neon-ext-build" # compile neon extensions # ######################################################################################### -FROM build-deps AS neon-pg-ext-build +FROM pg-build AS neon-ext-build ARG PG_VERSION -# Public extensions -COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=postgis-build /sfcgal/* / -COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=h3-pg-build /h3/usr / -COPY --from=unit-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=vector-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pgjwt-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pgrag-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-jsonschema-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-graphql-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-tiktoken-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=hypopg-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-hashids-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=rum-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pgtap-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=ip4r-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=prefix-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-pgx-ulid-pgrx12-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-session-jwt-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY 
--from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-roaringbitmap-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-semver-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-embedding-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=wal2json-pg-build /usr/local/pgsql /usr/local/pgsql -COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-ivm-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-partman-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-mooncake-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY --from=pg-repack-build /usr/local/pgsql/ /usr/local/pgsql/ COPY pgxn/ pgxn/ - RUN make -j $(getconf _NPROCESSORS_ONLN) \ - PG_CONFIG=/usr/local/pgsql/bin/pg_config \ -C pgxn/neon \ -s install && \ make -j $(getconf _NPROCESSORS_ONLN) \ - PG_CONFIG=/usr/local/pgsql/bin/pg_config \ -C pgxn/neon_utils \ -s install && \ make -j $(getconf _NPROCESSORS_ONLN) \ - PG_CONFIG=/usr/local/pgsql/bin/pg_config \ -C pgxn/neon_test_utils \ -s install && \ make -j $(getconf _NPROCESSORS_ONLN) \ - PG_CONFIG=/usr/local/pgsql/bin/pg_config \ -C pgxn/neon_rmgr \ -s install +######################################################################################### +# +# Layer "extensions-none" +# +######################################################################################### +FROM build-deps AS extensions-none + +RUN mkdir /usr/local/pgsql + +######################################################################################### +# +# Layer "extensions-minimal" +# +# This subset of extensions includes the extensions that we have in +# shared_preload_libraries by default. +# +######################################################################################### +FROM build-deps AS extensions-minimal + +COPY --from=pgrag-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=timescaledb-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_cron-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/ + +######################################################################################### +# +# Layer "extensions-all" +# Bundle together all the extensions +# +######################################################################################### +FROM build-deps AS extensions-all + +# Public extensions +COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=postgis-build /sfcgal/* / +COPY --from=pgrouting-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=h3-pg-build /h3/usr / +COPY --from=postgresql-unit-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pgvector-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pgjwt-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pgrag-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_jsonschema-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_graphql-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_tiktoken-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=hypopg-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_hashids-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=rum-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pgtap-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=ip4r-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY 
--from=prefix-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=hll-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=plpgsql_check-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=timescaledb-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_hint_plan-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_cron-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pgx_ulid-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pgx_ulid-pgrx12-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_session_jwt-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=rdkit-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_uuidv7-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_roaringbitmap-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_semver-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_embedding-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=wal2json-build /usr/local/pgsql /usr/local/pgsql +COPY --from=pg_anon-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/ + +######################################################################################### +# +# Layer "neon-pg-ext-build" +# Includes Postgres and all the extensions chosen by EXTENSIONS arg. +# +######################################################################################### +FROM extensions-${EXTENSIONS} AS neon-pg-ext-build + ######################################################################################### # # Compile the Neon-specific `compute_ctl`, `fast_import`, and `local_proxy` binaries @@ -1283,7 +1620,9 @@ FROM alpine/curl:${ALPINE_CURL_VERSION} AS exporters ARG TARGETARCH # Keep sql_exporter version same as in build-tools.Dockerfile and # test_runner/regress/test_compute_metrics.py -RUN if [ "$TARGETARCH" = "amd64" ]; then\ +# See comment on the top of the file regading `echo`, `-e` and `\n` +RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc; \ + if [ "$TARGETARCH" = "amd64" ]; then\ postgres_exporter_sha256='027e75dda7af621237ff8f5ac66b78a40b0093595f06768612b92b1374bd3105';\ pgbouncer_exporter_sha256='c9f7cf8dcff44f0472057e9bf52613d93f3ffbc381ad7547a959daa63c5e84ac';\ sql_exporter_sha256='38e439732bbf6e28ca4a94d7bc3686d3fa1abdb0050773d5617a9efdb9e64d08';\ @@ -1307,7 +1646,8 @@ RUN if [ "$TARGETARCH" = "amd64" ]; then\ # Clean up postgres folder before inclusion # ######################################################################################### -FROM neon-pg-ext-build AS postgres-cleanup-layer +FROM neon-ext-build AS postgres-cleanup-layer + COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql # Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise) @@ -1337,65 +1677,59 @@ RUN make PG_VERSION="${PG_VERSION}" -C compute ######################################################################################### # -# Layer neon-pg-ext-test +# Layer extension-tests # ######################################################################################### -FROM neon-pg-ext-build AS neon-pg-ext-test +FROM pg-build AS extension-tests ARG PG_VERSION RUN mkdir /ext-src -#COPY --from=postgis-build /postgis.tar.gz /ext-src/ -#COPY --from=postgis-build /sfcgal/* /usr -COPY --from=plv8-build 
/plv8.tar.gz /ext-src/ -#COPY --from=h3-pg-build /h3-pg.tar.gz /ext-src/ -COPY --from=unit-pg-build /postgresql-unit.tar.gz /ext-src/ -COPY --from=vector-pg-build /pgvector.tar.gz /ext-src/ -COPY --from=vector-pg-build /pgvector.patch /ext-src/ -COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src -#COPY --from=pgrag-pg-build /usr/local/pgsql/ /usr/local/pgsql/ -#COPY --from=pg-jsonschema-pg-build /home/nonroot/pg_jsonschema.tar.gz /ext-src -COPY --from=pg-graphql-pg-build /home/nonroot/pg_graphql.tar.gz /ext-src -COPY compute/patches/pg_graphql.patch /ext-src -#COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src -COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src -COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src -COPY --from=rum-pg-build /rum.tar.gz /ext-src -COPY compute/patches/rum.patch /ext-src -#COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src -COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src -COPY --from=prefix-pg-build /prefix.tar.gz /ext-src -COPY --from=hll-pg-build /hll.tar.gz /ext-src -COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src -#COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src -COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src +COPY --from=pg-build /postgres /postgres +#COPY --from=postgis-src /ext-src/ /ext-src/ +COPY --from=plv8-src /ext-src/ /ext-src/ +#COPY --from=h3-pg-src /ext-src/ /ext-src/ +COPY --from=postgresql-unit-src /ext-src/ /ext-src/ +COPY --from=pgvector-src /ext-src/ /ext-src/ +COPY --from=pgjwt-src /ext-src/ /ext-src/ +#COPY --from=pgrag-src /ext-src/ /ext-src/ +#COPY --from=pg_jsonschema-src /ext-src/ /ext-src/ +COPY --from=pg_graphql-src /ext-src/ /ext-src/ +#COPY --from=pg_tiktoken-src /ext-src/ /ext-src/ +COPY --from=hypopg-src /ext-src/ /ext-src/ +COPY --from=pg_hashids-src /ext-src/ /ext-src/ +COPY --from=rum-src /ext-src/ /ext-src/ +#COPY --from=pgtap-src /ext-src/ /ext-src/ +COPY --from=ip4r-src /ext-src/ /ext-src/ +COPY --from=prefix-src /ext-src/ /ext-src/ +COPY --from=hll-src /ext-src/ /ext-src/ +COPY --from=plpgsql_check-src /ext-src/ /ext-src/ +#COPY --from=timescaledb-src /ext-src/ /ext-src/ +COPY --from=pg_hint_plan-src /ext-src/ /ext-src/ COPY compute/patches/pg_hint_plan_${PG_VERSION}.patch /ext-src -COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src -COPY compute/patches/pg_cron.patch /ext-src -#COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src -#COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src -COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src -COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src -COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src -#COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src -#COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src -COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src -COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src -RUN cd /ext-src/ && for f in *.tar.gz; \ - do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \ - rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \ - || exit 1; rm -f $f; done -RUN cd /ext-src/rum-src && patch -p1 <../rum.patch -RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan_${PG_VERSION}.patch +COPY --from=pg_cron-src /ext-src/ /ext-src/ +#COPY --from=pgx_ulid-src /ext-src/ /ext-src/ +#COPY --from=pgx_ulid-pgrx12-src /ext-src/ /ext-src/ +#COPY --from=pg_session_jwt-src /ext-src/ /ext-src/ +#COPY 
--from=rdkit-src /ext-src/ /ext-src/ +COPY --from=pg_uuidv7-src /ext-src/ /ext-src/ +COPY --from=pg_roaringbitmap-src /ext-src/ /ext-src/ +COPY --from=pg_semver-src /ext-src/ /ext-src/ +#COPY --from=pg_embedding-src /ext-src/ /ext-src/ +#COPY --from=wal2json-src /ext-src/ /ext-src/ +COPY --from=pg_ivm-src /ext-src/ /ext-src/ +COPY --from=pg_partman-src /ext-src/ /ext-src/ +#COPY --from=pg_mooncake-src /ext-src/ /ext-src/ +#COPY --from=pg_repack-src /ext-src/ /ext-src/ + COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh -RUN patch -p1 :stats_bulkreads_before; +- ?column? +----------- +- t +-(1 row) +- + CREATE ROLE regress_heaptest_role; + -- verify permissions are checked (error due to function not callable) + SET ROLE regress_heaptest_role; +@@ -233,7 +222,6 @@ ERROR: cannot check relation "test_foreign_table" + DETAIL: This operation is not supported for foreign tables. + -- cleanup + DROP TABLE heaptest; +-DROP TABLESPACE regress_test_stats_tblspc; + DROP TABLE test_partition; + DROP TABLE test_partitioned; + DROP OWNED BY regress_heaptest_role; -- permissions +diff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql +index 1745bae..3b429c3 100644 +--- a/contrib/amcheck/sql/check_heap.sql ++++ b/contrib/amcheck/sql/check_heap.sql +@@ -40,12 +40,9 @@ INSERT INTO heaptest (a, b) + -- same transaction. The heaptest table is smaller than the default + -- wal_skip_threshold, so a wal_level=minimal commit reads the table into + -- shared_buffers. A transaction delays that and excludes any autovacuum. +-SET allow_in_place_tablespaces = true; +-CREATE TABLESPACE regress_test_stats_tblspc LOCATION ''; + SELECT sum(reads) AS stats_bulkreads_before + FROM pg_stat_io WHERE context = 'bulkread' \gset + BEGIN; +-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc; + -- Check that valid options are not rejected nor corruption reported + -- for a non-empty table + SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none'); +@@ -58,9 +55,6 @@ COMMIT; + -- ALTER TABLE ... SET TABLESPACE ... + -- causing an additional bulkread, which should be reflected in pg_stat_io. + SELECT pg_stat_force_next_flush(); +-SELECT sum(reads) AS stats_bulkreads_after +- FROM pg_stat_io WHERE context = 'bulkread' \gset +-SELECT :stats_bulkreads_after > :stats_bulkreads_before; + + CREATE ROLE regress_heaptest_role; + +@@ -140,7 +134,6 @@ SELECT * FROM verify_heapam('test_foreign_table', + + -- cleanup + DROP TABLE heaptest; +-DROP TABLESPACE regress_test_stats_tblspc; + DROP TABLE test_partition; + DROP TABLE test_partitioned; + DROP OWNED BY regress_heaptest_role; -- permissions +diff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out +index 33be13a..70a406c 100644 +--- a/contrib/citext/expected/create_index_acl.out ++++ b/contrib/citext/expected/create_index_acl.out +@@ -5,9 +5,6 @@ + -- owner having as few applicable privileges as possible. (The privileges.sql + -- regress_sro_user tests look for the opposite defect; they confirm that + -- DefineIndex() uses the table owner userid where necessary.) 
+-SET allow_in_place_tablespaces = true; +-CREATE TABLESPACE regress_create_idx_tblspace LOCATION ''; +-RESET allow_in_place_tablespaces; + BEGIN; + CREATE ROLE regress_minimal; + CREATE SCHEMA s; +@@ -49,11 +46,9 @@ ALTER TABLE s.x OWNER TO regress_minimal; + -- Empty-table DefineIndex() + CREATE UNIQUE INDEX u0rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) +- TABLESPACE regress_create_idx_tblspace + WHERE s.index_row_if(y); + ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + -- Make the table nonempty. + INSERT INTO s.x VALUES ('foo'), ('bar'); +@@ -66,11 +61,9 @@ RESET search_path; + GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal; + CREATE UNIQUE INDEX u2rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) +- TABLESPACE regress_create_idx_tblspace + WHERE s.index_row_if(y); + ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + -- Shall not find s.coll via search_path, despite the s.const->public.setter + -- call having set search_path=s during expression planning. Suppress the +@@ -78,9 +71,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree + \set VERBOSITY sqlstate + ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + ERROR: 42704 + \set VERBOSITY default + ROLLBACK; +-DROP TABLESPACE regress_create_idx_tblspace; +diff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql +index 10b5225..ae442e1 100644 +--- a/contrib/citext/sql/create_index_acl.sql ++++ b/contrib/citext/sql/create_index_acl.sql +@@ -6,10 +6,6 @@ + -- regress_sro_user tests look for the opposite defect; they confirm that + -- DefineIndex() uses the table owner userid where necessary.) + +-SET allow_in_place_tablespaces = true; +-CREATE TABLESPACE regress_create_idx_tblspace LOCATION ''; +-RESET allow_in_place_tablespaces; +- + BEGIN; + CREATE ROLE regress_minimal; + CREATE SCHEMA s; +@@ -51,11 +47,9 @@ ALTER TABLE s.x OWNER TO regress_minimal; + -- Empty-table DefineIndex() + CREATE UNIQUE INDEX u0rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) +- TABLESPACE regress_create_idx_tblspace + WHERE s.index_row_if(y); + ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + -- Make the table nonempty. 
+ INSERT INTO s.x VALUES ('foo'), ('bar'); +@@ -68,11 +62,9 @@ RESET search_path; + GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal; + CREATE UNIQUE INDEX u2rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) +- TABLESPACE regress_create_idx_tblspace + WHERE s.index_row_if(y); + ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + -- Shall not find s.coll via search_path, despite the s.const->public.setter + -- call having set search_path=s during expression planning. Suppress the +@@ -80,9 +72,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree + \set VERBOSITY sqlstate + ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + \set VERBOSITY default + ROLLBACK; + +-DROP TABLESPACE regress_create_idx_tblspace; +diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out +index 72304e0..ebe131b 100644 +--- a/contrib/file_fdw/expected/file_fdw.out ++++ b/contrib/file_fdw/expected/file_fdw.out +@@ -4,6 +4,7 @@ + -- directory paths are passed to us in environment variables + \getenv abs_srcdir PG_ABS_SRCDIR + -- Clean up in case a prior regression run failed ++SET compute_query_id TO 'off'; + SET client_min_messages TO 'warning'; + DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user; + RESET client_min_messages; +diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql +index f0548e1..848a08c 100644 +--- a/contrib/file_fdw/sql/file_fdw.sql ++++ b/contrib/file_fdw/sql/file_fdw.sql +@@ -6,6 +6,7 @@ + \getenv abs_srcdir PG_ABS_SRCDIR + + -- Clean up in case a prior regression run failed ++SET compute_query_id TO 'off'; + SET client_min_messages TO 'warning'; + DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user; + RESET client_min_messages; +diff --git a/contrib/pageinspect/expected/gist.out b/contrib/pageinspect/expected/gist.out +index d1adbab..38b52ac 100644 +--- a/contrib/pageinspect/expected/gist.out ++++ b/contrib/pageinspect/expected/gist.out +@@ -10,25 +10,6 @@ BEGIN; + CREATE TABLE test_gist AS SELECT point(i,i) p, i::text t FROM + generate_series(1,1000) i; + CREATE INDEX test_gist_idx ON test_gist USING gist (p); +--- Page 0 is the root, the rest are leaf pages +-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0)); +- lsn | nsn | rightlink | flags +------+-----+------------+------- +- 0/1 | 0/0 | 4294967295 | {} +-(1 row) +- +-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1)); +- lsn | nsn | rightlink | flags +------+-----+------------+-------- +- 0/1 | 0/0 | 4294967295 | {leaf} +-(1 row) +- +-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2)); +- lsn | nsn | rightlink | flags +------+-----+-----------+-------- +- 0/1 | 0/0 | 1 | {leaf} +-(1 row) +- + COMMIT; + SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx'); + itemoffset | ctid | itemlen | dead | keys +diff --git a/contrib/pageinspect/sql/gist.sql b/contrib/pageinspect/sql/gist.sql +index d263542..607992f 100644 +--- a/contrib/pageinspect/sql/gist.sql ++++ b/contrib/pageinspect/sql/gist.sql +@@ -12,11 +12,6 @@ CREATE TABLE test_gist AS 
SELECT point(i,i) p, i::text t FROM + generate_series(1,1000) i; + CREATE INDEX test_gist_idx ON test_gist USING gist (p); + +--- Page 0 is the root, the rest are leaf pages +-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0)); +-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1)); +-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2)); +- + COMMIT; + + SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx'); diff --git a/compute/patches/contrib_pg17.patch b/compute/patches/contrib_pg17.patch new file mode 100644 index 000000000000..0d6c1203b0cc --- /dev/null +++ b/compute/patches/contrib_pg17.patch @@ -0,0 +1,196 @@ +diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out +index 979e5e8..2375b45 100644 +--- a/contrib/amcheck/expected/check_heap.out ++++ b/contrib/amcheck/expected/check_heap.out +@@ -80,12 +80,9 @@ INSERT INTO heaptest (a, b) + -- same transaction. The heaptest table is smaller than the default + -- wal_skip_threshold, so a wal_level=minimal commit reads the table into + -- shared_buffers. A transaction delays that and excludes any autovacuum. +-SET allow_in_place_tablespaces = true; +-CREATE TABLESPACE regress_test_stats_tblspc LOCATION ''; + SELECT sum(reads) AS stats_bulkreads_before + FROM pg_stat_io WHERE context = 'bulkread' \gset + BEGIN; +-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc; + -- Check that valid options are not rejected nor corruption reported + -- for a non-empty table + SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none'); +@@ -118,14 +115,6 @@ SELECT pg_stat_force_next_flush(); + + (1 row) + +-SELECT sum(reads) AS stats_bulkreads_after +- FROM pg_stat_io WHERE context = 'bulkread' \gset +-SELECT :stats_bulkreads_after > :stats_bulkreads_before; +- ?column? +----------- +- t +-(1 row) +- + CREATE ROLE regress_heaptest_role; + -- verify permissions are checked (error due to function not callable) + SET ROLE regress_heaptest_role; +@@ -233,7 +222,6 @@ ERROR: cannot check relation "test_foreign_table" + DETAIL: This operation is not supported for foreign tables. + -- cleanup + DROP TABLE heaptest; +-DROP TABLESPACE regress_test_stats_tblspc; + DROP TABLE test_partition; + DROP TABLE test_partitioned; + DROP OWNED BY regress_heaptest_role; -- permissions +diff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql +index 1745bae..3b429c3 100644 +--- a/contrib/amcheck/sql/check_heap.sql ++++ b/contrib/amcheck/sql/check_heap.sql +@@ -40,12 +40,9 @@ INSERT INTO heaptest (a, b) + -- same transaction. The heaptest table is smaller than the default + -- wal_skip_threshold, so a wal_level=minimal commit reads the table into + -- shared_buffers. A transaction delays that and excludes any autovacuum. +-SET allow_in_place_tablespaces = true; +-CREATE TABLESPACE regress_test_stats_tblspc LOCATION ''; + SELECT sum(reads) AS stats_bulkreads_before + FROM pg_stat_io WHERE context = 'bulkread' \gset + BEGIN; +-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc; + -- Check that valid options are not rejected nor corruption reported + -- for a non-empty table + SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none'); +@@ -58,9 +55,6 @@ COMMIT; + -- ALTER TABLE ... SET TABLESPACE ... + -- causing an additional bulkread, which should be reflected in pg_stat_io. 
+ SELECT pg_stat_force_next_flush(); +-SELECT sum(reads) AS stats_bulkreads_after +- FROM pg_stat_io WHERE context = 'bulkread' \gset +-SELECT :stats_bulkreads_after > :stats_bulkreads_before; + + CREATE ROLE regress_heaptest_role; + +@@ -140,7 +134,6 @@ SELECT * FROM verify_heapam('test_foreign_table', + + -- cleanup + DROP TABLE heaptest; +-DROP TABLESPACE regress_test_stats_tblspc; + DROP TABLE test_partition; + DROP TABLE test_partitioned; + DROP OWNED BY regress_heaptest_role; -- permissions +diff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out +index 33be13a..70a406c 100644 +--- a/contrib/citext/expected/create_index_acl.out ++++ b/contrib/citext/expected/create_index_acl.out +@@ -5,9 +5,6 @@ + -- owner having as few applicable privileges as possible. (The privileges.sql + -- regress_sro_user tests look for the opposite defect; they confirm that + -- DefineIndex() uses the table owner userid where necessary.) +-SET allow_in_place_tablespaces = true; +-CREATE TABLESPACE regress_create_idx_tblspace LOCATION ''; +-RESET allow_in_place_tablespaces; + BEGIN; + CREATE ROLE regress_minimal; + CREATE SCHEMA s; +@@ -49,11 +46,9 @@ ALTER TABLE s.x OWNER TO regress_minimal; + -- Empty-table DefineIndex() + CREATE UNIQUE INDEX u0rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) +- TABLESPACE regress_create_idx_tblspace + WHERE s.index_row_if(y); + ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + -- Make the table nonempty. + INSERT INTO s.x VALUES ('foo'), ('bar'); +@@ -66,11 +61,9 @@ RESET search_path; + GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal; + CREATE UNIQUE INDEX u2rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) +- TABLESPACE regress_create_idx_tblspace + WHERE s.index_row_if(y); + ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + -- Shall not find s.coll via search_path, despite the s.const->public.setter + -- call having set search_path=s during expression planning. Suppress the +@@ -78,9 +71,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree + \set VERBOSITY sqlstate + ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + ERROR: 42704 + \set VERBOSITY default + ROLLBACK; +-DROP TABLESPACE regress_create_idx_tblspace; +diff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql +index 10b5225..ae442e1 100644 +--- a/contrib/citext/sql/create_index_acl.sql ++++ b/contrib/citext/sql/create_index_acl.sql +@@ -6,10 +6,6 @@ + -- regress_sro_user tests look for the opposite defect; they confirm that + -- DefineIndex() uses the table owner userid where necessary.) 
+ +-SET allow_in_place_tablespaces = true; +-CREATE TABLESPACE regress_create_idx_tblspace LOCATION ''; +-RESET allow_in_place_tablespaces; +- + BEGIN; + CREATE ROLE regress_minimal; + CREATE SCHEMA s; +@@ -51,11 +47,9 @@ ALTER TABLE s.x OWNER TO regress_minimal; + -- Empty-table DefineIndex() + CREATE UNIQUE INDEX u0rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) +- TABLESPACE regress_create_idx_tblspace + WHERE s.index_row_if(y); + ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + -- Make the table nonempty. + INSERT INTO s.x VALUES ('foo'), ('bar'); +@@ -68,11 +62,9 @@ RESET search_path; + GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal; + CREATE UNIQUE INDEX u2rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) +- TABLESPACE regress_create_idx_tblspace + WHERE s.index_row_if(y); + ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + -- Shall not find s.coll via search_path, despite the s.const->public.setter + -- call having set search_path=s during expression planning. Suppress the +@@ -80,9 +72,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree + \set VERBOSITY sqlstate + ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=) +- USING INDEX TABLESPACE regress_create_idx_tblspace + WHERE (s.index_row_if(y)); + \set VERBOSITY default + ROLLBACK; + +-DROP TABLESPACE regress_create_idx_tblspace; +diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out +index 86c148a..81bdb2c 100644 +--- a/contrib/file_fdw/expected/file_fdw.out ++++ b/contrib/file_fdw/expected/file_fdw.out +@@ -4,6 +4,7 @@ + -- directory paths are passed to us in environment variables + \getenv abs_srcdir PG_ABS_SRCDIR + -- Clean up in case a prior regression run failed ++SET compute_query_id TO 'off'; + SET client_min_messages TO 'warning'; + DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user; + RESET client_min_messages; +diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql +index f0548e1..848a08c 100644 +--- a/contrib/file_fdw/sql/file_fdw.sql ++++ b/contrib/file_fdw/sql/file_fdw.sql +@@ -6,6 +6,7 @@ + \getenv abs_srcdir PG_ABS_SRCDIR + + -- Clean up in case a prior regression run failed ++SET compute_query_id TO 'off'; + SET client_min_messages TO 'warning'; + DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user; + RESET client_min_messages; diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index b98cf706d343..47fc9cb7fe9d 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -34,6 +34,7 @@ //! -r http://pg-ext-s3-gateway \ //! 
```
 
 use std::collections::HashMap;
+use std::ffi::OsString;
 use std::fs::File;
 use std::path::Path;
 use std::process::exit;
@@ -44,7 +45,7 @@ use std::{thread, time::Duration};
 
 use anyhow::{Context, Result};
 use chrono::Utc;
-use clap::Arg;
+use clap::Parser;
 use compute_tools::disk_quota::set_disk_quota;
 use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
 use signal_hook::consts::{SIGQUIT, SIGTERM};
@@ -73,10 +74,75 @@ use utils::failpoint_support;
 // in-case of not-set environment var
 const BUILD_TAG_DEFAULT: &str = "latest";
 
+// Compatibility hack: if the control plane specified any remote-ext-config
+// use the default value for extension storage proxy gateway.
+// Remove this once the control plane is updated to pass the gateway URL
+fn parse_remote_ext_config(arg: &str) -> Result<String> {
+    if arg.starts_with("http") {
+        Ok(arg.trim_end_matches('/').to_string())
+    } else {
+        Ok("http://pg-ext-s3-gateway".to_string())
+    }
+}
+
+#[derive(Parser)]
+#[command(rename_all = "kebab-case")]
+struct Cli {
+    #[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
+    pub pgbin: String,
+
+    #[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
+    pub remote_ext_config: Option<String>,
+
+    #[arg(long, default_value_t = 3080)]
+    pub http_port: u16,
+
+    #[arg(short = 'D', long, value_name = "DATADIR")]
+    pub pgdata: String,
+
+    #[arg(short = 'C', long, value_name = "DATABASE_URL")]
+    pub connstr: String,
+
+    #[cfg(target_os = "linux")]
+    #[arg(long, default_value = "neon-postgres")]
+    pub cgroup: String,
+
+    #[cfg(target_os = "linux")]
+    #[arg(
+        long,
+        default_value = "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor"
+    )]
+    pub filecache_connstr: String,
+
+    #[cfg(target_os = "linux")]
+    #[arg(long, default_value = "0.0.0.0:10301")]
+    pub vm_monitor_addr: String,
+
+    #[arg(long, action = clap::ArgAction::SetTrue)]
+    pub resize_swap_on_bind: bool,
+
+    #[arg(long)]
+    pub set_disk_quota_for_fs: Option<String>,
+
+    #[arg(short = 's', long = "spec", group = "spec")]
+    pub spec_json: Option<String>,
+
+    #[arg(short = 'S', long, group = "spec-path")]
+    pub spec_path: Option<OsString>,
+
+    #[arg(short = 'i', long, group = "compute-id", conflicts_with_all = ["spec", "spec-path"])]
+    pub compute_id: Option<String>,
+
+    #[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], requires = "compute-id", value_name = "CONTROL_PLANE_API_BASE_URL")]
+    pub control_plane_uri: Option<String>,
+}
+
 fn main() -> Result<()> {
-    let scenario = failpoint_support::init();
+    let cli = Cli::parse();
 
-    let (build_tag, clap_args) = init()?;
+    let build_tag = init()?;
+
+    let scenario = failpoint_support::init();
 
     // enable core dumping for all child processes
     setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
@@ -85,13 +151,11 @@ fn main() -> Result<()> {
         // Enter startup tracing context
         let _startup_context_guard = startup_context_from_env();
 
-        let cli_args = process_cli(&clap_args)?;
-
-        let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
+        let cli_spec = try_spec_from_cli(&cli)?;
 
-        let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
+        let compute = wait_spec(build_tag, &cli, cli_spec)?;
 
-        start_postgres(&clap_args, wait_spec_result)?
+        start_postgres(&cli, compute)?
 
         // Startup is finished, exit the startup tracing span
     };
@@ -108,7 +172,7 @@ fn main() -> Result<()> {
     deinit_and_exit(wait_pg_result);
 }
 
-fn init() -> Result<(String, clap::ArgMatches)> {
+fn init() -> Result<String> {
     init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
 
     let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
@@ -123,66 +187,7 @@ fn init() -> Result<(String, clap::ArgMatches)> {
         .to_string();
     info!("build_tag: {build_tag}");
 
-    Ok((build_tag, cli().get_matches()))
-}
-
-fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
-    let pgbin_default = "postgres";
-    let pgbin = matches
-        .get_one::<String>("pgbin")
-        .map(|s| s.as_str())
-        .unwrap_or(pgbin_default);
-
-    let ext_remote_storage = matches
-        .get_one::<String>("remote-ext-config")
-        // Compatibility hack: if the control plane specified any remote-ext-config
-        // use the default value for extension storage proxy gateway.
-        // Remove this once the control plane is updated to pass the gateway URL
-        .map(|conf| {
-            if conf.starts_with("http") {
-                conf.trim_end_matches('/')
-            } else {
-                "http://pg-ext-s3-gateway"
-            }
-        });
-
-    let http_port = *matches
-        .get_one::<u16>("http-port")
-        .expect("http-port is required");
-    let pgdata = matches
-        .get_one::<String>("pgdata")
-        .expect("PGDATA path is required");
-    let connstr = matches
-        .get_one::<String>("connstr")
-        .expect("Postgres connection string is required");
-    let spec_json = matches.get_one::<String>("spec");
-    let spec_path = matches.get_one::<String>("spec-path");
-    let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
-    let set_disk_quota_for_fs = matches.get_one::<String>("set-disk-quota-for-fs");
-
-    Ok(ProcessCliResult {
-        connstr,
-        pgdata,
-        pgbin,
-        ext_remote_storage,
-        http_port,
-        spec_json,
-        spec_path,
-        resize_swap_on_bind,
-        set_disk_quota_for_fs,
-    })
-}
-
-struct ProcessCliResult<'clap> {
-    connstr: &'clap str,
-    pgdata: &'clap str,
-    pgbin: &'clap str,
-    ext_remote_storage: Option<&'clap str>,
-    http_port: u16,
-    spec_json: Option<&'clap String>,
-    spec_path: Option<&'clap String>,
-    resize_swap_on_bind: bool,
-    set_disk_quota_for_fs: Option<&'clap String>,
+    Ok(build_tag)
 }
 
 fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
@@ -235,19 +240,9 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
     }
 }
 
-fn try_spec_from_cli(
-    matches: &clap::ArgMatches,
-    ProcessCliResult {
-        spec_json,
-        spec_path,
-        ..
- }: &ProcessCliResult, -) -> Result { - let compute_id = matches.get_one::("compute-id"); - let control_plane_uri = matches.get_one::("control-plane-uri"); - +fn try_spec_from_cli(cli: &Cli) -> Result { // First, try to get cluster spec from the cli argument - if let Some(spec_json) = spec_json { + if let Some(ref spec_json) = cli.spec_json { info!("got spec from cli argument {}", spec_json); return Ok(CliSpecParams { spec: Some(serde_json::from_str(spec_json)?), @@ -256,7 +251,7 @@ fn try_spec_from_cli( } // Second, try to read it from the file if path is provided - if let Some(spec_path) = spec_path { + if let Some(ref spec_path) = cli.spec_path { let file = File::open(Path::new(spec_path))?; return Ok(CliSpecParams { spec: Some(serde_json::from_reader(file)?), @@ -264,17 +259,20 @@ fn try_spec_from_cli( }); } - let Some(compute_id) = compute_id else { + if cli.compute_id.is_none() { panic!( "compute spec should be provided by one of the following ways: \ --spec OR --spec-path OR --control-plane-uri and --compute-id" ); }; - let Some(control_plane_uri) = control_plane_uri else { + if cli.control_plane_uri.is_none() { panic!("must specify both --control-plane-uri and --compute-id or none"); }; - match get_spec_from_control_plane(control_plane_uri, compute_id) { + match get_spec_from_control_plane( + cli.control_plane_uri.as_ref().unwrap(), + cli.compute_id.as_ref().unwrap(), + ) { Ok(spec) => Ok(CliSpecParams { spec, live_config_allowed: true, @@ -298,21 +296,12 @@ struct CliSpecParams { fn wait_spec( build_tag: String, - ProcessCliResult { - connstr, - pgdata, - pgbin, - ext_remote_storage, - resize_swap_on_bind, - set_disk_quota_for_fs, - http_port, - .. - }: ProcessCliResult, + cli: &Cli, CliSpecParams { spec, live_config_allowed, }: CliSpecParams, -) -> Result { +) -> Result> { let mut new_state = ComputeState::new(); let spec_set; @@ -324,7 +313,7 @@ fn wait_spec( } else { spec_set = false; } - let connstr = Url::parse(connstr).context("cannot parse connstr as a URL")?; + let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?; let conn_conf = postgres::config::Config::from_str(connstr.as_str()) .context("cannot build postgres config from connstr")?; let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str()) @@ -333,14 +322,14 @@ fn wait_spec( connstr, conn_conf, tokio_conn_conf, - pgdata: pgdata.to_string(), - pgbin: pgbin.to_string(), - pgversion: get_pg_version_string(pgbin), - http_port, + pgdata: cli.pgdata.clone(), + pgbin: cli.pgbin.clone(), + pgversion: get_pg_version_string(&cli.pgbin), + http_port: cli.http_port, live_config_allowed, state: Mutex::new(new_state), state_changed: Condvar::new(), - ext_remote_storage: ext_remote_storage.map(|s| s.to_string()), + ext_remote_storage: cli.remote_ext_config.clone(), ext_download_progress: RwLock::new(HashMap::new()), build_tag, }; @@ -357,7 +346,7 @@ fn wait_spec( // Launch http service first, so that we can serve control-plane requests // while configuration is still in progress. let _http_handle = - launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread"); + launch_http_server(cli.http_port, &compute).expect("cannot launch http endpoint thread"); if !spec_set { // No spec provided, hang waiting for it. 
@@ -389,27 +378,12 @@ fn wait_spec( launch_lsn_lease_bg_task_for_static(&compute); - Ok(WaitSpecResult { - compute, - resize_swap_on_bind, - set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(), - }) -} - -struct WaitSpecResult { - compute: Arc, - resize_swap_on_bind: bool, - set_disk_quota_for_fs: Option, + Ok(compute) } fn start_postgres( - // need to allow unused because `matches` is only used if target_os = "linux" - #[allow(unused_variables)] matches: &clap::ArgMatches, - WaitSpecResult { - compute, - resize_swap_on_bind, - set_disk_quota_for_fs, - }: WaitSpecResult, + cli: &Cli, + compute: Arc, ) -> Result<(Option, StartPostgresResult)> { // We got all we need, update the state. let mut state = compute.state.lock().unwrap(); @@ -437,7 +411,7 @@ fn start_postgres( let mut delay_exit = false; // Resize swap to the desired size if the compute spec says so - if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) { + if let (Some(size_bytes), true) = (swap_size_bytes, cli.resize_swap_on_bind) { // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion // *before* starting postgres. // @@ -464,9 +438,9 @@ fn start_postgres( // Set disk quota if the compute spec says so if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) = - (disk_quota_bytes, set_disk_quota_for_fs) + (disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref()) { - match set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint) { + match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) { Ok(()) => { let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display. info!(%disk_quota_bytes, %size_mib, "set disk quota"); @@ -509,13 +483,7 @@ fn start_postgres( if #[cfg(target_os = "linux")] { use std::env; use tokio_util::sync::CancellationToken; - let vm_monitor_addr = matches - .get_one::("vm-monitor-addr") - .expect("--vm-monitor-addr should always be set because it has a default arg"); - let file_cache_connstr = matches.get_one::("filecache-connstr"); - let cgroup = matches.get_one::("cgroup"); - // Only make a runtime if we need to. // Note: it seems like you can make a runtime in an inner scope and // if you start a task in it it won't be dropped. However, make it // in the outermost scope just to be safe. @@ -538,15 +506,15 @@ fn start_postgres( let pgconnstr = if disable_lfc_resizing.unwrap_or(false) { None } else { - file_cache_connstr.cloned() + Some(cli.filecache_connstr.clone()) }; let vm_monitor = rt.as_ref().map(|rt| { rt.spawn(vm_monitor::start( Box::leak(Box::new(vm_monitor::Args { - cgroup: cgroup.cloned(), + cgroup: Some(cli.cgroup.clone()), pgconnstr, - addr: vm_monitor_addr.clone(), + addr: cli.vm_monitor_addr.clone(), })), token.clone(), )) @@ -702,105 +670,6 @@ fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! 
{ exit(exit_code.unwrap_or(1)) } -fn cli() -> clap::Command { - // Env variable is set by `cargo` - let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown"); - clap::Command::new("compute_ctl") - .version(version) - .arg( - Arg::new("http-port") - .long("http-port") - .value_name("HTTP_PORT") - .default_value("3080") - .value_parser(clap::value_parser!(u16)) - .required(false), - ) - .arg( - Arg::new("connstr") - .short('C') - .long("connstr") - .value_name("DATABASE_URL") - .required(true), - ) - .arg( - Arg::new("pgdata") - .short('D') - .long("pgdata") - .value_name("DATADIR") - .required(true), - ) - .arg( - Arg::new("pgbin") - .short('b') - .long("pgbin") - .default_value("postgres") - .value_name("POSTGRES_PATH"), - ) - .arg( - Arg::new("spec") - .short('s') - .long("spec") - .value_name("SPEC_JSON"), - ) - .arg( - Arg::new("spec-path") - .short('S') - .long("spec-path") - .value_name("SPEC_PATH"), - ) - .arg( - Arg::new("compute-id") - .short('i') - .long("compute-id") - .value_name("COMPUTE_ID"), - ) - .arg( - Arg::new("control-plane-uri") - .short('p') - .long("control-plane-uri") - .value_name("CONTROL_PLANE_API_BASE_URI"), - ) - .arg( - Arg::new("remote-ext-config") - .short('r') - .long("remote-ext-config") - .value_name("REMOTE_EXT_CONFIG"), - ) - // TODO(fprasx): we currently have default arguments because the cloud PR - // to pass them in hasn't been merged yet. We should get rid of them once - // the PR is merged. - .arg( - Arg::new("vm-monitor-addr") - .long("vm-monitor-addr") - .default_value("0.0.0.0:10301") - .value_name("VM_MONITOR_ADDR"), - ) - .arg( - Arg::new("cgroup") - .long("cgroup") - .default_value("neon-postgres") - .value_name("CGROUP"), - ) - .arg( - Arg::new("filecache-connstr") - .long("filecache-connstr") - .default_value( - "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor", - ) - .value_name("FILECACHE_CONNSTR"), - ) - .arg( - Arg::new("resize-swap-on-bind") - .long("resize-swap-on-bind") - .action(clap::ArgAction::SetTrue), - ) - .arg( - Arg::new("set-disk-quota-for-fs") - .long("set-disk-quota-for-fs") - .value_name("SET_DISK_QUOTA_FOR_FS") - ) -} - /// When compute_ctl is killed, send also termination signal to sync-safekeepers /// to prevent leakage. TODO: it is better to convert compute_ctl to async and /// wait for termination which would be easy then. 
@@ -810,7 +679,14 @@ fn handle_exit_signal(sig: i32) { exit(1); } -#[test] -fn verify_cli() { - cli().debug_assert() +#[cfg(test)] +mod test { + use clap::CommandFactory; + + use super::Cli; + + #[test] + fn verify_cli() { + Cli::command().debug_assert() + } } diff --git a/compute_tools/src/extension_server.rs b/compute_tools/src/extension_server.rs index 64c338f4d7ad..00f46386e71b 100644 --- a/compute_tools/src/extension_server.rs +++ b/compute_tools/src/extension_server.rs @@ -258,14 +258,11 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) { async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result { let uri = format!("{}/{}", ext_remote_storage, ext_path); - info!("Download extension {:?} from uri {:?}", ext_path, uri); + info!("Download extension {} from uri {}", ext_path, uri); match do_extension_server_request(&uri).await { Ok(resp) => { - info!( - "Successfully downloaded remote extension data {:?}", - ext_path - ); + info!("Successfully downloaded remote extension data {}", ext_path); REMOTE_EXT_REQUESTS_TOTAL .with_label_values(&[&StatusCode::OK.to_string()]) .inc(); @@ -285,7 +282,10 @@ async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Res async fn do_extension_server_request(uri: &str) -> Result { let resp = reqwest::get(uri).await.map_err(|e| { ( - format!("could not perform remote extensions server request: {}", e), + format!( + "could not perform remote extensions server request: {:?}", + e + ), UNKNOWN_HTTP_STATUS.to_string(), ) })?; @@ -295,7 +295,7 @@ async fn do_extension_server_request(uri: &str) -> Result match resp.bytes().await { Ok(resp) => Ok(resp), Err(e) => Err(( - format!("could not read remote extensions server response: {}", e), + format!("could not read remote extensions server response: {:?}", e), // It's fine to return and report error with status as 200 OK, // because we still failed to read the response. 
status.to_string(), diff --git a/compute_tools/src/migration.rs b/compute_tools/src/migration.rs index aa3c6b01f035..7b7b042d848c 100644 --- a/compute_tools/src/migration.rs +++ b/compute_tools/src/migration.rs @@ -125,7 +125,7 @@ impl<'m> MigrationRunner<'m> { info!("Finished migration id={}", migration_id); } Err(e) => { - error!("Failed to run migration id={}: {}", migration_id, e); + error!("Failed to run migration id={}: {:?}", migration_id, e); DB_MIGRATION_FAILED .with_label_values(&[migration_id.to_string().as_str()]) .inc(); diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 43a820885bcd..37d5d3a1a65f 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -28,7 +28,7 @@ fn do_control_plane_request( .map_err(|e| { ( true, - format!("could not perform spec request to control plane: {}", e), + format!("could not perform spec request to control plane: {:?}", e), UNKNOWN_HTTP_STATUS.to_string(), ) })?; @@ -39,7 +39,7 @@ fn do_control_plane_request( Ok(spec_resp) => Ok(spec_resp), Err(e) => Err(( true, - format!("could not deserialize control plane response: {}", e), + format!("could not deserialize control plane response: {:?}", e), status.to_string(), )), }, diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 383c1746846b..dd37bfc4071f 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -388,6 +388,11 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'image_creation_check_threshold' as integer")?, + image_creation_preempt_threshold: settings + .remove("image_creation_preempt_threshold") + .map(|x| x.parse::()) + .transpose() + .context("Failed to parse 'image_creation_preempt_threshold' as integer")?, pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()), walreceiver_connect_timeout: settings .remove("walreceiver_connect_timeout") diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index d9b76b960093..985fe6b3b1d6 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -10,8 +10,8 @@ use pageserver_api::{ controller_api::{ AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy, - ShardsPreferredAzsRequest, SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, - TenantPolicyRequest, + ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy, + TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest, }, models::{ EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, @@ -800,7 +800,7 @@ async fn main() -> anyhow::Result<()> { .collect(), }; storcon_client - .dispatch::( + .dispatch::( Method::PUT, "control/v1/preferred_azs".to_string(), Some(req), diff --git a/deny.toml b/deny.toml index df00a34c60d6..b55140556801 100644 --- a/deny.toml +++ b/deny.toml @@ -32,6 +32,7 @@ reason = "the marvin attack only affects private key decryption, not public key # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html [licenses] allow = [ + "0BSD", "Apache-2.0", "BSD-2-Clause", "BSD-3-Clause", diff --git a/docker-compose/compute_wrapper/Dockerfile b/docker-compose/compute_wrapper/Dockerfile index 61f44681dabf..b5f0f47ceb88 100644 --- a/docker-compose/compute_wrapper/Dockerfile +++ b/docker-compose/compute_wrapper/Dockerfile @@ -13,6 +13,6 @@ RUN echo 'Acquire::Retries "5";' > 
/etc/apt/apt.conf.d/80-retries && \ jq \ netcat-openbsd #This is required for the pg_hintplan test -RUN mkdir -p /ext-src/pg_hint_plan-src && chown postgres /ext-src/pg_hint_plan-src +RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw USER postgres diff --git a/docker-compose/docker_compose_test.sh b/docker-compose/docker_compose_test.sh index a05d6c043dc4..c4ff86ab663e 100755 --- a/docker-compose/docker_compose_test.sh +++ b/docker-compose/docker_compose_test.sh @@ -52,6 +52,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do if [ $pg_version -ge 16 ]; then docker cp ext-src $TEST_CONTAINER_NAME:/ + docker exec $TEST_CONTAINER_NAME bash -c "apt update && apt install -y libtap-parser-sourcehandler-pgtap-perl" # This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail # It cannot be moved to Dockerfile now because the database directory is created after the start of the container echo Adding dummy config @@ -61,17 +62,32 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/ rm -rf $TMPDIR + # The following block does the same for the contrib/file_fdw test + TMPDIR=$(mktemp -d) + docker cp $TEST_CONTAINER_NAME:/postgres/contrib/file_fdw/data $TMPDIR/data + docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/postgres/contrib/file_fdw/data + rm -rf $TMPDIR + # Apply patches + cat ../compute/patches/contrib_pg${pg_version}.patch | docker exec -i $TEST_CONTAINER_NAME bash -c "(cd /postgres && patch -p1)" # We are running tests now - if ! docker exec -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \ - $TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt - then - FAILED=$(tail -1 testout.txt) - for d in $FAILED - do - mkdir $d - docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.diffs $d || true - docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.out $d || true - cat $d/regression.out $d/regression.diffs || true + rm -f testout.txt testout_contrib.txt + docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \ + $TEST_CONTAINER_NAME /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0 + docker exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \ + $TEST_CONTAINER_NAME /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0 + if [ $EXT_SUCCESS -eq 0 ] || [ $CONTRIB_SUCCESS -eq 0 ]; then + CONTRIB_FAILED= + FAILED= + [ $EXT_SUCCESS -eq 0 ] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}') + [ $CONTRIB_SUCCESS -eq 0 ] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}') + for d in $FAILED $CONTRIB_FAILED; do + dn="$(basename $d)" + rm -rf $dn + mkdir $dn + docker cp $TEST_CONTAINER_NAME:$d/regression.diffs $dn || [ $? -eq 1 ] + docker cp $TEST_CONTAINER_NAME:$d/regression.out $dn || [ $? 
-eq 1 ] + cat $dn/regression.out $dn/regression.diffs || true + rm -rf $dn done rm -rf $FAILED exit 1 diff --git a/docker-compose/ext-src/pgjwt-src/neon-test.sh b/docker-compose/ext-src/pgjwt-src/neon-test.sh new file mode 100755 index 000000000000..95af0be77b1f --- /dev/null +++ b/docker-compose/ext-src/pgjwt-src/neon-test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -ex +cd "$(dirname "${0}")" +pg_prove test.sql \ No newline at end of file diff --git a/docker-compose/ext-src/pgjwt-src/test-upgrade.patch b/docker-compose/ext-src/pgjwt-src/test-upgrade.patch new file mode 100644 index 000000000000..85b35654800f --- /dev/null +++ b/docker-compose/ext-src/pgjwt-src/test-upgrade.patch @@ -0,0 +1,15 @@ +diff --git a/test.sql b/test.sql +index d7a0ca8..f15bc76 100644 +--- a/test.sql ++++ b/test.sql +@@ -9,9 +9,7 @@ + \set ON_ERROR_STOP true + \set QUIET 1 + +-CREATE EXTENSION pgcrypto; +-CREATE EXTENSION pgtap; +-CREATE EXTENSION pgjwt; ++CREATE EXTENSION IF NOT EXISTS pgtap; + + BEGIN; + SELECT plan(23); diff --git a/docker-compose/ext-src/pgjwt-src/test-upgrade.sh b/docker-compose/ext-src/pgjwt-src/test-upgrade.sh new file mode 100755 index 000000000000..b7158d234000 --- /dev/null +++ b/docker-compose/ext-src/pgjwt-src/test-upgrade.sh @@ -0,0 +1,5 @@ +#!/bin/sh +set -ex +cd "$(dirname ${0})" +patch -p1 /dev/null; then diff --git a/docker-compose/test_extensions_upgrade.sh b/docker-compose/test_extensions_upgrade.sh index ff93b98065c4..08b1a60f2da8 100755 --- a/docker-compose/test_extensions_upgrade.sh +++ b/docker-compose/test_extensions_upgrade.sh @@ -24,7 +24,7 @@ function wait_for_ready { } function create_extensions() { for ext in ${1}; do - docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c "CREATE EXTENSION IF NOT EXISTS ${ext}" + docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c "CREATE EXTENSION IF NOT EXISTS ${ext} CASCADE" done } EXTENSIONS='[ @@ -40,7 +40,8 @@ EXTENSIONS='[ {"extname": "pg_uuidv7", "extdir": "pg_uuidv7-src"}, {"extname": "roaringbitmap", "extdir": "pg_roaringbitmap-src"}, {"extname": "semver", "extdir": "pg_semver-src"}, -{"extname": "pg_ivm", "extdir": "pg_ivm-src"} +{"extname": "pg_ivm", "extdir": "pg_ivm-src"}, +{"extname": "pgjwt", "extdir": "pgjwt-src"} ]' EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -) TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build -d diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs index b3f18dc6da2c..2fc95c47c616 100644 --- a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -204,14 +204,16 @@ impl RemoteExtSpec { // Check if extension is present in public or custom. // If not, then it is not allowed to be used by this compute. 
- if let Some(public_extensions) = &self.public_extensions { - if !public_extensions.contains(&real_ext_name.to_string()) { - if let Some(custom_extensions) = &self.custom_extensions { - if !custom_extensions.contains(&real_ext_name.to_string()) { - return Err(anyhow::anyhow!("extension {} is not found", real_ext_name)); - } - } - } + if !self + .public_extensions + .as_ref() + .is_some_and(|exts| exts.iter().any(|e| e == ext_name)) + && !self + .custom_extensions + .as_ref() + .is_some_and(|exts| exts.iter().any(|e| e == ext_name)) + { + return Err(anyhow::anyhow!("extension {} is not found", real_ext_name)); } match self.extension_data.get(real_ext_name) { @@ -340,6 +342,96 @@ mod tests { use super::*; use std::fs::File; + #[test] + fn allow_installing_remote_extensions() { + let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({ + "public_extensions": null, + "custom_extensions": null, + "library_index": {}, + "extension_data": {}, + })) + .unwrap(); + + rspec + .get_ext("ext", false, "latest", "v17") + .expect_err("Extension should not be found"); + + let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({ + "public_extensions": [], + "custom_extensions": null, + "library_index": {}, + "extension_data": {}, + })) + .unwrap(); + + rspec + .get_ext("ext", false, "latest", "v17") + .expect_err("Extension should not be found"); + + let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({ + "public_extensions": [], + "custom_extensions": [], + "library_index": { + "ext": "ext" + }, + "extension_data": { + "ext": { + "control_data": { + "ext.control": "" + }, + "archive_path": "" + } + }, + })) + .unwrap(); + + rspec + .get_ext("ext", false, "latest", "v17") + .expect_err("Extension should not be found"); + + let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({ + "public_extensions": [], + "custom_extensions": ["ext"], + "library_index": { + "ext": "ext" + }, + "extension_data": { + "ext": { + "control_data": { + "ext.control": "" + }, + "archive_path": "" + } + }, + })) + .unwrap(); + + rspec + .get_ext("ext", false, "latest", "v17") + .expect("Extension should be found"); + + let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({ + "public_extensions": ["ext"], + "custom_extensions": [], + "library_index": { + "ext": "ext" + }, + "extension_data": { + "ext": { + "control_data": { + "ext.control": "" + }, + "archive_path": "" + } + }, + })) + .unwrap(); + + rspec + .get_ext("ext", false, "latest", "v17") + .expect("Extension should be found"); + } + #[test] fn parse_spec_file() { let file = File::open("tests/cluster_spec.json").unwrap(); diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 422da0dc95c9..a0b5feea948e 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -323,6 +323,10 @@ pub struct TenantConfigToml { // Expresed in multiples of checkpoint distance. pub image_layer_creation_check_threshold: u8, + // How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction. + // Set to 0 to disable preemption. + pub image_creation_preempt_threshold: usize, + /// The length for an explicit LSN lease request. /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval. 
#[serde(with = "humantime_serde")] @@ -547,6 +551,10 @@ pub mod tenant_conf_defaults { // Relevant: https://github.com/neondatabase/neon/issues/3394 pub const DEFAULT_GC_PERIOD: &str = "1 hr"; pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3; + // If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image + // layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we + // want to enable this feature. + pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0; pub const DEFAULT_PITR_INTERVAL: &str = "7 days"; pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds"; pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds"; @@ -605,6 +613,7 @@ impl Default for TenantConfigToml { lazy_slru_download: false, timeline_get_throttle: crate::models::ThrottleConfig::disabled(), image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD, + image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD, lsn_lease_length: LsnLease::DEFAULT_LENGTH, lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS, timeline_offloading: false, diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 43447c67bd0e..19beb37ab34c 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -498,6 +498,8 @@ pub struct TenantConfigPatch { #[serde(skip_serializing_if = "FieldPatch::is_noop")] pub image_layer_creation_check_threshold: FieldPatch, #[serde(skip_serializing_if = "FieldPatch::is_noop")] + pub image_creation_preempt_threshold: FieldPatch, + #[serde(skip_serializing_if = "FieldPatch::is_noop")] pub lsn_lease_length: FieldPatch, #[serde(skip_serializing_if = "FieldPatch::is_noop")] pub lsn_lease_length_for_ts: FieldPatch, @@ -544,6 +546,7 @@ pub struct TenantConfig { pub lazy_slru_download: Option, pub timeline_get_throttle: Option, pub image_layer_creation_check_threshold: Option, + pub image_creation_preempt_threshold: Option, pub lsn_lease_length: Option, pub lsn_lease_length_for_ts: Option, pub timeline_offloading: Option, @@ -581,6 +584,7 @@ impl TenantConfig { mut lazy_slru_download, mut timeline_get_throttle, mut image_layer_creation_check_threshold, + mut image_creation_preempt_threshold, mut lsn_lease_length, mut lsn_lease_length_for_ts, mut timeline_offloading, @@ -635,6 +639,9 @@ impl TenantConfig { patch .image_layer_creation_check_threshold .apply(&mut image_layer_creation_check_threshold); + patch + .image_creation_preempt_threshold + .apply(&mut image_creation_preempt_threshold); patch.lsn_lease_length.apply(&mut lsn_lease_length); patch .lsn_lease_length_for_ts @@ -679,6 +686,7 @@ impl TenantConfig { lazy_slru_download, timeline_get_throttle, image_layer_creation_check_threshold, + image_creation_preempt_threshold, lsn_lease_length, lsn_lease_length_for_ts, timeline_offloading, diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index e205d60d747d..753f05b6fd1c 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -5,6 +5,24 @@ use metrics::{IntCounter, IntCounterVec}; use once_cell::sync::Lazy; use strum_macros::{EnumString, VariantNames}; +/// Logs a critical error, similarly to `tracing::error!`. This will: +/// +/// * Emit an ERROR log message with prefix "CRITICAL:" and a backtrace. +/// * Increment libmetrics_tracing_event_count{level="critical"}, and indirectly level="error". +/// * Trigger a pageable alert (via the metric above). 
+/// * In debug builds, panic the process. +#[macro_export] +macro_rules! critical { + ($($arg:tt)*) => { + if cfg!(debug_assertions) { + panic!($($arg)*); + } + $crate::logging::TRACING_EVENT_COUNT_METRIC.inc_critical(); + let backtrace = std::backtrace::Backtrace::capture(); + tracing::error!("CRITICAL: {}\n{backtrace}", format!($($arg)*)); + }; +} + #[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)] #[strum(serialize_all = "snake_case")] pub enum LogFormat { @@ -25,7 +43,10 @@ impl LogFormat { } } -struct TracingEventCountMetric { +pub struct TracingEventCountMetric { + /// CRITICAL is not a `tracing` log level. Instead, we increment it in the `critical!` macro, + /// and also emit it as a regular error. These are thus double-counted, but that seems fine. + critical: IntCounter, error: IntCounter, warn: IntCounter, info: IntCounter, @@ -33,7 +54,7 @@ struct TracingEventCountMetric { trace: IntCounter, } -static TRACING_EVENT_COUNT_METRIC: Lazy = Lazy::new(|| { +pub static TRACING_EVENT_COUNT_METRIC: Lazy = Lazy::new(|| { let vec = metrics::register_int_counter_vec!( "libmetrics_tracing_event_count", "Number of tracing events, by level", @@ -46,6 +67,7 @@ static TRACING_EVENT_COUNT_METRIC: Lazy = Lazy::new(|| impl TracingEventCountMetric { fn new(vec: IntCounterVec) -> Self { Self { + critical: vec.with_label_values(&["critical"]), error: vec.with_label_values(&["error"]), warn: vec.with_label_values(&["warn"]), info: vec.with_label_values(&["info"]), @@ -54,6 +76,11 @@ impl TracingEventCountMetric { } } + // Allow public access from `critical!` macro. + pub fn inc_critical(&self) { + self.critical.inc(); + } + fn inc_for_level(&self, level: tracing::Level) { let counter = match level { tracing::Level::ERROR => &self.error, diff --git a/libs/vm_monitor/src/filecache.rs b/libs/vm_monitor/src/filecache.rs index fe71e11197de..4f5bf1c1e327 100644 --- a/libs/vm_monitor/src/filecache.rs +++ b/libs/vm_monitor/src/filecache.rs @@ -177,8 +177,8 @@ impl FileCacheState { crate::spawn_with_cancel( token, |res| { - if let Err(error) = res { - error!(%error, "postgres error") + if let Err(e) = res { + error!(error = format_args!("{e:#}"), "postgres error"); } }, conn, @@ -205,7 +205,7 @@ impl FileCacheState { { Ok(rows) => Ok(rows), Err(e) => { - error!(error = ?e, "postgres error: {e} -> retrying"); + error!(error = format_args!("{e:#}"), "postgres error -> retrying"); let client = FileCacheState::connect(&self.conn_str, self.token.clone()) .await diff --git a/libs/vm_monitor/src/lib.rs b/libs/vm_monitor/src/lib.rs index 1b13c8e0b23d..0cd97d4ca122 100644 --- a/libs/vm_monitor/src/lib.rs +++ b/libs/vm_monitor/src/lib.rs @@ -191,15 +191,12 @@ async fn start_monitor( .await; let mut monitor = match monitor { Ok(Ok(monitor)) => monitor, - Ok(Err(error)) => { - error!(?error, "failed to create monitor"); + Ok(Err(e)) => { + error!(error = format_args!("{e:#}"), "failed to create monitor"); return; } Err(_) => { - error!( - ?timeout, - "creating monitor timed out (probably waiting to receive protocol range)" - ); + error!(?timeout, "creating monitor timed out"); return; } }; @@ -207,6 +204,9 @@ async fn start_monitor( match monitor.run().await { Ok(()) => info!("monitor was killed due to new connection"), - Err(e) => error!(error = ?e, "monitor terminated unexpectedly"), + Err(e) => error!( + error = format_args!("{e:#}"), + "monitor terminated unexpectedly" + ), } } diff --git a/libs/vm_monitor/src/runner.rs b/libs/vm_monitor/src/runner.rs index 
8605314ba9e5..8839f5803f81 100644 --- a/libs/vm_monitor/src/runner.rs +++ b/libs/vm_monitor/src/runner.rs @@ -370,12 +370,16 @@ impl Runner { }), InboundMsgKind::InvalidMessage { error } => { warn!( - %error, id, "received notification of an invalid message we sent" + error = format_args!("{error:#}"), + id, "received notification of an invalid message we sent" ); Ok(None) } InboundMsgKind::InternalError { error } => { - warn!(error, id, "agent experienced an internal error"); + warn!( + error = format_args!("{error:#}"), + id, "agent experienced an internal error" + ); Ok(None) } InboundMsgKind::HealthCheck {} => { @@ -476,7 +480,7 @@ impl Runner { // gives the outermost cause, and the debug impl // pretty-prints the error, whereas {:#} contains all the // causes, but is compact (no newlines). - warn!(error = format!("{e:#}"), "error handling message"); + warn!(error = format_args!("{e:#}"), "error handling message"); OutboundMsg::new( OutboundMsgKind::InternalError { error: e.to_string(), @@ -492,7 +496,7 @@ impl Runner { .context("failed to send message")?; } Err(e) => warn!( - error = format!("{e}"), + error = format_args!("{e:#}"), msg = ?msg, "received error message" ), diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 0f3e9fdab601..94f7510a4abf 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -1472,7 +1472,13 @@ async fn layer_download_handler( let downloaded = timeline .download_layer(&layer_name) .await - .map_err(ApiError::InternalServerError)?; + .map_err(|e| match e { + tenant::storage_layer::layer::DownloadError::TimelineShutdown + | tenant::storage_layer::layer::DownloadError::DownloadCancelled => { + ApiError::ShuttingDown + } + other => ApiError::InternalServerError(other.into()), + })?; match downloaded { Some(true) => json_response(StatusCode::OK, ()), @@ -3169,12 +3175,16 @@ async fn put_tenant_timeline_import_basebackup( let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn); - let span = info_span!("import_basebackup", tenant_id=%tenant_id, timeline_id=%timeline_id, base_lsn=%base_lsn, end_lsn=%end_lsn, pg_version=%pg_version); + let tenant_shard_id = TenantShardId::unsharded(tenant_id); + + let span = info_span!("import_basebackup", + tenant_id=%tenant_id, timeline_id=%timeline_id, shard_id=%tenant_shard_id.shard_slug(), + base_lsn=%base_lsn, end_lsn=%end_lsn, pg_version=%pg_version); async move { let state = get_state(&request); let tenant = state .tenant_manager - .get_attached_tenant_shard(TenantShardId::unsharded(tenant_id))?; + .get_attached_tenant_shard(tenant_shard_id)?; let broker_client = state.broker_client.clone(); @@ -3383,7 +3393,17 @@ where let status = response.status(); info!(%status, "Cancelled request finished successfully") } - Err(e) => error!("Cancelled request finished with an error: {e:?}"), + Err(e) => match e { + ApiError::ShuttingDown | ApiError::ResourceUnavailable(_) => { + // Don't log this at error severity: they are normal during lifecycle of tenants/process + info!("Cancelled request aborted for shutdown") + } + _ => { + // Log these in a highly visible way, because we have no client to send the response to, but + // would like to know that something went wrong. 
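// A small sketch (assuming the anyhow crate) of why the logging changes above
// prefer format_args!("{e:#}"): plain Display shows only the outermost context,
// the alternate form "{:#}" shows the whole cause chain on one compact line, and
// Debug "{:?}" pretty-prints it across multiple lines.
use anyhow::Context;

fn open_config() -> anyhow::Result<()> {
    std::fs::read("/nonexistent/config.toml").context("failed to read config")?;
    Ok(())
}

fn main() {
    if let Err(e) = open_config() {
        println!("{e}");   // "failed to read config"
        println!("{e:#}"); // "failed to read config: No such file or directory (os error 2)"
        println!("{e:?}"); // multi-line report with a "Caused by:" section
    }
}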
+ error!("Cancelled request finished with an error: {e:?}") + } + }, } } // only logging for cancelled panicked request handlers is the tracing_panic_hook, diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index ff6af3566c82..f43cd08cf7ba 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -263,14 +263,6 @@ pub(crate) const TENANT_HEATMAP_BASENAME: &str = "heatmap-v1.json"; /// data directory at pageserver startup can be automatically removed. pub(crate) const TEMP_FILE_SUFFIX: &str = "___temp"; -/// A marker file to mark that a timeline directory was not fully initialized. -/// If a timeline directory with this marker is encountered at pageserver startup, -/// the timeline directory and the marker file are both removed. -/// Full path: `tenants//timelines/___uninit`. -pub(crate) const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit"; - -pub(crate) const TIMELINE_DELETE_MARK_SUFFIX: &str = "___delete"; - pub fn is_temporary(path: &Utf8Path) -> bool { match path.file_name() { Some(name) => name.ends_with(TEMP_FILE_SUFFIX), @@ -278,25 +270,6 @@ pub fn is_temporary(path: &Utf8Path) -> bool { } } -fn ends_with_suffix(path: &Utf8Path, suffix: &str) -> bool { - match path.file_name() { - Some(name) => name.ends_with(suffix), - None => false, - } -} - -// FIXME: DO NOT ADD new query methods like this, which will have a next step of parsing timelineid -// from the directory name. Instead create type "UninitMark(TimelineId)" and only parse it once -// from the name. - -pub(crate) fn is_uninit_mark(path: &Utf8Path) -> bool { - ends_with_suffix(path, TIMELINE_UNINIT_MARK_SUFFIX) -} - -pub(crate) fn is_delete_mark(path: &Utf8Path) -> bool { - ends_with_suffix(path, TIMELINE_DELETE_MARK_SUFFIX) -} - /// During pageserver startup, we need to order operations not to exhaust tokio worker threads by /// blocking. /// diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 77c0967afc4b..6ab1178a7bd0 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -32,6 +32,7 @@ use utils::id::TimelineId; use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext}; +use crate::pgdatadir_mapping::DatadirModificationStats; use crate::task_mgr::TaskKind; use crate::tenant::layer_map::LayerMap; use crate::tenant::mgr::TenantSlot; @@ -116,11 +117,38 @@ pub(crate) static STORAGE_TIME_GLOBAL: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); -pub(crate) static VEC_READ_NUM_LAYERS_VISITED: Lazy = Lazy::new(|| { +/// Measures layers visited per read (i.e. read amplification). +/// +/// NB: for a batch, we count all visited layers towards each read. While the cost of layer visits +/// are amortized across the batch, and some layers may not intersect with a given key, each visited +/// layer contributes directly to the observed latency for every read in the batch, which is what we +/// care about. +pub(crate) static LAYERS_PER_READ: Lazy = Lazy::new(|| { + register_histogram_vec!( + "pageserver_layers_per_read", + "Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.", + &["tenant_id", "shard_id", "timeline_id"], + // Low resolution to reduce cardinality. 
+ vec![1.0, 5.0, 10.0, 25.0, 50.0, 100.0], + ) + .expect("failed to define a metric") +}); + +pub(crate) static LAYERS_PER_READ_GLOBAL: Lazy = Lazy::new(|| { register_histogram!( - "pageserver_layers_visited_per_vectored_read_global", - "Average number of layers visited to reconstruct one key", - vec![1.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0], + "pageserver_layers_per_read_global", + "Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.", + vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0], + ) + .expect("failed to define a metric") +}); + +pub(crate) static DELTAS_PER_READ_GLOBAL: Lazy = Lazy::new(|| { + // We expect this to be low because of Postgres checkpoints. Let's see if that holds. + register_histogram!( + "pageserver_deltas_per_read_global", + "Number of delta pages applied to image page per read", + vec![0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0], ) .expect("failed to define a metric") }); @@ -2351,11 +2379,40 @@ pub(crate) struct WalIngestMetrics { pub(crate) records_observed: IntCounter, pub(crate) records_committed: IntCounter, pub(crate) records_filtered: IntCounter, + pub(crate) values_committed_metadata_images: IntCounter, + pub(crate) values_committed_metadata_deltas: IntCounter, + pub(crate) values_committed_data_images: IntCounter, + pub(crate) values_committed_data_deltas: IntCounter, pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter, - pub(crate) clear_vm_bits_unknown: IntCounterVec, +} + +impl WalIngestMetrics { + pub(crate) fn inc_values_committed(&self, stats: &DatadirModificationStats) { + if stats.metadata_images > 0 { + self.values_committed_metadata_images + .inc_by(stats.metadata_images); + } + if stats.metadata_deltas > 0 { + self.values_committed_metadata_deltas + .inc_by(stats.metadata_deltas); + } + if stats.data_images > 0 { + self.values_committed_data_images.inc_by(stats.data_images); + } + if stats.data_deltas > 0 { + self.values_committed_data_deltas.inc_by(stats.data_deltas); + } + } } pub(crate) static WAL_INGEST: Lazy = Lazy::new(|| { + let values_committed = register_int_counter_vec!( + "pageserver_wal_ingest_values_committed", + "Number of values committed to pageserver storage from WAL records", + &["class", "kind"], + ) + .expect("failed to define a metric"); + WalIngestMetrics { bytes_received: register_int_counter!( "pageserver_wal_ingest_bytes_received", @@ -2382,17 +2439,15 @@ pub(crate) static WAL_INGEST: Lazy = Lazy::new(|| { "Number of WAL records filtered out due to sharding" ) .expect("failed to define a metric"), + values_committed_metadata_images: values_committed.with_label_values(&["metadata", "image"]), + values_committed_metadata_deltas: values_committed.with_label_values(&["metadata", "delta"]), + values_committed_data_images: values_committed.with_label_values(&["data", "image"]), + values_committed_data_deltas: values_committed.with_label_values(&["data", "delta"]), gap_blocks_zeroed_on_rel_extend: register_int_counter!( "pageserver_gap_blocks_zeroed_on_rel_extend", "Total number of zero gap blocks written on relation extends" ) .expect("failed to define a metric"), - clear_vm_bits_unknown: register_int_counter_vec!( - "pageserver_wal_ingest_clear_vm_bits_unknown", - "Number of ignored ClearVmBits operations due to unknown pages/relations", - &["entity"], - ) - .expect("failed to define a metric"), } }); @@ -2632,6 +2687,7 @@ pub(crate) struct TimelineMetrics { pub disk_consistent_lsn_gauge: IntGauge, pub 
pitr_history_size: UIntGauge, pub archival_size: UIntGauge, + pub layers_per_read: Histogram, pub standby_horizon_gauge: IntGauge, pub resident_physical_size_gauge: UIntGauge, pub visible_physical_size_gauge: UIntGauge, @@ -2729,6 +2785,10 @@ impl TimelineMetrics { .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); + let layers_per_read = LAYERS_PER_READ + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); + let standby_horizon_gauge = STANDBY_HORIZON .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); @@ -2793,6 +2853,7 @@ impl TimelineMetrics { disk_consistent_lsn_gauge, pitr_history_size, archival_size, + layers_per_read, standby_horizon_gauge, resident_physical_size_gauge, visible_physical_size_gauge, @@ -2962,6 +3023,8 @@ impl TimelineMetrics { } } + let _ = LAYERS_PER_READ.remove_label_values(&[tenant_id, shard_id, timeline_id]); + let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]); @@ -3912,7 +3975,8 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) { // histograms [ - &VEC_READ_NUM_LAYERS_VISITED, + &LAYERS_PER_READ_GLOBAL, + &DELTAS_PER_READ_GLOBAL, &WAIT_LSN_TIME, &WAL_REDO_TIME, &WAL_REDO_RECORDS_HISTOGRAM, diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 40c657524dff..dcbf62b56c6c 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -48,7 +48,7 @@ use tracing::{debug, trace, warn}; use utils::bin_ser::DeserializeError; use utils::pausable_failpoint; use utils::{bin_ser::BeSer, lsn::Lsn}; -use wal_decoder::serialized_batch::SerializedValueBatch; +use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta}; /// Max delta records appended to the AUX_FILES_KEY (for aux v1). The write path will write a full image once this threshold is reached. pub const MAX_AUX_FILE_DELTAS: usize = 1024; @@ -1297,6 +1297,26 @@ impl DatadirModification<'_> { .is_some_and(|b| b.has_data()) } + /// Returns statistics about the currently pending modifications. + pub(crate) fn stats(&self) -> DatadirModificationStats { + let mut stats = DatadirModificationStats::default(); + for (_, _, value) in self.pending_metadata_pages.values().flatten() { + match value { + Value::Image(_) => stats.metadata_images += 1, + Value::WalRecord(r) if r.will_init() => stats.metadata_images += 1, + Value::WalRecord(_) => stats.metadata_deltas += 1, + } + } + for valuemeta in self.pending_data_batch.iter().flat_map(|b| &b.metadata) { + match valuemeta { + ValueMeta::Serialized(s) if s.will_init => stats.data_images += 1, + ValueMeta::Serialized(_) => stats.data_deltas += 1, + ValueMeta::Observed(_) => {} + } + } + stats + } + /// Set the current lsn pub(crate) fn set_lsn(&mut self, lsn: Lsn) -> anyhow::Result<()> { ensure!( @@ -2317,6 +2337,15 @@ impl DatadirModification<'_> { } } +/// Statistics for a DatadirModification. +#[derive(Default)] +pub struct DatadirModificationStats { + pub metadata_images: u64, + pub metadata_deltas: u64, + pub data_images: u64, + pub data_deltas: u64, +} + /// This struct facilitates accessing either a committed key from the timeline at a /// specific LSN, or the latest uncommitted key from a pending modification. 
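// A simplified sketch (hypothetical types) of how pending values are classified
// for the pageserver_wal_ingest_values_committed metric above: a full page image,
// or a WAL record that re-initializes the page, counts as an image; anything else
// is a delta that must later be replayed on top of an image.
enum Value { Image(Vec<u8>), WalRecord { will_init: bool } }

#[derive(Default, Debug, PartialEq)]
struct Stats { images: u64, deltas: u64 }

fn classify(values: &[Value]) -> Stats {
    let mut stats = Stats::default();
    for value in values {
        match value {
            Value::Image(_) | Value::WalRecord { will_init: true } => stats.images += 1,
            Value::WalRecord { will_init: false } => stats.deltas += 1,
        }
    }
    stats
}

fn main() {
    let pending = vec![Value::Image(vec![0u8; 8192]), Value::WalRecord { will_init: false }];
    assert_eq!(classify(&pending), Stats { images: 1, deltas: 1 });
}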
/// diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 085f76c05d98..c1b408ed72a4 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -46,6 +46,7 @@ use std::sync::atomic::AtomicBool; use std::sync::Weak; use std::time::SystemTime; use storage_broker::BrokerClientChannel; +use timeline::compaction::CompactionOutcome; use timeline::compaction::GcCompactionQueue; use timeline::import_pgdata; use timeline::offload::offload_timeline; @@ -95,7 +96,6 @@ use crate::context::{DownloadBehavior, RequestContext}; use crate::deletion_queue::DeletionQueueClient; use crate::deletion_queue::DeletionQueueError; use crate::import_datadir; -use crate::is_uninit_mark; use crate::l0_flush::L0FlushGlobalState; use crate::metrics::CONCURRENT_INITDBS; use crate::metrics::INITDB_RUN_TIME; @@ -1793,11 +1793,7 @@ impl Tenant { let entry = entry.context("read timeline dir entry")?; let entry_path = entry.path(); - let purge = if crate::is_temporary(entry_path) - // TODO: remove uninit mark code (https://github.com/neondatabase/neon/issues/5718) - || is_uninit_mark(entry_path) - || crate::is_delete_mark(entry_path) - { + let purge = if crate::is_temporary(entry_path) { true } else { match TimelineId::try_from(entry_path.file_name()) { @@ -2426,7 +2422,7 @@ impl Tenant { // Make sure the freeze_and_flush reaches remote storage. tline.remote_client.wait_completion().await.unwrap(); - let tl = uninit_tl.finish_creation()?; + let tl = uninit_tl.finish_creation().await?; // The non-test code would call tl.activate() here. tl.set_state(TimelineState::Active); Ok(tl) @@ -2912,10 +2908,10 @@ impl Tenant { self: &Arc, cancel: &CancellationToken, ctx: &RequestContext, - ) -> Result { + ) -> Result { // Don't start doing work during shutdown, or when broken, we do not need those in the logs if !self.is_active() { - return Ok(false); + return Ok(CompactionOutcome::Done); } { @@ -2929,7 +2925,7 @@ impl Tenant { // to AttachedSingle state. 
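// A minimal sketch of the scheduling contract behind CompactionOutcome (the
// Duration values are illustrative): Pending means more compaction work is queued
// and the loop should come back almost immediately, Done means it can sleep for
// the full compaction period.
use std::time::Duration;

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum CompactionOutcome { Done, Pending }

fn next_sleep(outcome: CompactionOutcome, period: Duration) -> Duration {
    match outcome {
        CompactionOutcome::Pending => Duration::from_secs(1),
        CompactionOutcome::Done => period,
    }
}

fn main() {
    let period = Duration::from_secs(20);
    assert_eq!(next_sleep(CompactionOutcome::Pending, period), Duration::from_secs(1));
    assert_eq!(next_sleep(CompactionOutcome::Done, period), period);
}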
if !conf.location.may_upload_layers_hint() { info!("Skipping compaction in location state {:?}", conf.location); - return Ok(false); + return Ok(CompactionOutcome::Done); } } @@ -2972,7 +2968,7 @@ impl Tenant { // Before doing any I/O work, check our circuit breaker if self.compaction_circuit_breaker.lock().unwrap().is_broken() { info!("Skipping compaction due to previous failures"); - return Ok(false); + return Ok(CompactionOutcome::Done); } let mut has_pending_task = false; @@ -2980,10 +2976,10 @@ impl Tenant { for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload { // pending_task_left == None: cannot compact, maybe still pending tasks - // pending_task_left == Some(true): compaction task left - // pending_task_left == Some(false): no compaction task left + // pending_task_left == Some(Pending): compaction task left + // pending_task_left == Some(Done): no compaction task left let pending_task_left = if *can_compact { - let has_pending_l0_compaction_task = timeline + let compaction_outcome = timeline .compact(cancel, EnumSet::empty(), ctx) .instrument(info_span!("compact_timeline", %timeline_id)) .await @@ -3001,27 +2997,27 @@ impl Tenant { .fail(&CIRCUIT_BREAKERS_BROKEN, e); } })?; - if has_pending_l0_compaction_task { - Some(true) + if let CompactionOutcome::Pending = compaction_outcome { + Some(CompactionOutcome::Pending) } else { let queue = { let guard = self.scheduled_compaction_tasks.lock().unwrap(); guard.get(timeline_id).cloned() }; if let Some(queue) = queue { - let has_pending_tasks = queue + let outcome = queue .iteration(cancel, ctx, &self.gc_block, timeline) .await?; - Some(has_pending_tasks) + Some(outcome) } else { - Some(false) + Some(CompactionOutcome::Done) } } } else { None }; - has_pending_task |= pending_task_left.unwrap_or(false); - if pending_task_left == Some(false) && *can_offload { + has_pending_task |= pending_task_left == Some(CompactionOutcome::Pending); + if pending_task_left == Some(CompactionOutcome::Done) && *can_offload { pausable_failpoint!("before-timeline-auto-offload"); match offload_timeline(self, timeline) .instrument(info_span!("offload_timeline", %timeline_id)) @@ -3041,7 +3037,11 @@ impl Tenant { .unwrap() .success(&CIRCUIT_BREAKERS_UNBROKEN); - Ok(has_pending_task) + Ok(if has_pending_task { + CompactionOutcome::Pending + } else { + CompactionOutcome::Done + }) } /// Cancel scheduled compaction tasks @@ -4702,7 +4702,7 @@ impl Tenant { ) .await?; - let new_timeline = uninitialized_timeline.finish_creation()?; + let new_timeline = uninitialized_timeline.finish_creation().await?; // Root timeline gets its layers during creation and uploads them along with the metadata. // A branch timeline though, when created, can get no writes for some time, hence won't get any layers created. @@ -4892,10 +4892,11 @@ impl Tenant { } // this new directory is very temporary, set to remove it immediately after bootstrap, we don't need it + let pgdata_path_deferred = pgdata_path.clone(); scopeguard::defer! 
{ - if let Err(e) = fs::remove_dir_all(&pgdata_path) { + if let Err(e) = fs::remove_dir_all(&pgdata_path_deferred) { // this is unlikely, but we will remove the directory on pageserver restart or another bootstrap call - error!("Failed to remove temporary initdb directory '{pgdata_path}': {e}"); + error!("Failed to remove temporary initdb directory '{pgdata_path_deferred}': {e}"); } } if let Some(existing_initdb_timeline_id) = load_existing_initdb { @@ -4962,7 +4963,7 @@ impl Tenant { pgdata_lsn, pg_version, ); - let raw_timeline = self + let mut raw_timeline = self .prepare_new_timeline( timeline_id, &new_metadata, @@ -4973,42 +4974,33 @@ impl Tenant { .await?; let tenant_shard_id = raw_timeline.owning_tenant.tenant_shard_id; - let unfinished_timeline = raw_timeline.raw_timeline()?; - - // Flush the new layer files to disk, before we make the timeline as available to - // the outside world. - // - // Flush loop needs to be spawned in order to be able to flush. - unfinished_timeline.maybe_spawn_flush_loop(); - - import_datadir::import_timeline_from_postgres_datadir( - unfinished_timeline, - &pgdata_path, - pgdata_lsn, - ctx, - ) - .await - .with_context(|| { - format!("Failed to import pgdatadir for timeline {tenant_shard_id}/{timeline_id}") - })?; + raw_timeline + .write(|unfinished_timeline| async move { + import_datadir::import_timeline_from_postgres_datadir( + &unfinished_timeline, + &pgdata_path, + pgdata_lsn, + ctx, + ) + .await + .with_context(|| { + format!( + "Failed to import pgdatadir for timeline {tenant_shard_id}/{timeline_id}" + ) + })?; - fail::fail_point!("before-checkpoint-new-timeline", |_| { - Err(CreateTimelineError::Other(anyhow::anyhow!( - "failpoint before-checkpoint-new-timeline" - ))) - }); + fail::fail_point!("before-checkpoint-new-timeline", |_| { + Err(CreateTimelineError::Other(anyhow::anyhow!( + "failpoint before-checkpoint-new-timeline" + ))) + }); - unfinished_timeline - .freeze_and_flush() - .await - .with_context(|| { - format!( - "Failed to flush after pgdatadir import for timeline {tenant_shard_id}/{timeline_id}" - ) - })?; + Ok(()) + }) + .await?; // All done! - let timeline = raw_timeline.finish_creation()?; + let timeline = raw_timeline.finish_creation().await?; // Callers are responsible to wait for uploads to complete and for activating the timeline. 
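// A self-contained sketch (assuming the scopeguard crate) of the cleanup pattern
// used for the temporary initdb directory above: clone the path for the deferred
// block so the guard stays independent of later moves or borrows of the original.
fn main() -> std::io::Result<()> {
    let pgdata_path = std::env::temp_dir().join("bootstrap-example");
    std::fs::create_dir_all(&pgdata_path)?;

    let pgdata_path_deferred = pgdata_path.clone();
    scopeguard::defer! {
        if let Err(e) = std::fs::remove_dir_all(&pgdata_path_deferred) {
            eprintln!("Failed to remove temporary directory '{}': {e}", pgdata_path_deferred.display());
        }
    }

    // ... import work that reads from `pgdata_path` would run here ...
    Ok(())
}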
@@ -5499,6 +5491,9 @@ pub(crate) mod harness { image_layer_creation_check_threshold: Some( tenant_conf.image_layer_creation_check_threshold, ), + image_creation_preempt_threshold: Some( + tenant_conf.image_creation_preempt_threshold, + ), lsn_lease_length: Some(tenant_conf.lsn_lease_length), lsn_lease_length_for_ts: Some(tenant_conf.lsn_lease_length_for_ts), timeline_offloading: Some(tenant_conf.timeline_offloading), diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 139ed27bd200..972837dc442c 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -357,6 +357,9 @@ pub struct TenantConfOpt { #[serde(skip_serializing_if = "Option::is_none")] pub image_layer_creation_check_threshold: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub image_creation_preempt_threshold: Option, + #[serde(skip_serializing_if = "Option::is_none")] #[serde(with = "humantime_serde")] #[serde(default)] @@ -453,6 +456,9 @@ impl TenantConfOpt { image_layer_creation_check_threshold: self .image_layer_creation_check_threshold .unwrap_or(global_conf.image_layer_creation_check_threshold), + image_creation_preempt_threshold: self + .image_creation_preempt_threshold + .unwrap_or(global_conf.image_creation_preempt_threshold), lsn_lease_length: self .lsn_lease_length .unwrap_or(global_conf.lsn_lease_length), @@ -504,6 +510,7 @@ impl TenantConfOpt { mut lazy_slru_download, mut timeline_get_throttle, mut image_layer_creation_check_threshold, + mut image_creation_preempt_threshold, mut lsn_lease_length, mut lsn_lease_length_for_ts, mut timeline_offloading, @@ -578,6 +585,9 @@ impl TenantConfOpt { patch .image_layer_creation_check_threshold .apply(&mut image_layer_creation_check_threshold); + patch + .image_creation_preempt_threshold + .apply(&mut image_creation_preempt_threshold); patch .lsn_lease_length .map(|v| humantime::parse_duration(&v))? @@ -626,6 +636,7 @@ impl TenantConfOpt { lazy_slru_download, timeline_get_throttle, image_layer_creation_check_threshold, + image_creation_preempt_threshold, lsn_lease_length, lsn_lease_length_for_ts, timeline_offloading, @@ -689,6 +700,7 @@ impl From for models::TenantConfig { lazy_slru_download: value.lazy_slru_download, timeline_get_throttle: value.timeline_get_throttle, image_layer_creation_check_threshold: value.image_layer_creation_check_threshold, + image_creation_preempt_threshold: value.image_creation_preempt_threshold, lsn_lease_length: value.lsn_lease_length.map(humantime), lsn_lease_length_for_ts: value.lsn_lease_length_for_ts.map(humantime), timeline_offloading: value.timeline_offloading, diff --git a/pageserver/src/tenant/secondary/downloader.rs b/pageserver/src/tenant/secondary/downloader.rs index cf524fcb25e2..2e8c3946bd69 100644 --- a/pageserver/src/tenant/secondary/downloader.rs +++ b/pageserver/src/tenant/secondary/downloader.rs @@ -673,12 +673,30 @@ impl<'a> TenantDownloader<'a> { HeatMapDownload::Modified(m) => m, }; - let heatmap = serde_json::from_slice::(&heatmap_bytes)?; - - // Save the heatmap: this will be useful on restart, allowing us to reconstruct - // layer metadata without having to re-download it. 
+ // Heatmap storage location let heatmap_path = self.conf.tenant_heatmap_path(tenant_shard_id); + let last_heatmap = if last_download.is_none() { + match load_heatmap(&heatmap_path, ctx).await { + Ok(htm) => htm, + Err(e) => { + tracing::warn!("Couldn't load heatmap from {heatmap_path}: {e:?}"); + None + } + } + } else { + None + }; + + let last_heatmap_timelines = last_heatmap.as_ref().map(|htm| { + htm.timelines + .iter() + .map(|tl| (tl.timeline_id, tl)) + .collect::>() + }); + + let heatmap = serde_json::from_slice::(&heatmap_bytes)?; + let temp_path = path_with_suffix_extension(&heatmap_path, TEMP_FILE_SUFFIX); let context_msg = format!("write tenant {tenant_shard_id} heatmap to {heatmap_path}"); let heatmap_path_bg = heatmap_path.clone(); @@ -707,10 +725,17 @@ impl<'a> TenantDownloader<'a> { let timeline_state = match timeline_state { Some(t) => t, None => { + let last_heatmap = + last_heatmap_timelines + .as_ref() + .and_then(|last_heatmap_timelines| { + last_heatmap_timelines.get(&timeline.timeline_id).copied() + }); // We have no existing state: need to scan local disk for layers first. let timeline_state = init_timeline_state( self.conf, tenant_shard_id, + last_heatmap, timeline, &self.secondary_state.resident_size_metric, ) @@ -1079,12 +1104,12 @@ impl<'a> TenantDownloader<'a> { } } - if on_disk.metadata.generation_file_size() != on_disk.metadata.generation_file_size() { + if on_disk.metadata.generation_file_size() != layer.metadata.generation_file_size() { tracing::info!( "Re-downloading layer {} with changed size or generation: {:?}->{:?}", layer.name, on_disk.metadata.generation_file_size(), - on_disk.metadata.generation_file_size() + layer.metadata.generation_file_size() ); return LayerAction::Download; } @@ -1277,6 +1302,7 @@ impl<'a> TenantDownloader<'a> { async fn init_timeline_state( conf: &'static PageServerConf, tenant_shard_id: &TenantShardId, + last_heatmap: Option<&HeatMapTimeline>, heatmap: &HeatMapTimeline, resident_metric: &UIntGauge, ) -> SecondaryDetailTimeline { @@ -1306,6 +1332,13 @@ async fn init_timeline_state( let heatmap_metadata: HashMap<&LayerName, &HeatMapLayer> = heatmap.layers.iter().map(|l| (&l.name, l)).collect(); + let last_heatmap_metadata: HashMap<&LayerName, &HeatMapLayer> = + if let Some(last_heatmap) = last_heatmap { + last_heatmap.layers.iter().map(|l| (&l.name, l)).collect() + } else { + HashMap::new() + }; + while let Some(dentry) = dir .next_entry() .await @@ -1339,18 +1372,32 @@ async fn init_timeline_state( match LayerName::from_str(file_name) { Ok(name) => { let remote_meta = heatmap_metadata.get(&name); + let last_meta = last_heatmap_metadata.get(&name); + let mut remove = false; match remote_meta { Some(remote_meta) => { + let last_meta_generation_file_size = last_meta + .map(|m| m.metadata.generation_file_size()) + .unwrap_or(remote_meta.metadata.generation_file_size()); // TODO: checksums for layers (https://github.com/neondatabase/neon/issues/2784) - if local_meta.len() != remote_meta.metadata.file_size { - // This should not happen, because we do crashsafe write-then-rename when downloading - // layers, and layers in remote storage are immutable. Remove the local file because - // we cannot trust it. 
- tracing::warn!( + if remote_meta.metadata.generation_file_size() + != last_meta_generation_file_size + { + tracing::info!( + "Removing local layer {name} as on-disk json metadata has different generation or file size from remote: {:?} -> {:?}", + last_meta_generation_file_size, + remote_meta.metadata.generation_file_size() + ); + remove = true; + } else if local_meta.len() != remote_meta.metadata.file_size { + // This can happen in the presence of race conditions: the remote and on-disk metadata have changed, but we haven't had + // the chance yet to download the new layer to disk, before the process restarted. + tracing::info!( "Removing local layer {name} with unexpected local size {} != {}", local_meta.len(), remote_meta.metadata.file_size ); + remove = true; } else { // We expect the access time to be initialized immediately afterwards, when // the latest heatmap is applied to the state. @@ -1372,15 +1419,18 @@ async fn init_timeline_state( "Removing secondary local layer {} because it's absent in heatmap", name ); - tokio::fs::remove_file(&dentry.path()) - .await - .or_else(fs_ext::ignore_not_found) - .fatal_err(&format!( - "Removing layer {}", - dentry.path().to_string_lossy() - )); + remove = true; } } + if remove { + tokio::fs::remove_file(&dentry.path()) + .await + .or_else(fs_ext::ignore_not_found) + .fatal_err(&format!( + "Removing layer {}", + dentry.path().to_string_lossy() + )); + } } Err(_) => { // Ignore it. @@ -1391,3 +1441,18 @@ async fn init_timeline_state( detail } + +/// Loads a json-encoded heatmap file from the provided on-disk path +async fn load_heatmap( + path: &Utf8PathBuf, + ctx: &RequestContext, +) -> Result, anyhow::Error> { + let mut file = match VirtualFile::open(path, ctx).await { + Ok(file) => file, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(e) => Err(e)?, + }; + let st = file.read_to_string(ctx).await?; + let htm = serde_json::from_str(&st)?; + Ok(Some(htm)) +} diff --git a/pageserver/src/tenant/secondary/heatmap_uploader.rs b/pageserver/src/tenant/secondary/heatmap_uploader.rs index c5e5e0494571..d72c33736954 100644 --- a/pageserver/src/tenant/secondary/heatmap_uploader.rs +++ b/pageserver/src/tenant/secondary/heatmap_uploader.rs @@ -9,13 +9,14 @@ use crate::{ metrics::SECONDARY_MODE, tenant::{ config::AttachmentMode, - mgr::GetTenantError, - mgr::TenantManager, + mgr::{GetTenantError, TenantManager}, remote_timeline_client::remote_heatmap_path, span::debug_assert_current_span_has_tenant_id, tasks::{warn_when_period_overrun, BackgroundLoopKind}, Tenant, }, + virtual_file::VirtualFile, + TEMP_FILE_SUFFIX, }; use futures::Future; @@ -32,7 +33,10 @@ use super::{ }; use tokio_util::sync::CancellationToken; use tracing::{info_span, instrument, Instrument}; -use utils::{backoff, completion::Barrier, yielding_loop::yielding_loop}; +use utils::{ + backoff, completion::Barrier, crashsafe::path_with_suffix_extension, + yielding_loop::yielding_loop, +}; pub(super) async fn heatmap_uploader_task( tenant_manager: Arc, @@ -461,6 +465,18 @@ async fn upload_tenant_heatmap( } } + // After a successful upload persist the fresh heatmap to disk. + // When restarting, the tenant will read the heatmap from disk + // and additively generate a new heatmap (see [`Timeline::generate_heatmap`]). + // If the heatmap is stale, the additive generation can lead to keeping previously + // evicted timelines on the secondarie's disk. 
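// A simplified sketch of the load_heatmap pattern above (assuming serde_json and
// anyhow, and using std::fs in place of VirtualFile): a missing file is not an
// error, it simply means there is no previously persisted heatmap to start from.
fn load_json_if_present(path: &std::path::Path) -> anyhow::Result<Option<serde_json::Value>> {
    let bytes = match std::fs::read(path) {
        Ok(bytes) => bytes,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
        Err(e) => return Err(e.into()),
    };
    Ok(Some(serde_json::from_slice(&bytes)?))
}

fn main() -> anyhow::Result<()> {
    let heatmap = load_json_if_present(std::path::Path::new("/nonexistent/heatmap-v1.json"))?;
    assert!(heatmap.is_none());
    Ok(())
}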
+ let tenant_shard_id = tenant.get_tenant_shard_id(); + let heatmap_path = tenant.conf.tenant_heatmap_path(tenant_shard_id); + let temp_path = path_with_suffix_extension(&heatmap_path, TEMP_FILE_SUFFIX); + if let Err(err) = VirtualFile::crashsafe_overwrite(heatmap_path, temp_path, bytes).await { + tracing::warn!("Non fatal IO error writing to disk after heatmap upload: {err}"); + } + tracing::info!("Successfully uploaded {size} byte heatmap to {path}"); Ok(UploadHeatmapOutcome::Uploaded(LastUploadState { diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index c1fe67c87c5d..3800852ccc3e 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -80,6 +80,16 @@ pub(crate) struct ValueReconstructState { pub(crate) img: Option<(Lsn, Bytes)>, } +impl ValueReconstructState { + /// Returns the number of page deltas applied to the page image. + pub fn num_deltas(&self) -> usize { + match self.img { + Some(_) => self.records.len(), + None => self.records.len() - 1, // omit will_init record + } + } +} + #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub(crate) enum ValueReconstructSituation { Complete, diff --git a/pageserver/src/tenant/storage_layer/batch_split_writer.rs b/pageserver/src/tenant/storage_layer/batch_split_writer.rs index 22d8b81bccfd..7da51c27df99 100644 --- a/pageserver/src/tenant/storage_layer/batch_split_writer.rs +++ b/pageserver/src/tenant/storage_layer/batch_split_writer.rs @@ -166,6 +166,10 @@ impl BatchLayerWriter { // END: catch every error and do the recovery in the above section Ok(generated_layers) } + + pub fn pending_layer_num(&self) -> usize { + self.generated_layer_writers.len() + } } /// An image writer that takes images and produces multiple image layers. diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index 99e0ff1aa5df..92313afba7b7 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -340,7 +340,7 @@ impl Layer { /// Download the layer if evicted. /// /// Will not error when the layer is already downloaded. 
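// A simplified sketch (hypothetical record type) of the num_deltas() convention
// above: when there is no base image, the first record must be a will_init record
// that produces the page, so it is not counted as an applied delta.
struct ReconstructState { records: Vec<String>, img: Option<Vec<u8>> }

impl ReconstructState {
    fn num_deltas(&self) -> usize {
        match self.img {
            Some(_) => self.records.len(),
            None => self.records.len() - 1, // omit the will_init record
        }
    }
}

fn main() {
    let with_image = ReconstructState { records: vec!["d1".into(), "d2".into()], img: Some(vec![]) };
    let without_image = ReconstructState { records: vec!["init".into(), "d1".into()], img: None };
    assert_eq!(with_image.num_deltas(), 2);
    assert_eq!(without_image.num_deltas(), 1);
}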
- pub(crate) async fn download(&self) -> anyhow::Result<()> { + pub(crate) async fn download(&self) -> Result<(), DownloadError> { self.0.get_or_maybe_download(true, None).await?; Ok(()) } diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 3725e2f7fcbc..d65f0991820d 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -11,6 +11,7 @@ use crate::metrics::TENANT_TASK_EVENTS; use crate::task_mgr; use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME}; use crate::tenant::throttle::Stats; +use crate::tenant::timeline::compaction::CompactionOutcome; use crate::tenant::timeline::CompactionError; use crate::tenant::{Tenant, TenantState}; use rand::Rng; @@ -206,11 +207,11 @@ async fn compaction_loop(tenant: Arc, cancel: CancellationToken) { .run(tenant.compaction_iteration(&cancel, &ctx)) .await; match output { - Ok(has_pending_task) => { + Ok(outcome) => { error_run_count = 0; // schedule the next compaction immediately in case there is a pending compaction task - sleep_duration = if has_pending_task { - Duration::ZERO + sleep_duration = if let CompactionOutcome::Pending = outcome { + Duration::from_secs(1) } else { period }; diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b4b30fcd23e2..b6a349a20913 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -18,6 +18,7 @@ use arc_swap::{ArcSwap, ArcSwapOption}; use bytes::Bytes; use camino::Utf8Path; use chrono::{DateTime, Utc}; +use compaction::CompactionOutcome; use enumset::EnumSet; use fail::fail_point; use futures::{stream::FuturesUnordered, StreamExt}; @@ -51,6 +52,7 @@ use tokio::{ }; use tokio_util::sync::CancellationToken; use tracing::*; +use utils::rate_limit::RateLimit; use utils::{ fs_ext, guard_arc_swap::GuardArcSwap, @@ -115,7 +117,7 @@ use pageserver_api::config::tenant_conf_defaults::DEFAULT_PITR_INTERVAL; use crate::config::PageServerConf; use crate::keyspace::{KeyPartitioning, KeySpace}; -use crate::metrics::TimelineMetrics; +use crate::metrics::{TimelineMetrics, DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL}; use crate::pgdatadir_mapping::CalculateLogicalSizeError; use crate::tenant::config::TenantConfOpt; use pageserver_api::reltag::RelTag; @@ -188,6 +190,19 @@ pub enum ImageLayerCreationMode { Initial, } +#[derive(Clone, Debug, Default)] +pub enum LastImageLayerCreationStatus { + Incomplete { + /// The last key of the partition (exclusive) that was processed in the last + /// image layer creation attempt. We will continue from this key in the next + /// attempt. + last_key: Key, + }, + Complete, + #[default] + Initial, +} + impl std::fmt::Display for ImageLayerCreationMode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:?}", self) @@ -340,10 +355,14 @@ pub struct Timeline { // Needed to ensure that we can't create a branch at a point that was already garbage collected pub latest_gc_cutoff_lsn: Rcu, + pub(crate) gc_compaction_layer_update_lock: tokio::sync::RwLock<()>, + // List of child timelines and their branch points. This is needed to avoid // garbage collecting data that is still needed by the child timelines. pub(crate) gc_info: std::sync::RwLock, + pub(crate) last_image_layer_creation_status: ArcSwap, + // It may change across major versions so for simplicity // keep it after running initdb for a timeline. 
// It is needed in checks when we want to error on some operations @@ -933,9 +952,16 @@ pub(crate) enum ShutdownMode { Hard, } -struct ImageLayerCreationOutcome { - unfinished_image_layer: Option, - next_start_key: Key, +enum ImageLayerCreationOutcome { + /// We generated an image layer + Generated { + unfinished_image_layer: ImageLayerWriter, + }, + /// The key range is empty + Empty, + /// (Only used in metadata image layer creation), after reading the metadata keys, we decide to skip + /// the image layer creation. + Skip, } /// Public interface functions @@ -1044,7 +1070,7 @@ impl Timeline { } pub(crate) const MAX_GET_VECTORED_KEYS: u64 = 32; - pub(crate) const VEC_GET_LAYERS_VISITED_WARN_THRESH: f64 = 512.0; + pub(crate) const LAYERS_VISITED_WARN_THRESHOLD: u32 = 100; /// Look up multiple page versions at a given LSN /// @@ -1194,6 +1220,7 @@ impl Timeline { return (key, Err(err)); } }; + DELTAS_PER_READ_GLOBAL.observe(converted.num_deltas() as f64); // The walredo module expects the records to be descending in terms of Lsn. // And we submit the IOs in that order, so, there shuold be no need to sort here. @@ -1221,25 +1248,28 @@ impl Timeline { // (this is a requirement, not a bug). Skip updating the metric in these cases // to avoid infinite results. if !results.is_empty() { - let avg = layers_visited as f64 / results.len() as f64; - if avg >= Self::VEC_GET_LAYERS_VISITED_WARN_THRESH { - use utils::rate_limit::RateLimit; - static LOGGED: Lazy> = + // Record the total number of layers visited towards each key in the batch. While some + // layers may not intersect with a given read, and the cost of layer visits are + // amortized across the batch, each visited layer contributes directly to the observed + // latency for every read in the batch, which is what we care about. + if layers_visited >= Self::LAYERS_VISITED_WARN_THRESHOLD { + static LOG_PACER: Lazy> = Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(60)))); - let mut rate_limit = LOGGED.lock().unwrap(); - rate_limit.call(|| { + LOG_PACER.lock().unwrap().call(|| { + let num_keys = keyspace.total_raw_size(); + let num_pages = results.len(); tracing::info!( shard_id = %self.tenant_shard_id.shard_slug(), lsn = %lsn, - "Vectored read for {} visited {} layers on average per key and {} in total. {}/{} pages were returned", - keyspace, avg, layers_visited, results.len(), keyspace.total_raw_size()); + "Vectored read for {keyspace} visited {layers_visited} layers. Returned {num_pages}/{num_keys} pages.", + ); }); } - // Note that this is an approximation. Tracking the exact number of layers visited - // per key requires virtually unbounded memory usage and is inefficient - // (i.e. segment tree tracking each range queried from a layer) - crate::metrics::VEC_READ_NUM_LAYERS_VISITED.observe(avg); + for _ in &results { + self.metrics.layers_per_read.observe(layers_visited as f64); + LAYERS_PER_READ_GLOBAL.observe(layers_visited as f64); + } } Ok(results) @@ -1655,7 +1685,7 @@ impl Timeline { cancel: &CancellationToken, flags: EnumSet, ctx: &RequestContext, - ) -> Result { + ) -> Result { self.compact_with_options( cancel, CompactOptions { @@ -1677,7 +1707,7 @@ impl Timeline { cancel: &CancellationToken, options: CompactOptions, ctx: &RequestContext, - ) -> Result { + ) -> Result { // most likely the cancellation token is from background task, but in tests it could be the // request task as well. @@ -1697,8 +1727,8 @@ impl Timeline { // compaction task goes over it's period (20s) which is quite often in production. 
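// A self-contained sketch (assuming once_cell; RateLimit here is a hypothetical
// stand-in with the same new()/call() shape as utils::rate_limit::RateLimit) of
// the pacing used for the high-read-amplification warning above: at most one log
// line per interval, shared process-wide through a static.
use once_cell::sync::Lazy;
use std::sync::Mutex;
use std::time::{Duration, Instant};

struct RateLimit { interval: Duration, last: Option<Instant> }

impl RateLimit {
    fn new(interval: Duration) -> Self { Self { interval, last: None } }
    fn call(&mut self, f: impl FnOnce()) {
        let now = Instant::now();
        if self.last.map_or(true, |t| now.duration_since(t) >= self.interval) {
            self.last = Some(now);
            f();
        }
    }
}

static LOG_PACER: Lazy<Mutex<RateLimit>> =
    Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(60))));

fn main() {
    for _ in 0..3 {
        LOG_PACER.lock().unwrap().call(|| println!("read visited many layers"));
    }
    // Prints once; the two follow-up calls inside the 60s window are suppressed.
}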
let (_guard, _permit) = tokio::select! { tuple = prepare => { tuple }, - _ = self.cancel.cancelled() => return Ok(false), - _ = cancel.cancelled() => return Ok(false), + _ = self.cancel.cancelled() => return Ok(CompactionOutcome::Done), + _ = cancel.cancelled() => return Ok(CompactionOutcome::Done), }; let last_record_lsn = self.get_last_record_lsn(); @@ -1706,13 +1736,13 @@ impl Timeline { // Last record Lsn could be zero in case the timeline was just created if !last_record_lsn.is_valid() { warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}"); - return Ok(false); + return Ok(CompactionOutcome::Done); } let result = match self.get_compaction_algorithm_settings().kind { CompactionAlgorithm::Tiered => { self.compact_tiered(cancel, ctx).await?; - Ok(false) + Ok(CompactionOutcome::Done) } CompactionAlgorithm::Legacy => self.compact_legacy(cancel, options, ctx).await, }; @@ -1811,7 +1841,7 @@ impl Timeline { self.last_record_lsn.shutdown(); if let ShutdownMode::FreezeAndFlush = mode { - if let Some((open, frozen)) = self + let do_flush = if let Some((open, frozen)) = self .layers .read() .await @@ -1820,43 +1850,54 @@ impl Timeline { .ok() .filter(|(open, frozen)| *open || *frozen > 0) { - tracing::info!(?open, frozen, "flushing and freezing on shutdown"); + if self.remote_client.is_archived() == Some(true) { + // No point flushing on shutdown for an archived timeline: it is not important + // to have it nice and fresh after our restart, and trying to flush here might + // race with trying to offload it (which also stops the flush loop) + false + } else { + tracing::info!(?open, frozen, "flushing and freezing on shutdown"); + true + } } else { - // this is double-shutdown, ignore it - } + // this is double-shutdown, it'll be a no-op + true + }; // we shut down walreceiver above, so, we won't add anything more // to the InMemoryLayer; freeze it and wait for all frozen layers // to reach the disk & upload queue, then shut the upload queue and // wait for it to drain. - match self.freeze_and_flush().await { - Ok(_) => { - // drain the upload queue - // if we did not wait for completion here, it might be our shutdown process - // didn't wait for remote uploads to complete at all, as new tasks can forever - // be spawned. - // - // what is problematic is the shutting down of RemoteTimelineClient, because - // obviously it does not make sense to stop while we wait for it, but what - // about corner cases like s3 suddenly hanging up? - self.remote_client.shutdown().await; - } - Err(FlushLayerError::Cancelled) => { - // this is likely the second shutdown, ignore silently. - // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080 - debug_assert!(self.cancel.is_cancelled()); - } - Err(e) => { - // Non-fatal. Shutdown is infallible. Failures to flush just mean that - // we have some extra WAL replay to do next time the timeline starts. - warn!("failed to freeze and flush: {e:#}"); + if do_flush { + match self.freeze_and_flush().await { + Ok(_) => { + // drain the upload queue + // if we did not wait for completion here, it might be our shutdown process + // didn't wait for remote uploads to complete at all, as new tasks can forever + // be spawned. + // + // what is problematic is the shutting down of RemoteTimelineClient, because + // obviously it does not make sense to stop while we wait for it, but what + // about corner cases like s3 suddenly hanging up? 
+ self.remote_client.shutdown().await; + } + Err(FlushLayerError::Cancelled) => { + // this is likely the second shutdown, ignore silently. + // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080 + debug_assert!(self.cancel.is_cancelled()); + } + Err(e) => { + // Non-fatal. Shutdown is infallible. Failures to flush just mean that + // we have some extra WAL replay to do next time the timeline starts. + warn!("failed to freeze and flush: {e:#}"); + } } - } - // `self.remote_client.shutdown().await` above should have already flushed everything from the queue, but - // we also do a final check here to ensure that the queue is empty. - if !self.remote_client.no_pending_work() { - warn!("still have pending work in remote upload queue, but continuing shutting down anyways"); + // `self.remote_client.shutdown().await` above should have already flushed everything from the queue, but + // we also do a final check here to ensure that the queue is empty. + if !self.remote_client.no_pending_work() { + warn!("still have pending work in remote upload queue, but continuing shutting down anyways"); + } } } @@ -2021,8 +2062,16 @@ impl Timeline { pub(crate) async fn download_layer( &self, layer_file_name: &LayerName, - ) -> anyhow::Result> { - let Some(layer) = self.find_layer(layer_file_name).await? else { + ) -> Result, super::storage_layer::layer::DownloadError> { + let Some(layer) = self + .find_layer(layer_file_name) + .await + .map_err(|e| match e { + layer_manager::Shutdown => { + super::storage_layer::layer::DownloadError::TimelineShutdown + } + })? + else { return Ok(None); }; @@ -2323,6 +2372,18 @@ impl Timeline { ) } + fn get_image_creation_preempt_threshold(&self) -> usize { + let tenant_conf = self.tenant_conf.load(); + tenant_conf + .tenant_conf + .image_creation_preempt_threshold + .unwrap_or( + self.conf + .default_tenant_conf + .image_creation_preempt_threshold, + ) + } + /// Resolve the effective WAL receiver protocol to use for this tenant. /// /// Priority order is: @@ -2432,6 +2493,7 @@ impl Timeline { shard_identity, pg_version, layers: Default::default(), + gc_compaction_layer_update_lock: tokio::sync::RwLock::new(()), walredo_mgr, walreceiver: Mutex::new(None), @@ -2472,6 +2534,10 @@ impl Timeline { gc_info: std::sync::RwLock::new(GcInfo::default()), + last_image_layer_creation_status: ArcSwap::new(Arc::new( + LastImageLayerCreationStatus::default(), + )), + latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()), initdb_lsn: metadata.initdb_lsn(), @@ -3475,6 +3541,9 @@ impl Timeline { // image layer). let _gc_cutoff_holder = timeline.get_latest_gc_cutoff_lsn(); + // See `compaction::compact_with_gc` for why we need this. + let _guard = timeline.gc_compaction_layer_update_lock.read().await; + loop { if cancel.is_cancelled() { return Err(GetVectoredError::Cancelled); @@ -4012,15 +4081,20 @@ impl Timeline { } let mut layers_to_upload = Vec::new(); - layers_to_upload.extend( - self.create_image_layers( + let (generated_image_layers, is_complete) = self + .create_image_layers( &partitions, self.initdb_lsn, ImageLayerCreationMode::Initial, ctx, + LastImageLayerCreationStatus::Initial, ) - .await?, + .await?; + debug_assert!( + matches!(is_complete, LastImageLayerCreationStatus::Complete), + "init image generation mode must fully cover the keyspace" ); + layers_to_upload.extend(generated_image_layers); (layers_to_upload, None) } else { @@ -4277,7 +4351,7 @@ impl Timeline { Ok(result) } - // Is it time to create a new image layer for the given partition? 
+ // Is it time to create a new image layer for the given partition? True if we want to generate. async fn time_for_new_image_layer(&self, partition: &KeySpace, lsn: Lsn) -> bool { let threshold = self.get_image_creation_threshold(); @@ -4340,7 +4414,6 @@ impl Timeline { lsn: Lsn, ctx: &RequestContext, img_range: Range, - start: Key, io_concurrency: IoConcurrency, ) -> Result { let mut wrote_keys = false; @@ -4428,26 +4501,23 @@ impl Timeline { lsn }, ); - Ok(ImageLayerCreationOutcome { - unfinished_image_layer: Some(image_layer_writer), - next_start_key: img_range.end, + Ok(ImageLayerCreationOutcome::Generated { + unfinished_image_layer: image_layer_writer, }) } else { - // Special case: the image layer may be empty if this is a sharded tenant and the - // partition does not cover any keys owned by this shard. In this case, to ensure - // we don't leave gaps between image layers, leave `start` where it is, so that the next - // layer we write will cover the key range that we just scanned. tracing::debug!("no data in range {}-{}", img_range.start, img_range.end); - Ok(ImageLayerCreationOutcome { - unfinished_image_layer: None, - next_start_key: start, - }) + Ok(ImageLayerCreationOutcome::Empty) } } /// Create an image layer for metadata keys. This function produces one image layer for all metadata /// keys for now. Because metadata keys cannot exceed basebackup size limit, the image layer for it /// would not be too large to fit in a single image layer. + /// + /// Creating image layers for metadata keys are different from relational keys. Firstly, instead of + /// iterating each key and get an image for each of them, we do a `vectored_get` scan over the sparse + /// keyspace to get all images in one run. Secondly, we use a different image layer generation metrics + /// for metadata keys than relational keys, which is the number of delta files visited during the scan. #[allow(clippy::too_many_arguments)] async fn create_image_layer_for_metadata_keys( self: &Arc, @@ -4457,12 +4527,13 @@ impl Timeline { ctx: &RequestContext, img_range: Range, mode: ImageLayerCreationMode, - start: Key, io_concurrency: IoConcurrency, ) -> Result { // Metadata keys image layer creation. let mut reconstruct_state = ValuesReconstructState::new(io_concurrency); let begin = Instant::now(); + // Directly use `get_vectored_impl` to skip the max_vectored_read_key limit check. Note that the keyspace should + // not contain too many keys, otherwise this takes a lot of memory. let data = self .get_vectored_impl(partition.clone(), lsn, &mut reconstruct_state, ctx) .await?; @@ -4487,10 +4558,7 @@ impl Timeline { ); if !trigger_generation && mode == ImageLayerCreationMode::Try { - return Ok(ImageLayerCreationOutcome { - unfinished_image_layer: None, - next_start_key: img_range.end, - }); + return Ok(ImageLayerCreationOutcome::Skip); } if self.cancel.is_cancelled() { return Err(CreateImageLayersError::Cancelled); @@ -4521,20 +4589,12 @@ impl Timeline { lsn } ); - Ok(ImageLayerCreationOutcome { - unfinished_image_layer: Some(image_layer_writer), - next_start_key: img_range.end, + Ok(ImageLayerCreationOutcome::Generated { + unfinished_image_layer: image_layer_writer, }) } else { - // Special case: the image layer may be empty if this is a sharded tenant and the - // partition does not cover any keys owned by this shard. In this case, to ensure - // we don't leave gaps between image layers, leave `start` where it is, so that the next - // layer we write will cover the key range that we just scanned. 
tracing::debug!("no data in range {}-{}", img_range.start, img_range.end); - Ok(ImageLayerCreationOutcome { - unfinished_image_layer: None, - next_start_key: start, - }) + Ok(ImageLayerCreationOutcome::Empty) } } @@ -4590,6 +4650,8 @@ impl Timeline { decision } + /// Returns the image layers generated and an enum indicating whether the process is fully completed. + /// true = we have generate all image layers, false = we preempt the process for L0 compaction. #[tracing::instrument(skip_all, fields(%lsn, %mode))] async fn create_image_layers( self: &Arc, @@ -4597,9 +4659,15 @@ impl Timeline { lsn: Lsn, mode: ImageLayerCreationMode, ctx: &RequestContext, - ) -> Result, CreateImageLayersError> { + last_status: LastImageLayerCreationStatus, + ) -> Result<(Vec, LastImageLayerCreationStatus), CreateImageLayersError> { let timer = self.metrics.create_images_time_histo.start_timer(); + if partitioning.parts.is_empty() { + warn!("no partitions to create image layers for"); + return Ok((vec![], LastImageLayerCreationStatus::Complete)); + } + // We need to avoid holes between generated image layers. // Otherwise LayerMap::image_layer_exists will return false if key range of some layer is covered by more than one // image layer with hole between them. In this case such layer can not be utilized by GC. @@ -4611,15 +4679,65 @@ impl Timeline { // image layers <100000000..100000099> and <200000000..200000199> are not completely covering it. let mut start = Key::MIN; - let check_for_image_layers = self.should_check_if_image_layers_required(lsn); + let check_for_image_layers = + if let LastImageLayerCreationStatus::Incomplete { last_key } = last_status { + info!( + "resuming image layer creation: last_status=incomplete, continue from {}", + last_key + ); + true + } else { + self.should_check_if_image_layers_required(lsn) + }; let mut batch_image_writer = BatchLayerWriter::new(self.conf).await?; - for partition in partitioning.parts.iter() { + let mut all_generated = true; + + let mut partition_processed = 0; + let mut total_partitions = partitioning.parts.len(); + let mut last_partition_processed = None; + let mut partition_parts = partitioning.parts.clone(); + + if let LastImageLayerCreationStatus::Incomplete { last_key } = last_status { + // We need to skip the partitions that have already been processed. + let mut found = false; + for (i, partition) in partition_parts.iter().enumerate() { + if last_key <= partition.end().unwrap() { + // ```plain + // |------|--------|----------|------| + // ^last_key + // ^start from this partition + // ``` + // Why `i+1` instead of `i`? + // It is possible that the user did some writes after the previous image layer creation attempt so that + // a relation grows in size, and the last_key is now in the middle of the partition. In this case, we + // still want to skip this partition, so that we can make progress and avoid generating image layers over + // the same partition. Doing a mod to ensure we don't end up with an empty vec. + if i + 1 >= total_partitions { + // In general, this case should not happen -- if last_key is on the last partition, the previous + // iteration of image layer creation should return a complete status. + break; // with found=false + } + partition_parts = partition_parts.split_off(i + 1); // Remove the first i + 1 elements + total_partitions = partition_parts.len(); + // Update the start key to the partition start. 
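+ // e.g. with partitions ending at keys 10, 20 and 30 and last_key = 12, we skip past the
+ // partition containing key 12 and resume from the partition that starts at key 20.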
+ start = partition_parts[0].start().unwrap(); + found = true; + break; + } + } + if !found { + // Last key is within the last partition, or larger than all partitions. + return Ok((vec![], LastImageLayerCreationStatus::Complete)); + } + } + + for partition in partition_parts.iter() { if self.cancel.is_cancelled() { return Err(CreateImageLayersError::Cancelled); } - + partition_processed += 1; let img_range = start..partition.ranges.last().unwrap().end; let compact_metadata = partition.overlaps(&Key::metadata_key_range()); if compact_metadata { @@ -4654,6 +4772,8 @@ impl Timeline { lsn_range: PersistentLayerDesc::image_layer_lsn_range(lsn), is_delta: false, }) { + // TODO: this can be processed with the BatchLayerWriter::finish_with_discard + // in the future. tracing::info!( "Skipping image layer at {lsn} {}..{}, already exists", img_range.start, @@ -4687,17 +4807,13 @@ impl Timeline { .map_err(|_| CreateImageLayersError::Cancelled)?, ); - let ImageLayerCreationOutcome { - unfinished_image_layer, - next_start_key, - } = if !compact_metadata { + let outcome = if !compact_metadata { self.create_image_layer_for_rel_blocks( partition, image_layer_writer, lsn, ctx, img_range.clone(), - start, io_concurrency, ) .await? @@ -4709,18 +4825,58 @@ impl Timeline { ctx, img_range.clone(), mode, - start, io_concurrency, ) .await? }; - start = next_start_key; - if let Some(unfinished_image_layer) = unfinished_image_layer { - batch_image_writer.add_unfinished_image_writer( + match outcome { + ImageLayerCreationOutcome::Empty => { + // No data in this partition, so we don't need to create an image layer (for now). + // The next image layer should cover this key range, so we don't advance the `start` + // key. + } + ImageLayerCreationOutcome::Generated { unfinished_image_layer, - img_range, - lsn, - ); + } => { + batch_image_writer.add_unfinished_image_writer( + unfinished_image_layer, + img_range.clone(), + lsn, + ); + // The next image layer should be generated right after this one. + start = img_range.end; + } + ImageLayerCreationOutcome::Skip => { + // We don't need to create an image layer for this partition. + // The next image layer should NOT cover this range, otherwise + // the keyspace becomes empty (reads don't go past image layers). + start = img_range.end; + } + } + + if let ImageLayerCreationMode::Try = mode { + // We have at least made some progress + if batch_image_writer.pending_layer_num() >= 1 { + // The `Try` mode is currently only used on the compaction path. We want to avoid + // image layer generation taking too long time and blocking L0 compaction. So in this + // mode, we also inspect the current number of L0 layers and skip image layer generation + // if there are too many of them. + let num_of_l0_layers = { + let layers = self.layers.read().await; + layers.layer_map()?.level0_deltas().len() + }; + let image_preempt_threshold = self.get_image_creation_preempt_threshold() + * self.get_compaction_threshold(); + if image_preempt_threshold != 0 && num_of_l0_layers >= image_preempt_threshold { + tracing::info!( + "preempt image layer generation at {lsn} when processing partition {}..{}: too many L0 layers {}", + partition.start().unwrap(), partition.end().unwrap(), num_of_l0_layers + ); + last_partition_processed = Some(partition.clone()); + all_generated = false; + break; + } + } } } @@ -4735,14 +4891,42 @@ impl Timeline { .open_mut()? 
.track_new_image_layers(&image_layers, &self.metrics); drop_wlock(guard); - timer.stop_and_record(); + let duration = timer.stop_and_record(); // Creating image layers may have caused some previously visible layers to be covered if !image_layers.is_empty() { self.update_layer_visibility().await?; } - Ok(image_layers) + let total_layer_size = image_layers + .iter() + .map(|l| l.metadata().file_size) + .sum::(); + + info!( + "created {} image layers ({} bytes) in {}s, processed {} out of {} partitions", + image_layers.len(), + total_layer_size, + duration.as_secs_f64(), + partition_processed, + total_partitions + ); + + Ok(( + image_layers, + if all_generated { + LastImageLayerCreationStatus::Complete + } else { + LastImageLayerCreationStatus::Incomplete { + last_key: if let Some(last_partition_processed) = last_partition_processed { + last_partition_processed.end().unwrap_or(Key::MIN) + } else { + // This branch should be unreachable, but in case it happens, we can just return the start key. + Key::MIN + }, + } + }, + )) } /// Wait until the background initial logical size calculation is complete, or diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 5f7b5f1af584..cfde0704424f 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -11,7 +11,7 @@ use std::sync::Arc; use super::layer_manager::LayerManager; use super::{ CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode, - RecordedDuration, Timeline, + LastImageLayerCreationStatus, RecordedDuration, Timeline, }; use anyhow::{anyhow, bail, Context}; @@ -33,6 +33,7 @@ use crate::page_cache; use crate::statvfs::Statvfs; use crate::tenant::checks::check_valid_layermap; use crate::tenant::gc_block::GcBlock; +use crate::tenant::layer_map::LayerMap; use crate::tenant::remote_timeline_client::WaitCompletionError; use crate::tenant::storage_layer::batch_split_writer::{ BatchWriterResult, SplitDeltaLayerWriter, SplitImageLayerWriter, @@ -262,13 +263,13 @@ impl GcCompactionQueue { ctx: &RequestContext, gc_block: &GcBlock, timeline: &Arc, - ) -> Result { + ) -> Result { let _one_op_at_a_time_guard = self.consumer_lock.lock().await; let has_pending_tasks; let (id, item) = { let mut guard = self.inner.lock().unwrap(); let Some((id, item)) = guard.queued.pop_front() else { - return Ok(false); + return Ok(CompactionOutcome::Done); }; guard.running = Some((id, item.clone())); has_pending_tasks = !guard.queued.is_empty(); @@ -323,7 +324,11 @@ impl GcCompactionQueue { let mut guard = self.inner.lock().unwrap(); guard.running = None; } - Ok(has_pending_tasks) + Ok(if has_pending_tasks { + CompactionOutcome::Pending + } else { + CompactionOutcome::Done + }) } #[allow(clippy::type_complexity)] @@ -434,6 +439,11 @@ impl KeyHistoryRetention { if dry_run { return true; } + if LayerMap::is_l0(&key.key_range, key.is_delta) { + // gc-compaction should not produce L0 deltas, otherwise it will break the layer order. + // We should ignore such layers. + return true; + } let layer_generation; { let guard = tline.layers.read().await; @@ -589,6 +599,17 @@ impl CompactionStatistics { } } +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +pub enum CompactionOutcome { + #[default] + /// No layers need to be compacted after this round. Compaction doesn't need + /// to be immediately scheduled. + Done, + /// Still has pending layers to be compacted after this round. 
Ideally, the scheduler + /// should immediately schedule another compaction. + Pending, +} + impl Timeline { /// TODO: cancellation /// @@ -598,7 +619,7 @@ impl Timeline { cancel: &CancellationToken, options: CompactOptions, ctx: &RequestContext, - ) -> Result { + ) -> Result { if options .flags .contains(CompactFlags::EnhancedGcBottomMostCompaction) @@ -606,7 +627,7 @@ impl Timeline { self.compact_with_gc(cancel, options, ctx) .await .map_err(CompactionError::Other)?; - return Ok(false); + return Ok(CompactionOutcome::Done); } if options.flags.contains(CompactFlags::DryRun) { @@ -666,9 +687,9 @@ impl Timeline { // Define partitioning schema if needed // 1. L0 Compact - let fully_compacted = { + let l0_compaction_outcome = { let timer = self.metrics.compact_time_histo.start_timer(); - let fully_compacted = self + let l0_compaction_outcome = self .compact_level0( target_file_size, options.flags.contains(CompactFlags::ForceL0Compaction), @@ -676,15 +697,15 @@ impl Timeline { ) .await?; timer.stop_and_record(); - fully_compacted + l0_compaction_outcome }; - if !fully_compacted { + if let CompactionOutcome::Pending = l0_compaction_outcome { // Yield and do not do any other kind of compaction. True means // that we have pending L0 compaction tasks and the compaction scheduler // will prioritize compacting this tenant/timeline again. info!("skipping image layer generation and shard ancestor compaction due to L0 compaction did not include all layers."); - return Ok(true); + return Ok(CompactionOutcome::Pending); } // 2. Repartition and create image layers if necessary @@ -709,7 +730,7 @@ impl Timeline { .extend(sparse_partitioning.into_dense().parts); // 3. Create new image layers for partitions that have been modified "enough". - let image_layers = self + let (image_layers, outcome) = self .create_image_layers( &partitioning, lsn, @@ -722,10 +743,22 @@ impl Timeline { ImageLayerCreationMode::Try }, &image_ctx, + self.last_image_layer_creation_status + .load() + .as_ref() + .clone(), ) .await?; + self.last_image_layer_creation_status + .store(Arc::new(outcome.clone())); + self.upload_new_image_layers(image_layers)?; + if let LastImageLayerCreationStatus::Incomplete { .. } = outcome { + // Yield and do not do any other kind of compaction. + info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction)."); + return Ok(CompactionOutcome::Pending); + } partitioning.parts.len() } Err(err) => { @@ -753,7 +786,7 @@ impl Timeline { self.compact_shard_ancestors(rewrite_max, ctx).await?; } - Ok(false) + Ok(CompactionOutcome::Done) } /// Check for layers that are elegible to be rewritten: @@ -1010,11 +1043,11 @@ impl Timeline { target_file_size: u64, force_compaction_ignore_threshold: bool, ctx: &RequestContext, - ) -> Result { + ) -> Result { let CompactLevel0Phase1Result { new_layers, deltas_to_compact, - fully_compacted, + outcome, } = { let phase1_span = info_span!("compact_level0_phase1"); let ctx = ctx.attached_child(); @@ -1043,12 +1076,12 @@ impl Timeline { if new_layers.is_empty() && deltas_to_compact.is_empty() { // nothing to do - return Ok(true); + return Ok(CompactionOutcome::Done); } self.finish_compact_batch(&new_layers, &Vec::new(), &deltas_to_compact) .await?; - Ok(fully_compacted) + Ok(outcome) } /// Level0 files first phase of compaction, explained in the [`Self::compact_legacy`] comment. 
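As a rough illustration of how a caller might consume the new `CompactionOutcome` return value, here is a minimal driver-loop sketch. It is not part of the patch itself; `drive_compaction`, the closure-based `pass`, and the sleep intervals are assumptions made only for the example.

use std::time::Duration;

// Sketch: drive repeated compaction passes based on the reported outcome.
// `pass()` stands in for one call into `Timeline::compact_legacy`.
async fn drive_compaction<F, Fut, E>(mut pass: F)
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<CompactionOutcome, E>>,
    E: std::fmt::Display,
{
    loop {
        match pass().await {
            // More L0s queued (or image layer creation was preempted): go again immediately.
            Ok(CompactionOutcome::Pending) => continue,
            // Nothing pending: wait for the regular compaction period before the next pass.
            Ok(CompactionOutcome::Done) => tokio::time::sleep(Duration::from_secs(20)).await,
            Err(e) => {
                eprintln!("compaction pass failed: {e}");
                tokio::time::sleep(Duration::from_secs(2)).await;
            }
        }
    }
}

The enum's own doc comments capture the contract the sketch relies on: `Pending` asks the scheduler to run another round right away, while `Done` lets it fall back to the periodic trigger.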
@@ -1503,11 +1536,9 @@ impl Timeline { .await .map_err(CompactionError::Other)?; } else { - let shard = self.shard_identity.shard_index(); let owner = self.shard_identity.get_shard_number(&key); - if cfg!(debug_assertions) { - panic!("key {key} does not belong on shard {shard}, owned by {owner}"); - } + + // This happens after a shard split, when we're compacting an L0 created by our parent shard debug!("dropping key {key} during compaction (it belongs on shard {owner})"); } @@ -1592,7 +1623,11 @@ impl Timeline { .into_iter() .map(|x| x.drop_eviction_guard()) .collect::>(), - fully_compacted, + outcome: if fully_compacted { + CompactionOutcome::Done + } else { + CompactionOutcome::Pending + }, }) } } @@ -1603,7 +1638,7 @@ struct CompactLevel0Phase1Result { deltas_to_compact: Vec, // Whether we have included all L0 layers, or selected only part of them due to the // L0 compaction size limit. - fully_compacted: bool, + outcome: CompactionOutcome, } #[derive(Default)] @@ -2919,10 +2954,45 @@ impl Timeline { // Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only // operate on L1 layers. { + // Gc-compaction will rewrite the history of a key. This could happen in two ways: + // + // 1. We create an image layer to replace all the deltas below the compact LSN. In this case, assume + // we have 2 delta layers A and B, both below the compact LSN. We create an image layer I to replace + // A and B at the compact LSN. If the read path finishes reading A, yields, and now we update the layer + // map, the read path then cannot find any keys below A, reporting a missing key error, while the key + // now gets stored in I at the compact LSN. + // + // --------------- --------------- + // delta1@LSN20 image1@LSN20 + // --------------- (read path collects delta@LSN20, => --------------- (read path cannot find anything + // delta1@LSN10 yields) below LSN 20) + // --------------- + // + // 2. We create a delta layer to replace all the deltas below the compact LSN, and in the delta layers, + // we combines the history of a key into a single image. For example, we have deltas at LSN 1, 2, 3, 4, + // Assume one delta layer contains LSN 1, 2, 3 and the other contains LSN 4. + // + // We let gc-compaction combine delta 2, 3, 4 into an image at LSN 4, which produces a delta layer that + // contains the delta at LSN 1, the image at LSN 4. If the read path finishes reading the original delta + // layer containing 4, yields, and we update the layer map to put the delta layer. + // + // --------------- --------------- + // delta1@LSN4 image1@LSN4 + // --------------- (read path collects delta@LSN4, => --------------- (read path collects LSN4 and LSN1, + // delta1@LSN1-3 yields) delta1@LSN1 which is an invalid history) + // --------------- --------------- + // + // Therefore, the gc-compaction layer update operation should wait for all ongoing reads, block all pending reads, + // and only allow reads to continue after the update is finished. + + let update_guard = self.gc_compaction_layer_update_lock.write().await; + // Acquiring the update guard ensures current read operations end and new read operations are blocked. + // TODO: can we use `latest_gc_cutoff` Rcu to achieve the same effect? let mut guard = self.layers.write().await; guard .open_mut()? 
- .finish_gc_compaction(&layer_selection, &compact_to, &self.metrics) + .finish_gc_compaction(&layer_selection, &compact_to, &self.metrics); + drop(update_guard); // Allow new reads to start ONLY after we finished updating the layer map. }; // Schedule an index-only upload to update the `latest_gc_cutoff` in the index_part.json. @@ -3199,11 +3269,7 @@ impl TimelineAdaptor { ranges: self.get_keyspace(key_range, lsn, ctx).await?, }; // TODO set proper (stateful) start. The create_image_layer_for_rel_blocks function mostly - let start = Key::MIN; - let ImageLayerCreationOutcome { - unfinished_image_layer, - next_start_key: _, - } = self + let outcome = self .timeline .create_image_layer_for_rel_blocks( &keyspace, @@ -3211,13 +3277,15 @@ impl TimelineAdaptor { lsn, ctx, key_range.clone(), - start, IoConcurrency::sequential(), ) .await?; - if let Some(image_layer_writer) = unfinished_image_layer { - let (desc, path) = image_layer_writer.finish(ctx).await?; + if let ImageLayerCreationOutcome::Generated { + unfinished_image_layer, + } = outcome + { + let (desc, path) = unfinished_image_layer.finish(ctx).await?; let image_layer = Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?; self.new_images.push(image_layer); diff --git a/pageserver/src/tenant/timeline/uninit.rs b/pageserver/src/tenant/timeline/uninit.rs index 80a09b4840d0..3074463384fb 100644 --- a/pageserver/src/tenant/timeline/uninit.rs +++ b/pageserver/src/tenant/timeline/uninit.rs @@ -1,4 +1,4 @@ -use std::{collections::hash_map::Entry, fs, sync::Arc}; +use std::{collections::hash_map::Entry, fs, future::Future, sync::Arc}; use anyhow::Context; use camino::Utf8PathBuf; @@ -8,7 +8,8 @@ use utils::{fs_ext, id::TimelineId, lsn::Lsn, sync::gate::GateGuard}; use crate::{ context::RequestContext, import_datadir, - tenant::{CreateTimelineIdempotency, Tenant, TimelineOrOffloaded}, + span::debug_assert_current_span_has_tenant_and_timeline_id, + tenant::{CreateTimelineError, CreateTimelineIdempotency, Tenant, TimelineOrOffloaded}, }; use super::Timeline; @@ -24,6 +25,9 @@ pub struct UninitializedTimeline<'t> { pub(crate) owning_tenant: &'t Tenant, timeline_id: TimelineId, raw_timeline: Option<(Arc, TimelineCreateGuard)>, + /// Whether we spawned the inner Timeline's tasks such that we must later shut it down + /// if aborting the timeline creation + needs_shutdown: bool, } impl<'t> UninitializedTimeline<'t> { @@ -36,6 +40,50 @@ impl<'t> UninitializedTimeline<'t> { owning_tenant, timeline_id, raw_timeline, + needs_shutdown: false, + } + } + + /// When writing data to this timeline during creation, use this wrapper: it will take care of + /// setup of Timeline tasks required for I/O (flush loop) and making sure they are torn down + /// later. 
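+ ///
+ /// Intended call pattern (a sketch; `import_initial_data` is a hypothetical helper returning
+ /// `Result<(), CreateTimelineError>`):
+ ///
+ /// ```ignore
+ /// uninitialized_timeline
+ /// .write(|timeline| async move { import_initial_data(&timeline, ctx).await })
+ /// .await?;
+ /// ```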
+ pub(crate) async fn write(&mut self, f: F) -> anyhow::Result<()> + where + F: FnOnce(Arc) -> Fut, + Fut: Future>, + { + debug_assert_current_span_has_tenant_and_timeline_id(); + + // Remember that we did I/O (spawned the flush loop), so that we can check we shut it down on drop + self.needs_shutdown = true; + + let timeline = self.raw_timeline()?; + + // Spawn flush loop so that the Timeline is ready to accept writes + timeline.maybe_spawn_flush_loop(); + + // Invoke the provided function, which will write some data into the new timeline + if let Err(e) = f(timeline.clone()).await { + self.abort().await; + return Err(e.into()); + } + + // Flush the underlying timeline's ephemeral layers to disk + if let Err(e) = timeline + .freeze_and_flush() + .await + .context("Failed to flush after timeline creation writes") + { + self.abort().await; + return Err(e); + } + + Ok(()) + } + + pub(crate) async fn abort(&self) { + if let Some((raw_timeline, _)) = self.raw_timeline.as_ref() { + raw_timeline.shutdown(super::ShutdownMode::Hard).await; } } @@ -44,11 +92,13 @@ impl<'t> UninitializedTimeline<'t> { /// This function launches the flush loop if not already done. /// /// The caller is responsible for activating the timeline (function `.activate()`). - pub(crate) fn finish_creation(mut self) -> anyhow::Result> { + pub(crate) async fn finish_creation(mut self) -> anyhow::Result> { let timeline_id = self.timeline_id; let tenant_shard_id = self.owning_tenant.tenant_shard_id; if self.raw_timeline.is_none() { + self.abort().await; + return Err(anyhow::anyhow!( "No timeline for initialization found for {tenant_shard_id}/{timeline_id}" )); @@ -62,16 +112,25 @@ impl<'t> UninitializedTimeline<'t> { .0 .get_disk_consistent_lsn(); - anyhow::ensure!( - new_disk_consistent_lsn.is_valid(), - "new timeline {tenant_shard_id}/{timeline_id} has invalid disk_consistent_lsn" - ); + if !new_disk_consistent_lsn.is_valid() { + self.abort().await; + + return Err(anyhow::anyhow!( + "new timeline {tenant_shard_id}/{timeline_id} has invalid disk_consistent_lsn" + )); + } let mut timelines = self.owning_tenant.timelines.lock().unwrap(); match timelines.entry(timeline_id) { - Entry::Occupied(_) => anyhow::bail!( + Entry::Occupied(_) => { + // Unexpected, bug in the caller. Tenant is responsible for preventing concurrent creation of the same timeline. + // + // We do not call Self::abort here. Because we don't cleanly shut down our Timeline, [`Self::drop`] should + // skip trying to delete the timeline directory too. + anyhow::bail!( "Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map" - ), + ) + } Entry::Vacant(v) => { // after taking here should be no fallible operations, because the drop guard will not // cleanup after and would block for example the tenant deletion @@ -93,36 +152,31 @@ impl<'t> UninitializedTimeline<'t> { /// Prepares timeline data by loading it from the basebackup archive. 
pub(crate) async fn import_basebackup_from_tar( - self, + mut self, tenant: Arc, copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin), base_lsn: Lsn, broker_client: storage_broker::BrokerClientChannel, ctx: &RequestContext, ) -> anyhow::Result> { - let raw_timeline = self.raw_timeline()?; + self.write(|raw_timeline| async move { + import_datadir::import_basebackup_from_tar(&raw_timeline, copyin_read, base_lsn, ctx) + .await + .context("Failed to import basebackup") + .map_err(CreateTimelineError::Other)?; - import_datadir::import_basebackup_from_tar(raw_timeline, copyin_read, base_lsn, ctx) - .await - .context("Failed to import basebackup")?; - - // Flush the new layer files to disk, before we make the timeline as available to - // the outside world. - // - // Flush loop needs to be spawned in order to be able to flush. - raw_timeline.maybe_spawn_flush_loop(); - - fail::fail_point!("before-checkpoint-new-timeline", |_| { - anyhow::bail!("failpoint before-checkpoint-new-timeline"); - }); + fail::fail_point!("before-checkpoint-new-timeline", |_| { + Err(CreateTimelineError::Other(anyhow::anyhow!( + "failpoint before-checkpoint-new-timeline" + ))) + }); - raw_timeline - .freeze_and_flush() - .await - .context("Failed to flush after basebackup import")?; + Ok(()) + }) + .await?; // All the data has been imported. Insert the Timeline into the tenant's timelines map - let tl = self.finish_creation()?; + let tl = self.finish_creation().await?; tl.activate(tenant, broker_client, None, ctx); Ok(tl) } @@ -143,12 +197,19 @@ impl<'t> UninitializedTimeline<'t> { impl Drop for UninitializedTimeline<'_> { fn drop(&mut self) { - if let Some((_, create_guard)) = self.raw_timeline.take() { + if let Some((timeline, create_guard)) = self.raw_timeline.take() { let _entered = info_span!("drop_uninitialized_timeline", tenant_id = %self.owning_tenant.tenant_shard_id.tenant_id, shard_id = %self.owning_tenant.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id).entered(); - // This is unusual, but can happen harmlessly if the pageserver is stopped while - // creating a timeline. - info!("Timeline got dropped without initializing, cleaning its files"); - cleanup_timeline_directory(create_guard); + if self.needs_shutdown && !timeline.gate.close_complete() { + // This should not happen: caller should call [`Self::abort`] on failures + tracing::warn!( + "Timeline not shut down after initialization failure, cannot clean up files" + ); + } else { + // This is unusual, but can happen harmlessly if the pageserver is stopped while + // creating a timeline. + info!("Timeline got dropped without initializing, cleaning its files"); + cleanup_timeline_directory(create_guard); + } } } } diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index d69e7dbd3245..de917377cb54 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -355,6 +355,19 @@ pub(super) async fn handle_walreceiver_connection( // advances it to its end LSN. 0 is just an initialization placeholder. 
let mut modification = timeline.begin_modification(Lsn(0)); + async fn commit( + modification: &mut DatadirModification<'_>, + ctx: &RequestContext, + uncommitted: &mut u64, + ) -> anyhow::Result<()> { + let stats = modification.stats(); + modification.commit(ctx).await?; + WAL_INGEST.records_committed.inc_by(*uncommitted); + WAL_INGEST.inc_values_committed(&stats); + *uncommitted = 0; + Ok(()) + } + if !records.is_empty() { timeline .metrics @@ -366,8 +379,7 @@ pub(super) async fn handle_walreceiver_connection( if matches!(interpreted.flush_uncommitted, FlushUncommittedRecords::Yes) && uncommitted_records > 0 { - modification.commit(&ctx).await?; - uncommitted_records = 0; + commit(&mut modification, &ctx, &mut uncommitted_records).await?; } let local_next_record_lsn = interpreted.next_record_lsn; @@ -396,8 +408,7 @@ pub(super) async fn handle_walreceiver_connection( || modification.approx_pending_bytes() > DatadirModification::MAX_PENDING_BYTES { - modification.commit(&ctx).await?; - uncommitted_records = 0; + commit(&mut modification, &ctx, &mut uncommitted_records).await?; } } @@ -415,7 +426,7 @@ pub(super) async fn handle_walreceiver_connection( if uncommitted_records > 0 || needs_last_record_lsn_advance { // Commit any uncommitted records - modification.commit(&ctx).await?; + commit(&mut modification, &ctx, &mut uncommitted_records).await?; } if !caught_up && streaming_lsn >= end_of_wal { @@ -442,10 +453,12 @@ pub(super) async fn handle_walreceiver_connection( filtered: &mut u64, ctx: &RequestContext, ) -> anyhow::Result<()> { + let stats = modification.stats(); + modification.commit(ctx).await?; WAL_INGEST .records_committed .inc_by(*uncommitted - *filtered); - modification.commit(ctx).await?; + WAL_INGEST.inc_values_committed(&stats); *uncommitted = 0; *filtered = 0; Ok(()) diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index 8a7f4a4bf5fd..9d539198c7ae 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -234,6 +234,19 @@ impl VirtualFile { ) -> (FullSlice, Result) { self.inner.write_all(buf, ctx).await } + + async fn read_to_end(&mut self, buf: &mut Vec, ctx: &RequestContext) -> Result<(), Error> { + self.inner.read_to_end(buf, ctx).await + } + + pub(crate) async fn read_to_string( + &mut self, + ctx: &RequestContext, + ) -> Result { + let mut buf = Vec::new(); + self.read_to_end(&mut buf, ctx).await?; + Ok(String::from_utf8(buf)?) 
+ } } /// Indicates whether to enable fsync, fdatasync, or O_SYNC/O_DSYNC when writing @@ -993,6 +1006,24 @@ impl VirtualFileInner { (buf, result) }) } + + async fn read_to_end(&mut self, buf: &mut Vec, ctx: &RequestContext) -> Result<(), Error> { + let mut tmp = vec![0; 128]; + loop { + let slice = tmp.slice(..128); + let (slice, res) = self.read_at(slice, self.pos, ctx).await; + match res { + Ok(0) => return Ok(()), + Ok(n) => { + self.pos += n as u64; + buf.extend_from_slice(&slice[..n]); + } + Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {} + Err(e) => return Err(e), + } + tmp = slice.into_inner(); + } + } } // Adapted from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#117-135 @@ -1237,10 +1268,6 @@ impl VirtualFile { ) -> Result, std::io::Error> { self.inner.read_blk(blknum, ctx).await } - - async fn read_to_end(&mut self, buf: &mut Vec, ctx: &RequestContext) -> Result<(), Error> { - self.inner.read_to_end(buf, ctx).await - } } #[cfg(test)] @@ -1260,24 +1287,6 @@ impl VirtualFileInner { slice.into_inner(), )) } - - async fn read_to_end(&mut self, buf: &mut Vec, ctx: &RequestContext) -> Result<(), Error> { - let mut tmp = vec![0; 128]; - loop { - let slice = tmp.slice(..128); - let (slice, res) = self.read_at(slice, self.pos, ctx).await; - match res { - Ok(0) => return Ok(()), - Ok(n) => { - self.pos += n as u64; - buf.extend_from_slice(&slice[..n]); - } - Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - tmp = slice.into_inner(); - } - } } impl Drop for VirtualFileInner { diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index e0283d99e0fb..04edb3e3f47b 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -28,17 +28,9 @@ use std::time::Duration; use std::time::Instant; use std::time::SystemTime; -use pageserver_api::shard::ShardIdentity; -use postgres_ffi::fsm_logical_to_physical; -use postgres_ffi::walrecord::*; -use postgres_ffi::{dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, TimestampTz}; -use wal_decoder::models::*; - use anyhow::{bail, Result}; use bytes::{Buf, Bytes}; use tracing::*; -use utils::failpoint_support; -use utils::rate_limit::RateLimit; use crate::context::RequestContext; use crate::metrics::WAL_INGEST; @@ -50,11 +42,18 @@ use crate::ZERO_PAGE; use pageserver_api::key::rel_block_to_key; use pageserver_api::record::NeonWalRecord; use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; +use pageserver_api::shard::ShardIdentity; +use postgres_ffi::fsm_logical_to_physical; use postgres_ffi::pg_constants; use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM}; +use postgres_ffi::walrecord::*; use postgres_ffi::TransactionId; +use postgres_ffi::{dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, TimestampTz}; use utils::bin_ser::SerializeError; use utils::lsn::Lsn; +use utils::rate_limit::RateLimit; +use utils::{critical, failpoint_support}; +use wal_decoder::models::*; enum_pgversion! {CheckPoint, pgv::CheckPoint} @@ -327,93 +326,75 @@ impl WalIngest { let mut new_vm_blk = new_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK); let mut old_vm_blk = old_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK); - // Sometimes, Postgres seems to create heap WAL records with the - // ALL_VISIBLE_CLEARED flag set, even though the bit in the VM page is - // not set. In fact, it's possible that the VM page does not exist at all. 
- // In that case, we don't want to store a record to clear the VM bit; - // replaying it would fail to find the previous image of the page, because - // it doesn't exist. So check if the VM page(s) exist, and skip the WAL - // record if it doesn't. - // - // TODO: analyze the metrics and tighten this up accordingly. This logic - // implicitly assumes that VM pages see explicit WAL writes before - // implicit ClearVmBits, and will otherwise silently drop updates. + // VM bits can only be cleared on the shard(s) owning the VM relation, and must be within + // its view of the VM relation size. Out of caution, error instead of failing WAL ingestion, + // as there has historically been cases where PostgreSQL has cleared spurious VM pages. See: + // https://github.com/neondatabase/neon/pull/10634. let Some(vm_size) = get_relsize(modification, vm_rel, ctx).await? else { - WAL_INGEST - .clear_vm_bits_unknown - .with_label_values(&["relation"]) - .inc(); + critical!("clear_vm_bits for unknown VM relation {vm_rel}"); return Ok(()); }; if let Some(blknum) = new_vm_blk { if blknum >= vm_size { - WAL_INGEST - .clear_vm_bits_unknown - .with_label_values(&["new_page"]) - .inc(); + critical!("new_vm_blk {blknum} not in {vm_rel} of size {vm_size}"); new_vm_blk = None; } } if let Some(blknum) = old_vm_blk { if blknum >= vm_size { - WAL_INGEST - .clear_vm_bits_unknown - .with_label_values(&["old_page"]) - .inc(); + critical!("old_vm_blk {blknum} not in {vm_rel} of size {vm_size}"); old_vm_blk = None; } } - if new_vm_blk.is_some() || old_vm_blk.is_some() { - if new_vm_blk == old_vm_blk { - // An UPDATE record that needs to clear the bits for both old and the - // new page, both of which reside on the same VM page. + if new_vm_blk.is_none() && old_vm_blk.is_none() { + return Ok(()); + } else if new_vm_blk == old_vm_blk { + // An UPDATE record that needs to clear the bits for both old and the new page, both of + // which reside on the same VM page. + self.put_rel_wal_record( + modification, + vm_rel, + new_vm_blk.unwrap(), + NeonWalRecord::ClearVisibilityMapFlags { + new_heap_blkno, + old_heap_blkno, + flags, + }, + ctx, + ) + .await?; + } else { + // Clear VM bits for one heap page, or for two pages that reside on different VM pages. + if let Some(new_vm_blk) = new_vm_blk { self.put_rel_wal_record( modification, vm_rel, - new_vm_blk.unwrap(), + new_vm_blk, NeonWalRecord::ClearVisibilityMapFlags { new_heap_blkno, + old_heap_blkno: None, + flags, + }, + ctx, + ) + .await?; + } + if let Some(old_vm_blk) = old_vm_blk { + self.put_rel_wal_record( + modification, + vm_rel, + old_vm_blk, + NeonWalRecord::ClearVisibilityMapFlags { + new_heap_blkno: None, old_heap_blkno, flags, }, ctx, ) .await?; - } else { - // Clear VM bits for one heap page, or for two pages that reside on - // different VM pages. 
- if let Some(new_vm_blk) = new_vm_blk { - self.put_rel_wal_record( - modification, - vm_rel, - new_vm_blk, - NeonWalRecord::ClearVisibilityMapFlags { - new_heap_blkno, - old_heap_blkno: None, - flags, - }, - ctx, - ) - .await?; - } - if let Some(old_vm_blk) = old_vm_blk { - self.put_rel_wal_record( - modification, - vm_rel, - old_vm_blk, - NeonWalRecord::ClearVisibilityMapFlags { - new_heap_blkno: None, - old_heap_blkno, - flags, - }, - ctx, - ) - .await?; - } } } - Ok(()) } diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index 08b76521756c..01da61f84b56 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -563,8 +563,8 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, LWLockRelease(lfc_lock); -#if USE_ASSERT_CHECKING - do { +#ifdef USE_ASSERT_CHECKING + { int count = 0; for (int j = 0; j < nblocks; j++) @@ -574,7 +574,7 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, } Assert(count == found); - } while (false); + } #endif return found; diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index 4460e3b40c11..22aeb2e2d658 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -36,6 +36,11 @@ #include "pagestore_client.h" #include "walproposer.h" +#ifdef __linux__ +#include +#include +#endif + #define PageStoreTrace DEBUG5 #define MIN_RECONNECT_INTERVAL_USEC 1000 @@ -728,11 +733,36 @@ call_PQgetCopyData(shardno_t shard_no, char **buffer) INSTR_TIME_SUBTRACT(since_last_log, last_log_ts); if (INSTR_TIME_GET_MILLISEC(since_last_log) >= LOG_INTERVAL_MS) { + int sndbuf = -1; + int recvbuf = -1; +#ifdef __linux__ + int socketfd; +#endif + since_start = now; INSTR_TIME_SUBTRACT(since_start, start_ts); - neon_shard_log(shard_no, LOG, "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses)", + +#ifdef __linux__ + /* + * get kernel's send and recv queue size via ioctl + * https://elixir.bootlin.com/linux/v6.1.128/source/include/uapi/linux/sockios.h#L25-L27 + */ + socketfd = PQsocket(pageserver_conn); + if (socketfd != -1) { + int ioctl_err; + ioctl_err = ioctl(socketfd, SIOCOUTQ, &sndbuf); + if (ioctl_err!= 0) { + sndbuf = -errno; + } + ioctl_err = ioctl(socketfd, FIONREAD, &recvbuf); + if (ioctl_err != 0) { + recvbuf = -errno; + } + } +#endif + neon_shard_log(shard_no, LOG, "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses) (socket sndbuf=%d recvbuf=%d)", INSTR_TIME_GET_DOUBLE(since_start), - shard->nrequests_sent, shard->nresponses_received); + shard->nrequests_sent, shard->nresponses_received, sndbuf, recvbuf); last_log_ts = now; logged = true; } diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 54cacea98448..012bd479bcd6 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -916,7 +916,7 @@ prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns, { uint64 min_ring_index; PrefetchRequest hashkey; -#if USE_ASSERT_CHECKING +#ifdef USE_ASSERT_CHECKING bool any_hits = false; #endif /* We will never read further ahead than our buffer can store. 
*/ @@ -955,7 +955,7 @@ prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns, else lsns = NULL; -#if USE_ASSERT_CHECKING +#ifdef USE_ASSERT_CHECKING any_hits = true; #endif diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index f362a4503537..d7880ea7b964 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -19,14 +19,15 @@ aws-config.workspace = true aws-sdk-iam.workspace = true aws-sigv4.workspace = true base64.workspace = true +boxcar = "0.2.8" bstr.workspace = true bytes = { workspace = true, features = ["serde"] } camino.workspace = true chrono.workspace = true clap = { workspace = true, features = ["derive", "env"] } +clashmap.workspace = true compute_api.workspace = true consumption_metrics.workspace = true -dashmap.workspace = true env_logger.workspace = true framed-websockets.workspace = true futures.workspace = true @@ -42,6 +43,7 @@ hyper0.workspace = true hyper = { workspace = true, features = ["server", "http1", "http2"] } hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] } http-body-util = { version = "0.1" } +gettid = "0.1.3" indexmap = { workspace = true, features = ["serde"] } ipnet.workspace = true itertools.workspace = true @@ -50,6 +52,8 @@ lasso = { workspace = true, features = ["multi-threaded"] } measured = { workspace = true, features = ["lasso"] } metrics.workspace = true once_cell.workspace = true +opentelemetry = { workspace = true, features = ["trace"] } +papaya = "0.1.8" parking_lot.workspace = true parquet.workspace = true parquet_derive.workspace = true @@ -89,6 +93,9 @@ tokio = { workspace = true, features = ["signal"] } tracing-subscriber.workspace = true tracing-utils.workspace = true tracing.workspace = true +tracing-log.workspace = true +tracing-serde.workspace = true +tracing-opentelemetry.workspace = true try-lock.workspace = true typed-json.workspace = true url.workspace = true @@ -112,6 +119,7 @@ rsa = "0.9" workspace_hack.workspace = true [dev-dependencies] +assert-json-diff.workspace = true camino-tempfile.workspace = true fallible-iterator.workspace = true flate2.workspace = true diff --git a/proxy/README.md b/proxy/README.md index 4b98342d7275..ecd54fbbd864 100644 --- a/proxy/README.md +++ b/proxy/README.md @@ -106,17 +106,7 @@ cases where it is hard to use rows represented as objects (e.g. when several fie Proxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so we can use *.localtest.me` which resolves to `127.0.0.1`. -Let's create self-signed certificate by running: -```sh -openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.localtest.me" -``` - -Then we need to build proxy with 'testing' feature and run, e.g.: -```sh -RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://proxy:password@endpoint.localtest.me:5432/postgres' --is-private-access-proxy true -c server.crt -k server.key -``` - -We will also need to have a postgres instance. Assuming that we have setted up docker we can set it up as follows: +We will need to have a postgres instance. 
Assuming that we have set up docker we can set it up as follows: ```sh docker run \ --detach \ @@ -133,8 +123,18 @@ docker exec -it proxy-postgres psql -U postgres -c "CREATE TABLE neon_control_pl docker exec -it proxy-postgres psql -U postgres -c "CREATE ROLE proxy WITH SUPERUSER LOGIN PASSWORD 'password';" ``` +Let's create self-signed certificate by running: +```sh +openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.localtest.me" +``` + +Then we need to build proxy with 'testing' feature and run, e.g.: +```sh +RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://postgres:proxy-postgres@127.0.0.1:5432/postgres' -c server.crt -k server.key +``` + Now from client you can start a new session: ```sh PGSSLROOTCERT=./server.crt psql "postgresql://proxy:password@endpoint.localtest.me:4432/postgres?sslmode=verify-full" -``` \ No newline at end of file +``` diff --git a/proxy/src/auth/backend/console_redirect.rs b/proxy/src/auth/backend/console_redirect.rs index 1cbf91d3ae73..9be29c38c938 100644 --- a/proxy/src/auth/backend/console_redirect.rs +++ b/proxy/src/auth/backend/console_redirect.rs @@ -7,8 +7,8 @@ use thiserror::Error; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::{info, info_span}; -use super::{ComputeCredentialKeys, ControlPlaneApi}; -use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo}; +use super::ComputeCredentialKeys; +use crate::auth::backend::ComputeUserInfo; use crate::auth::IpPattern; use crate::cache::Cached; use crate::config::AuthenticationConfig; @@ -84,26 +84,15 @@ pub(crate) fn new_psql_session_id() -> String { hex::encode(rand::random::<[u8; 8]>()) } -#[async_trait] -impl BackendIpAllowlist for ConsoleRedirectBackend { - async fn get_allowed_ips( - &self, - ctx: &RequestContext, - user_info: &ComputeUserInfo, - ) -> auth::Result> { - self.api - .get_allowed_ips_and_secret(ctx, user_info) - .await - .map(|(ips, _)| ips.as_ref().clone()) - .map_err(|e| e.into()) - } -} - impl ConsoleRedirectBackend { pub fn new(console_uri: reqwest::Url, api: cplane_proxy_v1::NeonControlPlaneClient) -> Self { Self { console_uri, api } } + pub(crate) fn get_api(&self) -> &cplane_proxy_v1::NeonControlPlaneClient { + &self.api + } + pub(crate) async fn authenticate( &self, ctx: &RequestContext, @@ -191,6 +180,15 @@ async fn authenticate( } } + // Check if the access over the public internet is allowed, otherwise block. Note that + // the console redirect is not behind the VPC service endpoint, so we don't need to check + // the VPC endpoint ID. 
+ if let Some(public_access_allowed) = db_info.public_access_allowed { + if !public_access_allowed { + return Err(auth::AuthError::NetworkNotAllowed); + } + } + client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?; // This config should be self-contained, because we won't diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs index df716f8455f0..e05a693cee27 100644 --- a/proxy/src/auth/backend/jwt.rs +++ b/proxy/src/auth/backend/jwt.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use std::time::{Duration, SystemTime}; use arc_swap::ArcSwapOption; -use dashmap::DashMap; +use clashmap::ClashMap; use jose_jwk::crypto::KeyInfo; use reqwest::{redirect, Client}; use reqwest_retry::policies::ExponentialBackoff; @@ -64,7 +64,7 @@ pub(crate) struct AuthRule { pub struct JwkCache { client: reqwest_middleware::ClientWithMiddleware, - map: DashMap<(EndpointId, RoleName), Arc>, + map: ClashMap<(EndpointId, RoleName), Arc>, } pub(crate) struct JwkCacheEntry { @@ -469,7 +469,7 @@ impl Default for JwkCache { JwkCache { client, - map: DashMap::default(), + map: ClashMap::default(), } } } diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index d17d91a56d96..7ef096207aed 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -26,10 +26,12 @@ use crate::context::RequestContext; use crate::control_plane::client::ControlPlaneClient; use crate::control_plane::errors::GetAuthInfoError; use crate::control_plane::{ - self, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, + self, AccessBlockerFlags, AuthSecret, CachedAccessBlockerFlags, CachedAllowedIps, + CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, }; use crate::intern::EndpointIdInt; use crate::metrics::Metrics; +use crate::protocol2::ConnectionInfoExtra; use crate::proxy::connect_compute::ComputeConnectBackend; use crate::proxy::NeonOptions; use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter}; @@ -99,6 +101,13 @@ impl Backend<'_, T> { Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)), } } + + pub(crate) fn get_api(&self) -> &ControlPlaneClient { + match self { + Self::ControlPlane(api, _) => api, + Self::Local(_) => panic!("Local backend has no API"), + } + } } impl<'a, T> Backend<'a, T> { @@ -247,15 +256,6 @@ impl AuthenticationConfig { } } -#[async_trait::async_trait] -pub(crate) trait BackendIpAllowlist { - async fn get_allowed_ips( - &self, - ctx: &RequestContext, - user_info: &ComputeUserInfo, - ) -> auth::Result>; -} - /// True to its name, this function encapsulates our current auth trade-offs. /// Here, we choose the appropriate auth flow based on circumstances. 
/// @@ -282,23 +282,51 @@ async fn auth_quirks( Ok(info) => (info, None), }; - debug!("fetching user's authentication info"); - let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?; + debug!("fetching authentication info and allowlists"); // check allowed list - if config.ip_allowlist_check_enabled - && !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) - { - return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr())); + let allowed_ips = if config.ip_allowlist_check_enabled { + let allowed_ips = api.get_allowed_ips(ctx, &info).await?; + if !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) { + return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr())); + } + allowed_ips + } else { + Cached::new_uncached(Arc::new(vec![])) + }; + + // check if a VPC endpoint ID is coming in and if yes, if it's allowed + let access_blocks = api.get_block_public_or_vpc_access(ctx, &info).await?; + if config.is_vpc_acccess_proxy { + if access_blocks.vpc_access_blocked { + return Err(AuthError::NetworkNotAllowed); + } + + let incoming_vpc_endpoint_id = match ctx.extra() { + None => return Err(AuthError::MissingEndpointName), + Some(ConnectionInfoExtra::Aws { vpce_id }) => { + // Convert the vcpe_id to a string + String::from_utf8(vpce_id.to_vec()).unwrap_or_default() + } + Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(), + }; + let allowed_vpc_endpoint_ids = api.get_allowed_vpc_endpoint_ids(ctx, &info).await?; + // TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that. + if !allowed_vpc_endpoint_ids.is_empty() + && !allowed_vpc_endpoint_ids.contains(&incoming_vpc_endpoint_id) + { + return Err(AuthError::vpc_endpoint_id_not_allowed( + incoming_vpc_endpoint_id, + )); + } + } else if access_blocks.public_access_blocked { + return Err(AuthError::NetworkNotAllowed); } if !endpoint_rate_limiter.check(info.endpoint.clone().into(), 1) { return Err(AuthError::too_many_connections()); } - let cached_secret = match maybe_secret { - Some(secret) => secret, - None => api.get_role_secret(ctx, &info).await?, - }; + let cached_secret = api.get_role_secret(ctx, &info).await?; let (cached_entry, secret) = cached_secret.take_value(); let secret = if let Some(secret) = secret { @@ -440,34 +468,38 @@ impl Backend<'_, ComputeUserInfo> { } } - pub(crate) async fn get_allowed_ips_and_secret( + pub(crate) async fn get_allowed_ips( + &self, + ctx: &RequestContext, + ) -> Result { + match self { + Self::ControlPlane(api, user_info) => api.get_allowed_ips(ctx, user_info).await, + Self::Local(_) => Ok(Cached::new_uncached(Arc::new(vec![]))), + } + } + + pub(crate) async fn get_allowed_vpc_endpoint_ids( &self, ctx: &RequestContext, - ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { + ) -> Result { match self { Self::ControlPlane(api, user_info) => { - api.get_allowed_ips_and_secret(ctx, user_info).await + api.get_allowed_vpc_endpoint_ids(ctx, user_info).await } - Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)), + Self::Local(_) => Ok(Cached::new_uncached(Arc::new(vec![]))), } } -} -#[async_trait::async_trait] -impl BackendIpAllowlist for Backend<'_, ()> { - async fn get_allowed_ips( + pub(crate) async fn get_block_public_or_vpc_access( &self, ctx: &RequestContext, - user_info: &ComputeUserInfo, - ) -> auth::Result> { - let auth_data = match self { - Self::ControlPlane(api, ()) => api.get_allowed_ips_and_secret(ctx, user_info).await, - Self::Local(_) => 
Ok((Cached::new_uncached(Arc::new(vec![])), None)), - }; - - auth_data - .map(|(ips, _)| ips.as_ref().clone()) - .map_err(|e| e.into()) + ) -> Result { + match self { + Self::ControlPlane(api, user_info) => { + api.get_block_public_or_vpc_access(ctx, user_info).await + } + Self::Local(_) => Ok(Cached::new_uncached(AccessBlockerFlags::default())), + } } } @@ -514,7 +546,10 @@ mod tests { use crate::auth::{ComputeUserInfoMaybeEndpoint, IpPattern}; use crate::config::AuthenticationConfig; use crate::context::RequestContext; - use crate::control_plane::{self, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret}; + use crate::control_plane::{ + self, AccessBlockerFlags, CachedAccessBlockerFlags, CachedAllowedIps, + CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret, + }; use crate::proxy::NeonOptions; use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo}; use crate::scram::threadpool::ThreadPool; @@ -523,6 +558,8 @@ mod tests { struct Auth { ips: Vec, + vpc_endpoint_ids: Vec, + access_blocker_flags: AccessBlockerFlags, secret: AuthSecret, } @@ -535,17 +572,31 @@ mod tests { Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone()))) } - async fn get_allowed_ips_and_secret( + async fn get_allowed_ips( + &self, + _ctx: &RequestContext, + _user_info: &super::ComputeUserInfo, + ) -> Result { + Ok(CachedAllowedIps::new_uncached(Arc::new(self.ips.clone()))) + } + + async fn get_allowed_vpc_endpoint_ids( + &self, + _ctx: &RequestContext, + _user_info: &super::ComputeUserInfo, + ) -> Result { + Ok(CachedAllowedVpcEndpointIds::new_uncached(Arc::new( + self.vpc_endpoint_ids.clone(), + ))) + } + + async fn get_block_public_or_vpc_access( &self, _ctx: &RequestContext, _user_info: &super::ComputeUserInfo, - ) -> Result< - (CachedAllowedIps, Option), - control_plane::errors::GetAuthInfoError, - > { - Ok(( - CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())), - Some(CachedRoleSecret::new_uncached(Some(self.secret.clone()))), + ) -> Result { + Ok(CachedAccessBlockerFlags::new_uncached( + self.access_blocker_flags.clone(), )) } @@ -575,6 +626,7 @@ mod tests { rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET), rate_limit_ip_subnet: 64, ip_allowlist_check_enabled: true, + is_vpc_acccess_proxy: false, is_auth_broker: false, accept_jwts: false, console_redirect_confirmation_timeout: std::time::Duration::from_secs(5), @@ -642,6 +694,8 @@ mod tests { let ctx = RequestContext::test(); let api = Auth { ips: vec![], + vpc_endpoint_ids: vec![], + access_blocker_flags: AccessBlockerFlags::default(), secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), }; @@ -722,6 +776,8 @@ mod tests { let ctx = RequestContext::test(); let api = Auth { ips: vec![], + vpc_endpoint_ids: vec![], + access_blocker_flags: AccessBlockerFlags::default(), secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), }; @@ -774,6 +830,8 @@ mod tests { let ctx = RequestContext::test(); let api = Auth { ips: vec![], + vpc_endpoint_ids: vec![], + access_blocker_flags: AccessBlockerFlags::default(), secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), }; diff --git a/proxy/src/auth/mod.rs b/proxy/src/auth/mod.rs index 0198cc306e08..6082695a6b1b 100644 --- a/proxy/src/auth/mod.rs +++ b/proxy/src/auth/mod.rs @@ -55,6 +55,12 @@ pub(crate) enum AuthError { )] MissingEndpointName, + #[error( + "VPC endpoint ID is not specified. \ + This endpoint requires a VPC endpoint ID to connect." 
+ )] + MissingVPCEndpointId, + #[error("password authentication failed for user '{0}'")] PasswordFailed(Box), @@ -69,6 +75,15 @@ pub(crate) enum AuthError { )] IpAddressNotAllowed(IpAddr), + #[error("This connection is trying to access this endpoint from a blocked network.")] + NetworkNotAllowed, + + #[error( + "This VPC endpoint id {0} is not allowed to connect to this endpoint. \ + Please add it to the allowed list in the Neon console." + )] + VpcEndpointIdNotAllowed(String), + #[error("Too many connections to this endpoint. Please try again later.")] TooManyConnections, @@ -95,6 +110,10 @@ impl AuthError { AuthError::IpAddressNotAllowed(ip) } + pub(crate) fn vpc_endpoint_id_not_allowed(id: String) -> Self { + AuthError::VpcEndpointIdNotAllowed(id) + } + pub(crate) fn too_many_connections() -> Self { AuthError::TooManyConnections } @@ -122,8 +141,11 @@ impl UserFacingError for AuthError { Self::BadAuthMethod(_) => self.to_string(), Self::MalformedPassword(_) => self.to_string(), Self::MissingEndpointName => self.to_string(), + Self::MissingVPCEndpointId => self.to_string(), Self::Io(_) => "Internal error".to_string(), Self::IpAddressNotAllowed(_) => self.to_string(), + Self::NetworkNotAllowed => self.to_string(), + Self::VpcEndpointIdNotAllowed(_) => self.to_string(), Self::TooManyConnections => self.to_string(), Self::UserTimeout(_) => self.to_string(), Self::ConfirmationTimeout(_) => self.to_string(), @@ -142,8 +164,11 @@ impl ReportableError for AuthError { Self::BadAuthMethod(_) => crate::error::ErrorKind::User, Self::MalformedPassword(_) => crate::error::ErrorKind::User, Self::MissingEndpointName => crate::error::ErrorKind::User, + Self::MissingVPCEndpointId => crate::error::ErrorKind::User, Self::Io(_) => crate::error::ErrorKind::ClientDisconnect, Self::IpAddressNotAllowed(_) => crate::error::ErrorKind::User, + Self::NetworkNotAllowed => crate::error::ErrorKind::User, + Self::VpcEndpointIdNotAllowed(_) => crate::error::ErrorKind::User, Self::TooManyConnections => crate::error::ErrorKind::RateLimit, Self::UserTimeout(_) => crate::error::ErrorKind::User, Self::ConfirmationTimeout(_) => crate::error::ErrorKind::User, diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs index ee8b3d4ef579..7a855bf54b41 100644 --- a/proxy/src/bin/local_proxy.rs +++ b/proxy/src/bin/local_proxy.rs @@ -284,6 +284,7 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig rate_limiter: BucketRateLimiter::new(vec![]), rate_limit_ip_subnet: 64, ip_allowlist_check_enabled: true, + is_vpc_acccess_proxy: false, is_auth_broker: false, accept_jwts: true, console_redirect_confirmation_timeout: Duration::ZERO, diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index e1affe8391a6..de685a82c627 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -630,6 +630,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()), rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet, ip_allowlist_check_enabled: !args.is_private_access_proxy, + is_vpc_acccess_proxy: args.is_private_access_proxy, is_auth_broker: args.is_auth_broker, accept_jwts: args.is_auth_broker, console_redirect_confirmation_timeout: args.webauth_confirmation_timeout, diff --git a/proxy/src/cache/endpoints.rs b/proxy/src/cache/endpoints.rs index 0136446d6dfb..b5c42cd23db7 100644 --- a/proxy/src/cache/endpoints.rs +++ b/proxy/src/cache/endpoints.rs @@ -3,7 +3,7 @@ use std::future::pending; use 
std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex}; -use dashmap::DashSet; +use clashmap::ClashSet; use redis::streams::{StreamReadOptions, StreamReadReply}; use redis::{AsyncCommands, FromRedisValue, Value}; use serde::Deserialize; @@ -55,9 +55,9 @@ impl TryFrom<&Value> for ControlPlaneEvent { pub struct EndpointsCache { config: EndpointCacheConfig, - endpoints: DashSet, - branches: DashSet, - projects: DashSet, + endpoints: ClashSet, + branches: ClashSet, + projects: ClashSet, ready: AtomicBool, limiter: Arc>, } @@ -69,9 +69,9 @@ impl EndpointsCache { config.limiter_info.clone(), ))), config, - endpoints: DashSet::new(), - branches: DashSet::new(), - projects: DashSet::new(), + endpoints: ClashSet::new(), + branches: ClashSet::new(), + projects: ClashSet::new(), ready: AtomicBool::new(false), } } diff --git a/proxy/src/cache/project_info.rs b/proxy/src/cache/project_info.rs index cab0b8b90594..7651eb71a2e0 100644 --- a/proxy/src/cache/project_info.rs +++ b/proxy/src/cache/project_info.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use std::time::Duration; use async_trait::async_trait; -use dashmap::DashMap; +use clashmap::ClashMap; use rand::{thread_rng, Rng}; use smol_str::SmolStr; use tokio::sync::Mutex; @@ -15,13 +15,16 @@ use tracing::{debug, info}; use super::{Cache, Cached}; use crate::auth::IpPattern; use crate::config::ProjectInfoCacheOptions; -use crate::control_plane::AuthSecret; -use crate::intern::{EndpointIdInt, ProjectIdInt, RoleNameInt}; +use crate::control_plane::{AccessBlockerFlags, AuthSecret}; +use crate::intern::{AccountIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt}; use crate::types::{EndpointId, RoleName}; #[async_trait] pub(crate) trait ProjectInfoCache { fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt); + fn invalidate_allowed_vpc_endpoint_ids_for_projects(&self, project_ids: Vec); + fn invalidate_allowed_vpc_endpoint_ids_for_org(&self, account_id: AccountIdInt); + fn invalidate_block_public_or_vpc_access_for_project(&self, project_id: ProjectIdInt); fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt); async fn decrement_active_listeners(&self); async fn increment_active_listeners(&self); @@ -51,6 +54,8 @@ impl From for Entry { struct EndpointInfo { secret: std::collections::HashMap>>, allowed_ips: Option>>>, + block_public_or_vpc_access: Option>, + allowed_vpc_endpoint_ids: Option>>>, } impl EndpointInfo { @@ -92,9 +97,52 @@ impl EndpointInfo { } None } + pub(crate) fn get_allowed_vpc_endpoint_ids( + &self, + valid_since: Instant, + ignore_cache_since: Option, + ) -> Option<(Arc>, bool)> { + if let Some(allowed_vpc_endpoint_ids) = &self.allowed_vpc_endpoint_ids { + if valid_since < allowed_vpc_endpoint_ids.created_at { + return Some(( + allowed_vpc_endpoint_ids.value.clone(), + Self::check_ignore_cache( + ignore_cache_since, + allowed_vpc_endpoint_ids.created_at, + ), + )); + } + } + None + } + pub(crate) fn get_block_public_or_vpc_access( + &self, + valid_since: Instant, + ignore_cache_since: Option, + ) -> Option<(AccessBlockerFlags, bool)> { + if let Some(block_public_or_vpc_access) = &self.block_public_or_vpc_access { + if valid_since < block_public_or_vpc_access.created_at { + return Some(( + block_public_or_vpc_access.value.clone(), + Self::check_ignore_cache( + ignore_cache_since, + block_public_or_vpc_access.created_at, + ), + )); + } + } + None + } + pub(crate) fn invalidate_allowed_ips(&mut self) { self.allowed_ips = None; } + pub(crate) fn 
invalidate_allowed_vpc_endpoint_ids(&mut self) { + self.allowed_vpc_endpoint_ids = None; + } + pub(crate) fn invalidate_block_public_or_vpc_access(&mut self) { + self.block_public_or_vpc_access = None; + } pub(crate) fn invalidate_role_secret(&mut self, role_name: RoleNameInt) { self.secret.remove(&role_name); } @@ -108,9 +156,11 @@ impl EndpointInfo { /// One may ask, why the data is stored per project, when on the user request there is only data about the endpoint available? /// On the cplane side updates are done per project (or per branch), so it's easier to invalidate the whole project cache. pub struct ProjectInfoCacheImpl { - cache: DashMap, + cache: ClashMap, - project2ep: DashMap>, + project2ep: ClashMap>, + // FIXME(stefan): we need a way to GC the account2ep map. + account2ep: ClashMap>, config: ProjectInfoCacheOptions, start_time: Instant, @@ -120,6 +170,63 @@ pub struct ProjectInfoCacheImpl { #[async_trait] impl ProjectInfoCache for ProjectInfoCacheImpl { + fn invalidate_allowed_vpc_endpoint_ids_for_projects(&self, project_ids: Vec) { + info!( + "invalidating allowed vpc endpoint ids for projects `{}`", + project_ids + .iter() + .map(|id| id.to_string()) + .collect::>() + .join(", ") + ); + for project_id in project_ids { + let endpoints = self + .project2ep + .get(&project_id) + .map(|kv| kv.value().clone()) + .unwrap_or_default(); + for endpoint_id in endpoints { + if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) { + endpoint_info.invalidate_allowed_vpc_endpoint_ids(); + } + } + } + } + + fn invalidate_allowed_vpc_endpoint_ids_for_org(&self, account_id: AccountIdInt) { + info!( + "invalidating allowed vpc endpoint ids for org `{}`", + account_id + ); + let endpoints = self + .account2ep + .get(&account_id) + .map(|kv| kv.value().clone()) + .unwrap_or_default(); + for endpoint_id in endpoints { + if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) { + endpoint_info.invalidate_allowed_vpc_endpoint_ids(); + } + } + } + + fn invalidate_block_public_or_vpc_access_for_project(&self, project_id: ProjectIdInt) { + info!( + "invalidating block public or vpc access for project `{}`", + project_id + ); + let endpoints = self + .project2ep + .get(&project_id) + .map(|kv| kv.value().clone()) + .unwrap_or_default(); + for endpoint_id in endpoints { + if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) { + endpoint_info.invalidate_block_public_or_vpc_access(); + } + } + } + fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt) { info!("invalidating allowed ips for project `{}`", project_id); let endpoints = self @@ -176,8 +283,9 @@ impl ProjectInfoCache for ProjectInfoCacheImpl { impl ProjectInfoCacheImpl { pub(crate) fn new(config: ProjectInfoCacheOptions) -> Self { Self { - cache: DashMap::new(), - project2ep: DashMap::new(), + cache: ClashMap::new(), + project2ep: ClashMap::new(), + account2ep: ClashMap::new(), config, ttl_disabled_since_us: AtomicU64::new(u64::MAX), start_time: Instant::now(), @@ -226,6 +334,49 @@ impl ProjectInfoCacheImpl { } Some(Cached::new_uncached(value)) } + pub(crate) fn get_allowed_vpc_endpoint_ids( + &self, + endpoint_id: &EndpointId, + ) -> Option>>> { + let endpoint_id = EndpointIdInt::get(endpoint_id)?; + let (valid_since, ignore_cache_since) = self.get_cache_times(); + let endpoint_info = self.cache.get(&endpoint_id)?; + let value = endpoint_info.get_allowed_vpc_endpoint_ids(valid_since, ignore_cache_since); + let (value, ignore_cache) = value?; + if !ignore_cache { + let cached = Cached { + 
token: Some(( + self, + CachedLookupInfo::new_allowed_vpc_endpoint_ids(endpoint_id), + )), + value, + }; + return Some(cached); + } + Some(Cached::new_uncached(value)) + } + pub(crate) fn get_block_public_or_vpc_access( + &self, + endpoint_id: &EndpointId, + ) -> Option> { + let endpoint_id = EndpointIdInt::get(endpoint_id)?; + let (valid_since, ignore_cache_since) = self.get_cache_times(); + let endpoint_info = self.cache.get(&endpoint_id)?; + let value = endpoint_info.get_block_public_or_vpc_access(valid_since, ignore_cache_since); + let (value, ignore_cache) = value?; + if !ignore_cache { + let cached = Cached { + token: Some(( + self, + CachedLookupInfo::new_block_public_or_vpc_access(endpoint_id), + )), + value, + }; + return Some(cached); + } + Some(Cached::new_uncached(value)) + } + pub(crate) fn insert_role_secret( &self, project_id: ProjectIdInt, @@ -256,6 +407,43 @@ impl ProjectInfoCacheImpl { self.insert_project2endpoint(project_id, endpoint_id); self.cache.entry(endpoint_id).or_default().allowed_ips = Some(allowed_ips.into()); } + pub(crate) fn insert_allowed_vpc_endpoint_ids( + &self, + account_id: Option, + project_id: ProjectIdInt, + endpoint_id: EndpointIdInt, + allowed_vpc_endpoint_ids: Arc>, + ) { + if self.cache.len() >= self.config.size { + // If there are too many entries, wait until the next gc cycle. + return; + } + if let Some(account_id) = account_id { + self.insert_account2endpoint(account_id, endpoint_id); + } + self.insert_project2endpoint(project_id, endpoint_id); + self.cache + .entry(endpoint_id) + .or_default() + .allowed_vpc_endpoint_ids = Some(allowed_vpc_endpoint_ids.into()); + } + pub(crate) fn insert_block_public_or_vpc_access( + &self, + project_id: ProjectIdInt, + endpoint_id: EndpointIdInt, + access_blockers: AccessBlockerFlags, + ) { + if self.cache.len() >= self.config.size { + // If there are too many entries, wait until the next gc cycle. + return; + } + self.insert_project2endpoint(project_id, endpoint_id); + self.cache + .entry(endpoint_id) + .or_default() + .block_public_or_vpc_access = Some(access_blockers.into()); + } + fn insert_project2endpoint(&self, project_id: ProjectIdInt, endpoint_id: EndpointIdInt) { if let Some(mut endpoints) = self.project2ep.get_mut(&project_id) { endpoints.insert(endpoint_id); @@ -264,6 +452,14 @@ impl ProjectInfoCacheImpl { .insert(project_id, HashSet::from([endpoint_id])); } } + fn insert_account2endpoint(&self, account_id: AccountIdInt, endpoint_id: EndpointIdInt) { + if let Some(mut endpoints) = self.account2ep.get_mut(&account_id) { + endpoints.insert(endpoint_id); + } else { + self.account2ep + .insert(account_id, HashSet::from([endpoint_id])); + } + } fn get_cache_times(&self) -> (Instant, Option) { let mut valid_since = Instant::now() - self.config.ttl; // Only ignore cache if ttl is disabled. 
@@ -302,7 +498,7 @@ impl ProjectInfoCacheImpl { let mut removed = 0; let shard = self.project2ep.shards()[shard].write(); for (_, endpoints) in shard.iter() { - for endpoint in endpoints.get() { + for endpoint in endpoints { self.cache.remove(endpoint); removed += 1; } @@ -334,11 +530,25 @@ impl CachedLookupInfo { lookup_type: LookupType::AllowedIps, } } + pub(self) fn new_allowed_vpc_endpoint_ids(endpoint_id: EndpointIdInt) -> Self { + Self { + endpoint_id, + lookup_type: LookupType::AllowedVpcEndpointIds, + } + } + pub(self) fn new_block_public_or_vpc_access(endpoint_id: EndpointIdInt) -> Self { + Self { + endpoint_id, + lookup_type: LookupType::BlockPublicOrVpcAccess, + } + } } enum LookupType { RoleSecret(RoleNameInt), AllowedIps, + AllowedVpcEndpointIds, + BlockPublicOrVpcAccess, } impl Cache for ProjectInfoCacheImpl { @@ -360,6 +570,16 @@ impl Cache for ProjectInfoCacheImpl { endpoint_info.invalidate_allowed_ips(); } } + LookupType::AllowedVpcEndpointIds => { + if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) { + endpoint_info.invalidate_allowed_vpc_endpoint_ids(); + } + } + LookupType::BlockPublicOrVpcAccess => { + if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) { + endpoint_info.invalidate_block_public_or_vpc_access(); + } + } } } } diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index 34f708a36b90..4d919f374a2d 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -1,3 +1,4 @@ +use std::convert::Infallible; use std::net::{IpAddr, SocketAddr}; use std::sync::Arc; @@ -8,23 +9,22 @@ use pq_proto::CancelKeyData; use serde::{Deserialize, Serialize}; use thiserror::Error; use tokio::net::TcpStream; -use tokio::sync::mpsc; +use tokio::sync::{mpsc, oneshot}; use tracing::{debug, info}; -use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo}; +use crate::auth::backend::ComputeUserInfo; use crate::auth::{check_peer_addr_is_in_list, AuthError}; use crate::config::ComputeConfig; use crate::context::RequestContext; +use crate::control_plane::ControlPlaneApi; use crate::error::ReportableError; use crate::ext::LockExt; -use crate::metrics::CancelChannelSizeGuard; -use crate::metrics::{CancellationRequest, Metrics, RedisMsgKind}; +use crate::metrics::{CancelChannelSizeGuard, CancellationRequest, Metrics, RedisMsgKind}; +use crate::protocol2::ConnectionInfoExtra; use crate::rate_limiter::LeakyBucketRateLimiter; use crate::redis::keys::KeyPrefix; use crate::redis::kv_ops::RedisKVClient; use crate::tls::postgres_rustls::MakeRustlsConnect; -use std::convert::Infallible; -use tokio::sync::oneshot; type IpSubnetKey = IpNet; @@ -135,6 +135,9 @@ pub(crate) enum CancelError { #[error("IP is not allowed")] IpNotAllowed, + #[error("VPC endpoint id is not allowed to connect")] + VpcEndpointIdNotAllowed, + #[error("Authentication backend error")] AuthError(#[from] AuthError), @@ -154,8 +157,9 @@ impl ReportableError for CancelError { } CancelError::Postgres(_) => crate::error::ErrorKind::Compute, CancelError::RateLimit => crate::error::ErrorKind::RateLimit, - CancelError::IpNotAllowed => crate::error::ErrorKind::User, - CancelError::NotFound => crate::error::ErrorKind::User, + CancelError::IpNotAllowed + | CancelError::VpcEndpointIdNotAllowed + | CancelError::NotFound => crate::error::ErrorKind::User, CancelError::AuthError(_) => crate::error::ErrorKind::ControlPlane, CancelError::InternalError => crate::error::ErrorKind::Service, } @@ -267,11 +271,12 @@ impl CancellationHandler { /// Will fetch IP allowlist internally. 
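+    /// It also fetches the public/VPC access blocker flags and, when
+    /// `check_vpc_allowed` is set, the allowed VPC endpoint IDs.
+    ///
+    /// Minimal call sketch (illustrative only; `handler`, `key`, `ctx` and the
+    /// control-plane client `api` are assumed to be in scope):
+    ///
+    /// ```ignore
+    /// handler
+    ///     .cancel_session(key, ctx, /* check_ip_allowed */ true, /* check_vpc_allowed */ false, &api)
+    ///     .await?;
+    /// ```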
/// /// return Result primarily for tests - pub(crate) async fn cancel_session( + pub(crate) async fn cancel_session( &self, key: CancelKeyData, ctx: RequestContext, - check_allowed: bool, + check_ip_allowed: bool, + check_vpc_allowed: bool, auth_backend: &T, ) -> Result<(), CancelError> { let subnet_key = match ctx.peer_addr() { @@ -306,11 +311,11 @@ impl CancellationHandler { return Err(CancelError::NotFound); }; - if check_allowed { + if check_ip_allowed { let ip_allowlist = auth_backend .get_allowed_ips(&ctx, &cancel_closure.user_info) .await - .map_err(CancelError::AuthError)?; + .map_err(|e| CancelError::AuthError(e.into()))?; if !check_peer_addr_is_in_list(&ctx.peer_addr(), &ip_allowlist) { // log it here since cancel_session could be spawned in a task @@ -322,6 +327,40 @@ impl CancellationHandler { } } + // check if a VPC endpoint ID is coming in and if yes, if it's allowed + let access_blocks = auth_backend + .get_block_public_or_vpc_access(&ctx, &cancel_closure.user_info) + .await + .map_err(|e| CancelError::AuthError(e.into()))?; + + if check_vpc_allowed { + if access_blocks.vpc_access_blocked { + return Err(CancelError::AuthError(AuthError::NetworkNotAllowed)); + } + + let incoming_vpc_endpoint_id = match ctx.extra() { + None => return Err(CancelError::AuthError(AuthError::MissingVPCEndpointId)), + Some(ConnectionInfoExtra::Aws { vpce_id }) => { + // Convert the vcpe_id to a string + String::from_utf8(vpce_id.to_vec()).unwrap_or_default() + } + Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(), + }; + + let allowed_vpc_endpoint_ids = auth_backend + .get_allowed_vpc_endpoint_ids(&ctx, &cancel_closure.user_info) + .await + .map_err(|e| CancelError::AuthError(e.into()))?; + // TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that. 
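+            // Illustrative semantics of the check below (endpoint IDs are hypothetical):
+            //   allowed = []                                    -> any VPC endpoint is accepted (see TODO above)
+            //   allowed = ["vpce-0abc"], incoming = "vpce-0abc" -> accepted
+            //   allowed = ["vpce-0abc"], incoming = "vpce-0def" -> Err(CancelError::VpcEndpointIdNotAllowed)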
+ if !allowed_vpc_endpoint_ids.is_empty() + && !allowed_vpc_endpoint_ids.contains(&incoming_vpc_endpoint_id) + { + return Err(CancelError::VpcEndpointIdNotAllowed); + } + } else if access_blocks.public_access_blocked { + return Err(CancelError::VpcEndpointIdNotAllowed); + } + Metrics::get() .proxy .cancellation_requests_total diff --git a/proxy/src/config.rs b/proxy/src/config.rs index 8502edcfab09..1dcd37712ea2 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -68,6 +68,7 @@ pub struct AuthenticationConfig { pub rate_limiter: AuthRateLimiter, pub rate_limit_ip_subnet: u8, pub ip_allowlist_check_enabled: bool, + pub is_vpc_acccess_proxy: bool, pub jwks_cache: JwkCache, pub is_auth_broker: bool, pub accept_jwts: bool, diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index 78bfb6deacc3..c4548a7ddd95 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -182,7 +182,8 @@ pub(crate) async fn handle_client( cancel_key_data, ctx, config.authentication_config.ip_allowlist_check_enabled, - backend, + config.authentication_config.is_vpc_acccess_proxy, + backend.get_api(), ) .await .inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok(); diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index a9fb513d3ceb..3236b2e1bfb0 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -19,7 +19,7 @@ use crate::intern::{BranchIdInt, ProjectIdInt}; use crate::metrics::{ ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting, }; -use crate::protocol2::ConnectionInfo; +use crate::protocol2::{ConnectionInfo, ConnectionInfoExtra}; use crate::types::{DbName, EndpointId, RoleName}; pub mod parquet; @@ -312,6 +312,15 @@ impl RequestContext { .ip() } + pub(crate) fn extra(&self) -> Option { + self.0 + .try_lock() + .expect("should not deadlock") + .conn_info + .extra + .clone() + } + pub(crate) fn cold_start_info(&self) -> ColdStartInfo { self.0 .try_lock() diff --git a/proxy/src/control_plane/client/cplane_proxy_v1.rs b/proxy/src/control_plane/client/cplane_proxy_v1.rs index ece03156d1fa..ef6621fc598a 100644 --- a/proxy/src/control_plane/client/cplane_proxy_v1.rs +++ b/proxy/src/control_plane/client/cplane_proxy_v1.rs @@ -22,7 +22,8 @@ use crate::control_plane::errors::{ use crate::control_plane::locks::ApiLocks; use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, Reason}; use crate::control_plane::{ - AuthInfo, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, NodeInfo, + AccessBlockerFlags, AuthInfo, AuthSecret, CachedAccessBlockerFlags, CachedAllowedIps, + CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret, NodeInfo, }; use crate::metrics::{CacheOutcome, Metrics}; use crate::rate_limiter::WakeComputeRateLimiter; @@ -137,9 +138,6 @@ impl NeonControlPlaneClient { } }; - // Ivan: don't know where it will be used, so I leave it here - let _endpoint_vpc_ids = body.allowed_vpc_endpoint_ids.unwrap_or_default(); - let secret = if body.role_secret.is_empty() { None } else { @@ -153,10 +151,23 @@ impl NeonControlPlaneClient { .proxy .allowed_ips_number .observe(allowed_ips.len() as f64); + let allowed_vpc_endpoint_ids = body.allowed_vpc_endpoint_ids.unwrap_or_default(); + Metrics::get() + .proxy + .allowed_vpc_endpoint_ids + .observe(allowed_vpc_endpoint_ids.len() as f64); + let block_public_connections = body.block_public_connections.unwrap_or_default(); + let block_vpc_connections = 
body.block_vpc_connections.unwrap_or_default(); Ok(AuthInfo { secret, allowed_ips, + allowed_vpc_endpoint_ids, project_id: body.project_id, + account_id: body.account_id, + access_blocker_flags: AccessBlockerFlags { + public_access_blocked: block_public_connections, + vpc_access_blocked: block_vpc_connections, + }, }) } .inspect_err(|e| tracing::debug!(error = ?e)) @@ -299,6 +310,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { return Ok(role_secret); } let auth_info = self.do_get_auth_info(ctx, user_info).await?; + let account_id = auth_info.account_id; if let Some(project_id) = auth_info.project_id { let normalized_ep_int = normalized_ep.into(); self.caches.project_info.insert_role_secret( @@ -312,24 +324,35 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { normalized_ep_int, Arc::new(auth_info.allowed_ips), ); + self.caches.project_info.insert_allowed_vpc_endpoint_ids( + account_id, + project_id, + normalized_ep_int, + Arc::new(auth_info.allowed_vpc_endpoint_ids), + ); + self.caches.project_info.insert_block_public_or_vpc_access( + project_id, + normalized_ep_int, + auth_info.access_blocker_flags, + ); ctx.set_project_id(project_id); } // When we just got a secret, we don't need to invalidate it. Ok(Cached::new_uncached(auth_info.secret)) } - async fn get_allowed_ips_and_secret( + async fn get_allowed_ips( &self, ctx: &RequestContext, user_info: &ComputeUserInfo, - ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { + ) -> Result { let normalized_ep = &user_info.endpoint.normalize(); if let Some(allowed_ips) = self.caches.project_info.get_allowed_ips(normalized_ep) { Metrics::get() .proxy - .allowed_ips_cache_misses + .allowed_ips_cache_misses // TODO SR: Should we rename this variable to something like allowed_ip_cache_stats? 
.inc(CacheOutcome::Hit); - return Ok((allowed_ips, None)); + return Ok(allowed_ips); } Metrics::get() .proxy @@ -337,7 +360,10 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { .inc(CacheOutcome::Miss); let auth_info = self.do_get_auth_info(ctx, user_info).await?; let allowed_ips = Arc::new(auth_info.allowed_ips); + let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids); + let access_blocker_flags = auth_info.access_blocker_flags; let user = &user_info.user; + let account_id = auth_info.account_id; if let Some(project_id) = auth_info.project_id { let normalized_ep_int = normalized_ep.into(); self.caches.project_info.insert_role_secret( @@ -351,12 +377,136 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { normalized_ep_int, allowed_ips.clone(), ); + self.caches.project_info.insert_allowed_vpc_endpoint_ids( + account_id, + project_id, + normalized_ep_int, + allowed_vpc_endpoint_ids.clone(), + ); + self.caches.project_info.insert_block_public_or_vpc_access( + project_id, + normalized_ep_int, + access_blocker_flags, + ); + ctx.set_project_id(project_id); + } + Ok(Cached::new_uncached(allowed_ips)) + } + + async fn get_allowed_vpc_endpoint_ids( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result { + let normalized_ep = &user_info.endpoint.normalize(); + if let Some(allowed_vpc_endpoint_ids) = self + .caches + .project_info + .get_allowed_vpc_endpoint_ids(normalized_ep) + { + Metrics::get() + .proxy + .vpc_endpoint_id_cache_stats + .inc(CacheOutcome::Hit); + return Ok(allowed_vpc_endpoint_ids); + } + + Metrics::get() + .proxy + .vpc_endpoint_id_cache_stats + .inc(CacheOutcome::Miss); + + let auth_info = self.do_get_auth_info(ctx, user_info).await?; + let allowed_ips = Arc::new(auth_info.allowed_ips); + let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids); + let access_blocker_flags = auth_info.access_blocker_flags; + let user = &user_info.user; + let account_id = auth_info.account_id; + if let Some(project_id) = auth_info.project_id { + let normalized_ep_int = normalized_ep.into(); + self.caches.project_info.insert_role_secret( + project_id, + normalized_ep_int, + user.into(), + auth_info.secret.clone(), + ); + self.caches.project_info.insert_allowed_ips( + project_id, + normalized_ep_int, + allowed_ips.clone(), + ); + self.caches.project_info.insert_allowed_vpc_endpoint_ids( + account_id, + project_id, + normalized_ep_int, + allowed_vpc_endpoint_ids.clone(), + ); + self.caches.project_info.insert_block_public_or_vpc_access( + project_id, + normalized_ep_int, + access_blocker_flags, + ); + ctx.set_project_id(project_id); + } + Ok(Cached::new_uncached(allowed_vpc_endpoint_ids)) + } + + async fn get_block_public_or_vpc_access( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result { + let normalized_ep = &user_info.endpoint.normalize(); + if let Some(access_blocker_flags) = self + .caches + .project_info + .get_block_public_or_vpc_access(normalized_ep) + { + Metrics::get() + .proxy + .access_blocker_flags_cache_stats + .inc(CacheOutcome::Hit); + return Ok(access_blocker_flags); + } + + Metrics::get() + .proxy + .access_blocker_flags_cache_stats + .inc(CacheOutcome::Miss); + + let auth_info = self.do_get_auth_info(ctx, user_info).await?; + let allowed_ips = Arc::new(auth_info.allowed_ips); + let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids); + let access_blocker_flags = auth_info.access_blocker_flags; + let user = &user_info.user; + let account_id = 
auth_info.account_id; + if let Some(project_id) = auth_info.project_id { + let normalized_ep_int = normalized_ep.into(); + self.caches.project_info.insert_role_secret( + project_id, + normalized_ep_int, + user.into(), + auth_info.secret.clone(), + ); + self.caches.project_info.insert_allowed_ips( + project_id, + normalized_ep_int, + allowed_ips.clone(), + ); + self.caches.project_info.insert_allowed_vpc_endpoint_ids( + account_id, + project_id, + normalized_ep_int, + allowed_vpc_endpoint_ids.clone(), + ); + self.caches.project_info.insert_block_public_or_vpc_access( + project_id, + normalized_ep_int, + access_blocker_flags.clone(), + ); ctx.set_project_id(project_id); } - Ok(( - Cached::new_uncached(allowed_ips), - Some(Cached::new_uncached(auth_info.secret)), - )) + Ok(Cached::new_uncached(access_blocker_flags)) } #[tracing::instrument(skip_all)] diff --git a/proxy/src/control_plane/client/mock.rs b/proxy/src/control_plane/client/mock.rs index 5f8bda0f35ae..1e6cde8fb080 100644 --- a/proxy/src/control_plane/client/mock.rs +++ b/proxy/src/control_plane/client/mock.rs @@ -13,12 +13,14 @@ use crate::auth::backend::ComputeUserInfo; use crate::auth::IpPattern; use crate::cache::Cached; use crate::context::RequestContext; -use crate::control_plane::client::{CachedAllowedIps, CachedRoleSecret}; +use crate::control_plane::client::{ + CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedRoleSecret, +}; use crate::control_plane::errors::{ ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError, }; use crate::control_plane::messages::MetricsAuxInfo; -use crate::control_plane::{AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo}; +use crate::control_plane::{AccessBlockerFlags, AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo}; use crate::error::io_error; use crate::intern::RoleNameInt; use crate::types::{BranchId, EndpointId, ProjectId, RoleName}; @@ -121,7 +123,10 @@ impl MockControlPlane { Ok(AuthInfo { secret, allowed_ips, + allowed_vpc_endpoint_ids: vec![], project_id: None, + account_id: None, + access_blocker_flags: AccessBlockerFlags::default(), }) } @@ -214,16 +219,35 @@ impl super::ControlPlaneApi for MockControlPlane { )) } - async fn get_allowed_ips_and_secret( + async fn get_allowed_ips( + &self, + _ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result { + Ok(Cached::new_uncached(Arc::new( + self.do_get_auth_info(user_info).await?.allowed_ips, + ))) + } + + async fn get_allowed_vpc_endpoint_ids( + &self, + _ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result { + Ok(Cached::new_uncached(Arc::new( + self.do_get_auth_info(user_info) + .await? 
+ .allowed_vpc_endpoint_ids, + ))) + } + + async fn get_block_public_or_vpc_access( &self, _ctx: &RequestContext, user_info: &ComputeUserInfo, - ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { - Ok(( - Cached::new_uncached(Arc::new( - self.do_get_auth_info(user_info).await?.allowed_ips, - )), - None, + ) -> Result { + Ok(Cached::new_uncached( + self.do_get_auth_info(user_info).await?.access_blocker_flags, )) } diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index d559d96bbc61..a06943726e50 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -6,7 +6,7 @@ use std::hash::Hash; use std::sync::Arc; use std::time::Duration; -use dashmap::DashMap; +use clashmap::ClashMap; use tokio::time::Instant; use tracing::{debug, info}; @@ -17,7 +17,8 @@ use crate::cache::project_info::ProjectInfoCacheImpl; use crate::config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions}; use crate::context::RequestContext; use crate::control_plane::{ - errors, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, NodeInfoCache, + errors, CachedAccessBlockerFlags, CachedAllowedIps, CachedAllowedVpcEndpointIds, + CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, NodeInfoCache, }; use crate::error::ReportableError; use crate::metrics::ApiLockMetrics; @@ -55,17 +56,45 @@ impl ControlPlaneApi for ControlPlaneClient { } } - async fn get_allowed_ips_and_secret( + async fn get_allowed_ips( &self, ctx: &RequestContext, user_info: &ComputeUserInfo, - ) -> Result<(CachedAllowedIps, Option), errors::GetAuthInfoError> { + ) -> Result { match self { - Self::ProxyV1(api) => api.get_allowed_ips_and_secret(ctx, user_info).await, + Self::ProxyV1(api) => api.get_allowed_ips(ctx, user_info).await, #[cfg(any(test, feature = "testing"))] - Self::PostgresMock(api) => api.get_allowed_ips_and_secret(ctx, user_info).await, + Self::PostgresMock(api) => api.get_allowed_ips(ctx, user_info).await, #[cfg(test)] - Self::Test(api) => api.get_allowed_ips_and_secret(), + Self::Test(api) => api.get_allowed_ips(), + } + } + + async fn get_allowed_vpc_endpoint_ids( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result { + match self { + Self::ProxyV1(api) => api.get_allowed_vpc_endpoint_ids(ctx, user_info).await, + #[cfg(any(test, feature = "testing"))] + Self::PostgresMock(api) => api.get_allowed_vpc_endpoint_ids(ctx, user_info).await, + #[cfg(test)] + Self::Test(api) => api.get_allowed_vpc_endpoint_ids(), + } + } + + async fn get_block_public_or_vpc_access( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result { + match self { + Self::ProxyV1(api) => api.get_block_public_or_vpc_access(ctx, user_info).await, + #[cfg(any(test, feature = "testing"))] + Self::PostgresMock(api) => api.get_block_public_or_vpc_access(ctx, user_info).await, + #[cfg(test)] + Self::Test(api) => api.get_block_public_or_vpc_access(), } } @@ -102,9 +131,15 @@ impl ControlPlaneApi for ControlPlaneClient { pub(crate) trait TestControlPlaneClient: Send + Sync + 'static { fn wake_compute(&self) -> Result; - fn get_allowed_ips_and_secret( + fn get_allowed_ips(&self) -> Result; + + fn get_allowed_vpc_endpoint_ids( + &self, + ) -> Result; + + fn get_block_public_or_vpc_access( &self, - ) -> Result<(CachedAllowedIps, Option), errors::GetAuthInfoError>; + ) -> Result; fn dyn_clone(&self) -> Box; } @@ -148,7 +183,7 @@ impl ApiCaches { /// Various caches for [`control_plane`](super). 
pub struct ApiLocks { name: &'static str, - node_locks: DashMap>, + node_locks: ClashMap>, config: RateLimiterConfig, timeout: Duration, epoch: std::time::Duration, @@ -180,7 +215,7 @@ impl ApiLocks { ) -> prometheus::Result { Ok(Self { name, - node_locks: DashMap::with_shard_amount(shards), + node_locks: ClashMap::with_shard_amount(shards), config, timeout, epoch, @@ -238,7 +273,7 @@ impl ApiLocks { let mut lock = shard.write(); let timer = self.metrics.reclamation_lag_seconds.start_timer(); let count = lock - .extract_if(|_, semaphore| Arc::strong_count(semaphore.get_mut()) == 1) + .extract_if(|(_, semaphore)| Arc::strong_count(semaphore) == 1) .count(); drop(lock); self.metrics.semaphores_unregistered.inc_by(count as u64); diff --git a/proxy/src/control_plane/messages.rs b/proxy/src/control_plane/messages.rs index d068614b24df..5883d02b92c7 100644 --- a/proxy/src/control_plane/messages.rs +++ b/proxy/src/control_plane/messages.rs @@ -4,7 +4,7 @@ use measured::FixedCardinalityLabel; use serde::{Deserialize, Serialize}; use crate::auth::IpPattern; -use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt}; +use crate::intern::{AccountIdInt, BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt}; use crate::proxy::retry::CouldRetry; /// Generic error response with human-readable description. @@ -227,8 +227,11 @@ pub(crate) struct UserFacingMessage { pub(crate) struct GetEndpointAccessControl { pub(crate) role_secret: Box, pub(crate) allowed_ips: Option>, + pub(crate) allowed_vpc_endpoint_ids: Option>, pub(crate) project_id: Option, - pub(crate) allowed_vpc_endpoint_ids: Option>, + pub(crate) account_id: Option, + pub(crate) block_public_connections: Option, + pub(crate) block_vpc_connections: Option, } /// Response which holds compute node's `host:port` pair. @@ -282,6 +285,10 @@ pub(crate) struct DatabaseInfo { pub(crate) aux: MetricsAuxInfo, #[serde(default)] pub(crate) allowed_ips: Option>, + #[serde(default)] + pub(crate) allowed_vpc_endpoint_ids: Option>, + #[serde(default)] + pub(crate) public_access_allowed: Option, } // Manually implement debug to omit sensitive info. @@ -293,6 +300,7 @@ impl fmt::Debug for DatabaseInfo { .field("dbname", &self.dbname) .field("user", &self.user) .field("allowed_ips", &self.allowed_ips) + .field("allowed_vpc_endpoint_ids", &self.allowed_vpc_endpoint_ids) .finish_non_exhaustive() } } @@ -457,19 +465,31 @@ mod tests { #[test] fn parse_get_role_secret() -> anyhow::Result<()> { - // Empty `allowed_ips` field. + // Empty `allowed_ips` and `allowed_vpc_endpoint_ids` field. 
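+        // All access-control fields are optional; a response may also carry the
+        // new fields, e.g. (illustrative values only):
+        //   { "role_secret": "secret",
+        //     "allowed_vpc_endpoint_ids": ["vpce-0abcd1234567890ef"],
+        //     "account_id": "some-account",
+        //     "block_public_connections": false,
+        //     "block_vpc_connections": true }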
+ let json = json!({ + "role_secret": "secret", + }); + serde_json::from_str::(&json.to_string())?; + let json = json!({ + "role_secret": "secret", + "allowed_ips": ["8.8.8.8"], + }); + serde_json::from_str::(&json.to_string())?; let json = json!({ "role_secret": "secret", + "allowed_vpc_endpoint_ids": ["vpce-0abcd1234567890ef"], }); serde_json::from_str::(&json.to_string())?; let json = json!({ "role_secret": "secret", "allowed_ips": ["8.8.8.8"], + "allowed_vpc_endpoint_ids": ["vpce-0abcd1234567890ef"], }); serde_json::from_str::(&json.to_string())?; let json = json!({ "role_secret": "secret", "allowed_ips": ["8.8.8.8"], + "allowed_vpc_endpoint_ids": ["vpce-0abcd1234567890ef"], "project_id": "project", }); serde_json::from_str::(&json.to_string())?; diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs index 1dca26d6866c..f92e4f3f6055 100644 --- a/proxy/src/control_plane/mod.rs +++ b/proxy/src/control_plane/mod.rs @@ -19,6 +19,7 @@ use crate::cache::{Cached, TimedLru}; use crate::config::ComputeConfig; use crate::context::RequestContext; use crate::control_plane::messages::{ControlPlaneErrorMessage, MetricsAuxInfo}; +use crate::intern::AccountIdInt; use crate::intern::ProjectIdInt; use crate::types::{EndpointCacheKey, EndpointId}; use crate::{compute, scram}; @@ -52,8 +53,14 @@ pub(crate) struct AuthInfo { pub(crate) secret: Option, /// List of IP addresses allowed for the autorization. pub(crate) allowed_ips: Vec, + /// List of VPC endpoints allowed for the autorization. + pub(crate) allowed_vpc_endpoint_ids: Vec, /// Project ID. This is used for cache invalidation. pub(crate) project_id: Option, + /// Account ID. This is used for cache invalidation. + pub(crate) account_id: Option, + /// Are public connections or VPC connections blocked? + pub(crate) access_blocker_flags: AccessBlockerFlags, } /// Info for establishing a connection to a compute node. @@ -95,11 +102,21 @@ impl NodeInfo { } } +#[derive(Clone, Default, Eq, PartialEq, Debug)] +pub(crate) struct AccessBlockerFlags { + pub public_access_blocked: bool, + pub vpc_access_blocked: bool, +} + pub(crate) type NodeInfoCache = TimedLru>>; pub(crate) type CachedNodeInfo = Cached<&'static NodeInfoCache, NodeInfo>; pub(crate) type CachedRoleSecret = Cached<&'static ProjectInfoCacheImpl, Option>; pub(crate) type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc>>; +pub(crate) type CachedAllowedVpcEndpointIds = + Cached<&'static ProjectInfoCacheImpl, Arc>>; +pub(crate) type CachedAccessBlockerFlags = + Cached<&'static ProjectInfoCacheImpl, AccessBlockerFlags>; /// This will allocate per each call, but the http requests alone /// already require a few allocations, so it should be fine. 
@@ -113,11 +130,23 @@ pub(crate) trait ControlPlaneApi { user_info: &ComputeUserInfo, ) -> Result; - async fn get_allowed_ips_and_secret( + async fn get_allowed_ips( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result; + + async fn get_allowed_vpc_endpoint_ids( + &self, + ctx: &RequestContext, + user_info: &ComputeUserInfo, + ) -> Result; + + async fn get_block_public_or_vpc_access( &self, ctx: &RequestContext, user_info: &ComputeUserInfo, - ) -> Result<(CachedAllowedIps, Option), errors::GetAuthInfoError>; + ) -> Result; async fn get_endpoint_jwks( &self, diff --git a/proxy/src/intern.rs b/proxy/src/intern.rs index 79c6020302af..0d1382679c81 100644 --- a/proxy/src/intern.rs +++ b/proxy/src/intern.rs @@ -7,7 +7,7 @@ use std::sync::OnceLock; use lasso::{Capacity, MemoryLimits, Spur, ThreadedRodeo}; use rustc_hash::FxHasher; -use crate::types::{BranchId, EndpointId, ProjectId, RoleName}; +use crate::types::{AccountId, BranchId, EndpointId, ProjectId, RoleName}; pub trait InternId: Sized + 'static { fn get_interner() -> &'static StringInterner; @@ -206,6 +206,26 @@ impl From for ProjectIdInt { } } +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct AccountIdTag; +impl InternId for AccountIdTag { + fn get_interner() -> &'static StringInterner { + static ROLE_NAMES: OnceLock> = OnceLock::new(); + ROLE_NAMES.get_or_init(Default::default) + } +} +pub type AccountIdInt = InternedString; +impl From<&AccountId> for AccountIdInt { + fn from(value: &AccountId) -> Self { + AccountIdTag::get_interner().get_or_intern(value) + } +} +impl From for AccountIdInt { + fn from(value: AccountId) -> Self { + AccountIdTag::get_interner().get_or_intern(&value) + } +} + #[cfg(test)] #[expect(clippy::unwrap_used)] mod tests { diff --git a/proxy/src/logging.rs b/proxy/src/logging.rs index 41f10f052ffa..97c9f5a59c27 100644 --- a/proxy/src/logging.rs +++ b/proxy/src/logging.rs @@ -1,10 +1,23 @@ -use tracing::Subscriber; +use std::cell::{Cell, RefCell}; +use std::collections::HashMap; +use std::hash::BuildHasher; +use std::{env, io}; + +use chrono::{DateTime, Utc}; +use opentelemetry::trace::TraceContextExt; +use scopeguard::defer; +use serde::ser::{SerializeMap, Serializer}; +use tracing::span; +use tracing::subscriber::Interest; +use tracing::{callsite, Event, Metadata, Span, Subscriber}; +use tracing_opentelemetry::OpenTelemetrySpanExt; use tracing_subscriber::filter::{EnvFilter, LevelFilter}; use tracing_subscriber::fmt::format::{Format, Full}; use tracing_subscriber::fmt::time::SystemTime; use tracing_subscriber::fmt::{FormatEvent, FormatFields}; +use tracing_subscriber::layer::{Context, Layer}; use tracing_subscriber::prelude::*; -use tracing_subscriber::registry::LookupSpan; +use tracing_subscriber::registry::{LookupSpan, SpanRef}; /// Initialize logging and OpenTelemetry tracing and exporter. /// @@ -15,6 +28,8 @@ use tracing_subscriber::registry::LookupSpan; /// destination, set `OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4318`. 
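+/// The log format itself is selected with the `LOGFMT` environment variable
+/// introduced below: `text` (the default) or `json`, e.g. `LOGFMT=json`.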
/// See pub async fn init() -> anyhow::Result { + let logfmt = LogFormat::from_env()?; + let env_filter = EnvFilter::builder() .with_default_directive(LevelFilter::INFO.into()) .from_env_lossy() @@ -29,17 +44,36 @@ pub async fn init() -> anyhow::Result { .expect("this should be a valid filter directive"), ); - let fmt_layer = tracing_subscriber::fmt::layer() - .with_ansi(false) - .with_writer(std::io::stderr) - .with_target(false); - let otlp_layer = tracing_utils::init_tracing("proxy").await; + let json_log_layer = if logfmt == LogFormat::Json { + Some(JsonLoggingLayer { + clock: RealClock, + skipped_field_indices: papaya::HashMap::default(), + writer: StderrWriter { + stderr: std::io::stderr(), + }, + }) + } else { + None + }; + + let text_log_layer = if logfmt == LogFormat::Text { + Some( + tracing_subscriber::fmt::layer() + .with_ansi(false) + .with_writer(std::io::stderr) + .with_target(false), + ) + } else { + None + }; + tracing_subscriber::registry() .with(env_filter) .with(otlp_layer) - .with(fmt_layer) + .with(json_log_layer) + .with(text_log_layer) .try_init()?; Ok(LoggingGuard) @@ -94,3 +128,857 @@ impl Drop for LoggingGuard { tracing_utils::shutdown_tracing(); } } + +// TODO: make JSON the default +#[derive(Copy, Clone, PartialEq, Eq, Default, Debug)] +enum LogFormat { + #[default] + Text = 1, + Json, +} + +impl LogFormat { + fn from_env() -> anyhow::Result { + let logfmt = env::var("LOGFMT"); + Ok(match logfmt.as_deref() { + Err(_) => LogFormat::default(), + Ok("text") => LogFormat::Text, + Ok("json") => LogFormat::Json, + Ok(logfmt) => anyhow::bail!("unknown log format: {logfmt}"), + }) + } +} + +trait MakeWriter { + fn make_writer(&self) -> impl io::Write; +} + +struct StderrWriter { + stderr: io::Stderr, +} + +impl MakeWriter for StderrWriter { + #[inline] + fn make_writer(&self) -> impl io::Write { + self.stderr.lock() + } +} + +// TODO: move into separate module or even separate crate. +trait Clock { + fn now(&self) -> DateTime; +} + +struct RealClock; + +impl Clock for RealClock { + #[inline] + fn now(&self) -> DateTime { + Utc::now() + } +} + +/// Name of the field used by tracing crate to store the event message. +const MESSAGE_FIELD: &str = "message"; + +thread_local! { + /// Protects against deadlocks and double panics during log writing. + /// The current panic handler will use tracing to log panic information. + static REENTRANCY_GUARD: Cell = const { Cell::new(false) }; + /// Thread-local instance with per-thread buffer for log writing. + static EVENT_FORMATTER: RefCell = RefCell::new(EventFormatter::new()); + /// Cached OS thread ID. + static THREAD_ID: u64 = gettid::gettid(); +} + +/// Implements tracing layer to handle events specific to logging. +struct JsonLoggingLayer { + clock: C, + skipped_field_indices: papaya::HashMap, + writer: W, +} + +impl Layer for JsonLoggingLayer +where + S: Subscriber + for<'a> LookupSpan<'a>, +{ + fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) { + use std::io::Write; + + // TODO: consider special tracing subscriber to grab timestamp very + // early, before OTel machinery, and add as event extension. 
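+        // The reentrancy guard below falls back to a freshly allocated formatter
+        // when this thread is already writing a log line (e.g. the panic hook
+        // logging through tracing), since re-borrowing the thread-local
+        // formatter would panic on the double borrow.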
+ let now = self.clock.now(); + + let res: io::Result<()> = REENTRANCY_GUARD.with(move |entered| { + if entered.get() { + let mut formatter = EventFormatter::new(); + formatter.format(now, event, &ctx, &self.skipped_field_indices)?; + self.writer.make_writer().write_all(formatter.buffer()) + } else { + entered.set(true); + defer!(entered.set(false);); + + EVENT_FORMATTER.with_borrow_mut(move |formatter| { + formatter.reset(); + formatter.format(now, event, &ctx, &self.skipped_field_indices)?; + self.writer.make_writer().write_all(formatter.buffer()) + }) + } + }); + + // In case logging fails we generate a simpler JSON object. + if let Err(err) = res { + if let Ok(mut line) = serde_json::to_vec(&serde_json::json!( { + "timestamp": now.to_rfc3339_opts(chrono::SecondsFormat::Micros, true), + "level": "ERROR", + "message": format_args!("cannot log event: {err:?}"), + "fields": { + "event": format_args!("{event:?}"), + }, + })) { + line.push(b'\n'); + self.writer.make_writer().write_all(&line).ok(); + } + } + } + + /// Registers a SpanFields instance as span extension. + fn on_new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) { + let span = ctx.span(id).expect("span must exist"); + let fields = SpanFields::default(); + fields.record_fields(attrs); + // This could deadlock when there's a panic somewhere in the tracing + // event handling and a read or write guard is still held. This includes + // the OTel subscriber. + span.extensions_mut().insert(fields); + } + + fn on_record(&self, id: &span::Id, values: &span::Record<'_>, ctx: Context<'_, S>) { + let span = ctx.span(id).expect("span must exist"); + let ext = span.extensions(); + if let Some(data) = ext.get::() { + data.record_fields(values); + } + } + + /// Called (lazily) whenever a new log call is executed. We quickly check + /// for duplicate field names and record duplicates as skippable. Last one + /// wins. + fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest { + if !metadata.is_event() { + // Must not be never because we wouldn't get trace and span data. + return Interest::always(); + } + + let mut field_indices = SkippedFieldIndices::default(); + let mut seen_fields = HashMap::<&'static str, usize>::new(); + for field in metadata.fields() { + use std::collections::hash_map::Entry; + match seen_fields.entry(field.name()) { + Entry::Vacant(entry) => { + // field not seen yet + entry.insert(field.index()); + } + Entry::Occupied(mut entry) => { + // replace currently stored index + let old_index = entry.insert(field.index()); + // ... and append it to list of skippable indices + field_indices.push(old_index); + } + } + } + + if !field_indices.is_empty() { + self.skipped_field_indices + .pin() + .insert(metadata.callsite(), field_indices); + } + + Interest::always() + } +} + +/// Stores span field values recorded during the spans lifetime. +#[derive(Default)] +struct SpanFields { + // TODO: Switch to custom enum with lasso::Spur for Strings? + fields: papaya::HashMap<&'static str, serde_json::Value>, +} + +impl SpanFields { + #[inline] + fn record_fields(&self, fields: R) { + fields.record(&mut SpanFieldsRecorder { + fields: self.fields.pin(), + }); + } +} + +/// Implements a tracing field visitor to convert and store values. 
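+/// For example, a span created with `info_span!("s", x = 42)` ends up storing
+/// the entry `("x", serde_json::json!(42))` in [`SpanFields`].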
+struct SpanFieldsRecorder<'m, S, G> { + fields: papaya::HashMapRef<'m, &'static str, serde_json::Value, S, G>, +} + +impl tracing::field::Visit for SpanFieldsRecorder<'_, S, G> { + #[inline] + fn record_f64(&mut self, field: &tracing::field::Field, value: f64) { + self.fields + .insert(field.name(), serde_json::Value::from(value)); + } + + #[inline] + fn record_i64(&mut self, field: &tracing::field::Field, value: i64) { + self.fields + .insert(field.name(), serde_json::Value::from(value)); + } + + #[inline] + fn record_u64(&mut self, field: &tracing::field::Field, value: u64) { + self.fields + .insert(field.name(), serde_json::Value::from(value)); + } + + #[inline] + fn record_i128(&mut self, field: &tracing::field::Field, value: i128) { + if let Ok(value) = i64::try_from(value) { + self.fields + .insert(field.name(), serde_json::Value::from(value)); + } else { + self.fields + .insert(field.name(), serde_json::Value::from(format!("{value}"))); + } + } + + #[inline] + fn record_u128(&mut self, field: &tracing::field::Field, value: u128) { + if let Ok(value) = u64::try_from(value) { + self.fields + .insert(field.name(), serde_json::Value::from(value)); + } else { + self.fields + .insert(field.name(), serde_json::Value::from(format!("{value}"))); + } + } + + #[inline] + fn record_bool(&mut self, field: &tracing::field::Field, value: bool) { + self.fields + .insert(field.name(), serde_json::Value::from(value)); + } + + #[inline] + fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) { + self.fields + .insert(field.name(), serde_json::Value::from(value)); + } + + #[inline] + fn record_str(&mut self, field: &tracing::field::Field, value: &str) { + self.fields + .insert(field.name(), serde_json::Value::from(value)); + } + + #[inline] + fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) { + self.fields + .insert(field.name(), serde_json::Value::from(format!("{value:?}"))); + } + + #[inline] + fn record_error( + &mut self, + field: &tracing::field::Field, + value: &(dyn std::error::Error + 'static), + ) { + self.fields + .insert(field.name(), serde_json::Value::from(format!("{value}"))); + } +} + +/// List of field indices skipped during logging. Can list duplicate fields or +/// metafields not meant to be logged. +#[derive(Clone, Default)] +struct SkippedFieldIndices { + bits: u64, +} + +impl SkippedFieldIndices { + #[inline] + fn is_empty(&self) -> bool { + self.bits == 0 + } + + #[inline] + fn push(&mut self, index: usize) { + self.bits |= 1u64 + .checked_shl(index as u32) + .expect("field index too large"); + } + + #[inline] + fn contains(&self, index: usize) -> bool { + self.bits + & 1u64 + .checked_shl(index as u32) + .expect("field index too large") + != 0 + } +} + +/// Formats a tracing event and writes JSON to its internal buffer including a newline. 
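+/// A formatted line looks roughly like the following (field order as produced
+/// by `format`; values purely illustrative):
+///
+/// ```json
+/// {"timestamp":"2024-01-01T00:00:00.000000Z","level":"INFO","message":"hi",
+///  "process_id":1234,"thread_id":5678,"target":"proxy::foo","src":"proxy/src/foo.rs:42","spans":{}}
+/// ```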
+// TODO: buffer capacity management, truncate if too large +struct EventFormatter { + logline_buffer: Vec, +} + +impl EventFormatter { + #[inline] + fn new() -> Self { + EventFormatter { + logline_buffer: Vec::new(), + } + } + + #[inline] + fn buffer(&self) -> &[u8] { + &self.logline_buffer + } + + #[inline] + fn reset(&mut self) { + self.logline_buffer.clear(); + } + + fn format( + &mut self, + now: DateTime, + event: &Event<'_>, + ctx: &Context<'_, S>, + skipped_field_indices: &papaya::HashMap, + ) -> io::Result<()> + where + S: Subscriber + for<'a> LookupSpan<'a>, + { + let timestamp = now.to_rfc3339_opts(chrono::SecondsFormat::Micros, true); + + use tracing_log::NormalizeEvent; + let normalized_meta = event.normalized_metadata(); + let meta = normalized_meta.as_ref().unwrap_or_else(|| event.metadata()); + + let skipped_field_indices = skipped_field_indices.pin(); + let skipped_field_indices = skipped_field_indices.get(&meta.callsite()); + + let mut serialize = || { + let mut serializer = serde_json::Serializer::new(&mut self.logline_buffer); + + let mut serializer = serializer.serialize_map(None)?; + + // Timestamp comes first, so raw lines can be sorted by timestamp. + serializer.serialize_entry("timestamp", ×tamp)?; + + // Level next. + serializer.serialize_entry("level", &meta.level().as_str())?; + + // Message next. + serializer.serialize_key("message")?; + let mut message_extractor = + MessageFieldExtractor::new(serializer, skipped_field_indices); + event.record(&mut message_extractor); + let mut serializer = message_extractor.into_serializer()?; + + let mut fields_present = FieldsPresent(false, skipped_field_indices); + event.record(&mut fields_present); + if fields_present.0 { + serializer.serialize_entry( + "fields", + &SerializableEventFields(event, skipped_field_indices), + )?; + } + + let pid = std::process::id(); + if pid != 1 { + serializer.serialize_entry("process_id", &pid)?; + } + + THREAD_ID.with(|tid| serializer.serialize_entry("thread_id", tid))?; + + // TODO: tls cache? name could change + if let Some(thread_name) = std::thread::current().name() { + if !thread_name.is_empty() && thread_name != "tokio-runtime-worker" { + serializer.serialize_entry("thread_name", thread_name)?; + } + } + + if let Some(task_id) = tokio::task::try_id() { + serializer.serialize_entry("task_id", &format_args!("{task_id}"))?; + } + + serializer.serialize_entry("target", meta.target())?; + + if let Some(module) = meta.module_path() { + if module != meta.target() { + serializer.serialize_entry("module", module)?; + } + } + + if let Some(file) = meta.file() { + if let Some(line) = meta.line() { + serializer.serialize_entry("src", &format_args!("{file}:{line}"))?; + } else { + serializer.serialize_entry("src", file)?; + } + } + + { + let otel_context = Span::current().context(); + let otel_spanref = otel_context.span(); + let span_context = otel_spanref.span_context(); + if span_context.is_valid() { + serializer.serialize_entry( + "trace_id", + &format_args!("{}", span_context.trace_id()), + )?; + } + } + + serializer.serialize_entry("spans", &SerializableSpanStack(ctx))?; + + serializer.end() + }; + + serialize().map_err(io::Error::other)?; + self.logline_buffer.push(b'\n'); + Ok(()) + } +} + +/// Extracts the message field that's mixed will other fields. 
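+/// If the event carries no usable `message` field, an empty string is
+/// serialized instead (see `into_serializer`), so the `message` key is always
+/// present in the output.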
+struct MessageFieldExtractor<'a, S: serde::ser::SerializeMap> { + serializer: S, + skipped_field_indices: Option<&'a SkippedFieldIndices>, + state: Option>, +} + +impl<'a, S: serde::ser::SerializeMap> MessageFieldExtractor<'a, S> { + #[inline] + fn new(serializer: S, skipped_field_indices: Option<&'a SkippedFieldIndices>) -> Self { + Self { + serializer, + skipped_field_indices, + state: None, + } + } + + #[inline] + fn into_serializer(mut self) -> Result { + match self.state { + Some(Ok(())) => {} + Some(Err(err)) => return Err(err), + None => self.serializer.serialize_value("")?, + } + Ok(self.serializer) + } + + #[inline] + fn accept_field(&self, field: &tracing::field::Field) -> bool { + self.state.is_none() + && field.name() == MESSAGE_FIELD + && !self + .skipped_field_indices + .is_some_and(|i| i.contains(field.index())) + } +} + +impl tracing::field::Visit for MessageFieldExtractor<'_, S> { + #[inline] + fn record_f64(&mut self, field: &tracing::field::Field, value: f64) { + if self.accept_field(field) { + self.state = Some(self.serializer.serialize_value(&value)); + } + } + + #[inline] + fn record_i64(&mut self, field: &tracing::field::Field, value: i64) { + if self.accept_field(field) { + self.state = Some(self.serializer.serialize_value(&value)); + } + } + + #[inline] + fn record_u64(&mut self, field: &tracing::field::Field, value: u64) { + if self.accept_field(field) { + self.state = Some(self.serializer.serialize_value(&value)); + } + } + + #[inline] + fn record_i128(&mut self, field: &tracing::field::Field, value: i128) { + if self.accept_field(field) { + self.state = Some(self.serializer.serialize_value(&value)); + } + } + + #[inline] + fn record_u128(&mut self, field: &tracing::field::Field, value: u128) { + if self.accept_field(field) { + self.state = Some(self.serializer.serialize_value(&value)); + } + } + + #[inline] + fn record_bool(&mut self, field: &tracing::field::Field, value: bool) { + if self.accept_field(field) { + self.state = Some(self.serializer.serialize_value(&value)); + } + } + + #[inline] + fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) { + if self.accept_field(field) { + self.state = Some(self.serializer.serialize_value(&format_args!("{value:x?}"))); + } + } + + #[inline] + fn record_str(&mut self, field: &tracing::field::Field, value: &str) { + if self.accept_field(field) { + self.state = Some(self.serializer.serialize_value(&value)); + } + } + + #[inline] + fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) { + if self.accept_field(field) { + self.state = Some(self.serializer.serialize_value(&format_args!("{value:?}"))); + } + } + + #[inline] + fn record_error( + &mut self, + field: &tracing::field::Field, + value: &(dyn std::error::Error + 'static), + ) { + if self.accept_field(field) { + self.state = Some(self.serializer.serialize_value(&format_args!("{value}"))); + } + } +} + +/// Checks if there's any fields and field values present. If not, the JSON subobject +/// can be skipped. +// This is entirely optional and only cosmetic, though maybe helps a +// bit during log parsing in dashboards when there's no field with empty object. +struct FieldsPresent<'a>(pub bool, Option<&'a SkippedFieldIndices>); + +// Even though some methods have an overhead (error, bytes) it is assumed the +// compiler won't include this since we ignore the value entirely. 
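+// The acceptance check mirrors `MessageFieldSkipper::accept_field`, so the flag
+// ends up true only if at least one field would survive into the `fields` object.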
+impl tracing::field::Visit for FieldsPresent<'_> {
+    #[inline]
+    fn record_debug(&mut self, field: &tracing::field::Field, _: &dyn std::fmt::Debug) {
+        if !self.1.is_some_and(|i| i.contains(field.index()))
+            && field.name() != MESSAGE_FIELD
+            && !field.name().starts_with("log.")
+        {
+            self.0 |= true;
+        }
+    }
+}
+
+/// Serializes the fields directly supplied with a log event.
+struct SerializableEventFields<'a, 'event>(
+    &'a tracing::Event<'event>,
+    Option<&'a SkippedFieldIndices>,
+);
+
+impl serde::ser::Serialize for SerializableEventFields<'_, '_> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        use serde::ser::SerializeMap;
+        let serializer = serializer.serialize_map(None)?;
+        let mut message_skipper = MessageFieldSkipper::new(serializer, self.1);
+        self.0.record(&mut message_skipper);
+        let serializer = message_skipper.into_serializer()?;
+        serializer.end()
+    }
+}
+
+/// A tracing field visitor that skips the message field.
+struct MessageFieldSkipper<'a, S: serde::ser::SerializeMap> {
+    serializer: S,
+    skipped_field_indices: Option<&'a SkippedFieldIndices>,
+    state: Result<(), S::Error>,
+}
+
+impl<'a, S: serde::ser::SerializeMap> MessageFieldSkipper<'a, S> {
+    #[inline]
+    fn new(serializer: S, skipped_field_indices: Option<&'a SkippedFieldIndices>) -> Self {
+        Self {
+            serializer,
+            skipped_field_indices,
+            state: Ok(()),
+        }
+    }
+
+    #[inline]
+    fn accept_field(&self, field: &tracing::field::Field) -> bool {
+        self.state.is_ok()
+            && field.name() != MESSAGE_FIELD
+            && !field.name().starts_with("log.")
+            && !self
+                .skipped_field_indices
+                .is_some_and(|i| i.contains(field.index()))
+    }
+
+    #[inline]
+    fn into_serializer(self) -> Result<S, S::Error> {
+        self.state?;
+        Ok(self.serializer)
+    }
+}
+
+impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldSkipper<'_, S> {
+    #[inline]
+    fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
+        if self.accept_field(field) {
+            self.state = self.serializer.serialize_entry(field.name(), &value);
+        }
+    }
+
+    #[inline]
+    fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
+        if self.accept_field(field) {
+            self.state = self.serializer.serialize_entry(field.name(), &value);
+        }
+    }
+
+    #[inline]
+    fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
+        if self.accept_field(field) {
+            self.state = self.serializer.serialize_entry(field.name(), &value);
+        }
+    }
+
+    #[inline]
+    fn record_i128(&mut self, field: &tracing::field::Field, value: i128) {
+        if self.accept_field(field) {
+            self.state = self.serializer.serialize_entry(field.name(), &value);
+        }
+    }
+
+    #[inline]
+    fn record_u128(&mut self, field: &tracing::field::Field, value: u128) {
+        if self.accept_field(field) {
+            self.state = self.serializer.serialize_entry(field.name(), &value);
+        }
+    }
+
+    #[inline]
+    fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
+        if self.accept_field(field) {
+            self.state = self.serializer.serialize_entry(field.name(), &value);
+        }
+    }
+
+    #[inline]
+    fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) {
+        if self.accept_field(field) {
+            self.state = self
+                .serializer
+                .serialize_entry(field.name(), &format_args!("{value:x?}"));
+        }
+    }
+
+    #[inline]
+    fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
+        if self.accept_field(field) {
+            self.state = self.serializer.serialize_entry(field.name(), &value);
+        }
+    }
+
+    #[inline]
+    fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
+        if self.accept_field(field) {
+            self.state = self
+                .serializer
+                .serialize_entry(field.name(), &format_args!("{value:?}"));
+        }
+    }
+
+    #[inline]
+    fn record_error(
+        &mut self,
+        field: &tracing::field::Field,
+        value: &(dyn std::error::Error + 'static),
+    ) {
+        if self.accept_field(field) {
+            self.state = self.serializer.serialize_value(&format_args!("{value}"));
+        }
+    }
+}
+
+/// Serializes the span stack from root to leaf (parent of event) enumerated
+/// inside an object where the keys are just the number padded with zeroes
+/// to retain sorting order.
+// The object is necessary because Loki cannot flatten arrays.
+struct SerializableSpanStack<'a, 'b, Span>(&'b Context<'a, Span>)
+where
+    Span: Subscriber + for<'lookup> LookupSpan<'lookup>;
+
+impl<Span> serde::ser::Serialize for SerializableSpanStack<'_, '_, Span>
+where
+    Span: Subscriber + for<'lookup> LookupSpan<'lookup>,
+{
+    fn serialize<Ser>(&self, serializer: Ser) -> Result<Ser::Ok, Ser::Error>
+    where
+        Ser: serde::ser::Serializer,
+    {
+        let mut serializer = serializer.serialize_map(None)?;
+
+        if let Some(leaf_span) = self.0.lookup_current() {
+            for (i, span) in leaf_span.scope().from_root().enumerate() {
+                serializer.serialize_entry(&format_args!("{i:02}"), &SerializableSpan(&span))?;
+            }
+        }
+
+        serializer.end()
+    }
+}
+
+/// Serializes a single span. Include the span ID, name and its fields as
+/// recorded up to this point.
+struct SerializableSpan<'a, 'b, Span>(&'b SpanRef<'a, Span>)
+where
+    Span: for<'lookup> LookupSpan<'lookup>;
+
+impl<Span> serde::ser::Serialize for SerializableSpan<'_, '_, Span>
+where
+    Span: for<'lookup> LookupSpan<'lookup>,
+{
+    fn serialize<Ser>(&self, serializer: Ser) -> Result<Ser::Ok, Ser::Error>
+    where
+        Ser: serde::ser::Serializer,
+    {
+        let mut serializer = serializer.serialize_map(None)?;
+        // TODO: the span ID is probably only useful for debugging tracing.
+ serializer.serialize_entry("span_id", &format_args!("{:016x}", self.0.id().into_u64()))?; + serializer.serialize_entry("span_name", self.0.metadata().name())?; + + let ext = self.0.extensions(); + if let Some(data) = ext.get::() { + for (key, value) in &data.fields.pin() { + serializer.serialize_entry(key, value)?; + } + } + + serializer.end() + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use std::sync::{Arc, Mutex, MutexGuard}; + + use assert_json_diff::assert_json_eq; + use tracing::info_span; + + use super::*; + + struct TestClock { + current_time: Mutex>, + } + + impl Clock for Arc { + fn now(&self) -> DateTime { + *self.current_time.lock().expect("poisoned") + } + } + + struct VecWriter<'a> { + buffer: MutexGuard<'a, Vec>, + } + + impl MakeWriter for Arc>> { + fn make_writer(&self) -> impl io::Write { + VecWriter { + buffer: self.lock().expect("poisoned"), + } + } + } + + impl io::Write for VecWriter<'_> { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.buffer.write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } + } + + #[test] + fn test_field_collection() { + let clock = Arc::new(TestClock { + current_time: Mutex::new(Utc::now()), + }); + let buffer = Arc::new(Mutex::new(Vec::new())); + let log_layer = JsonLoggingLayer { + clock: clock.clone(), + skipped_field_indices: papaya::HashMap::default(), + writer: buffer.clone(), + }; + + let registry = tracing_subscriber::Registry::default().with(log_layer); + + tracing::subscriber::with_default(registry, || { + info_span!("span1", x = 40, x = 41, x = 42).in_scope(|| { + info_span!("span2").in_scope(|| { + tracing::error!( + a = 1, + a = 2, + a = 3, + message = "explicit message field", + "implicit message field" + ); + }); + }); + }); + + let buffer = Arc::try_unwrap(buffer) + .expect("no other reference") + .into_inner() + .expect("poisoned"); + let actual: serde_json::Value = serde_json::from_slice(&buffer).expect("valid JSON"); + let expected: serde_json::Value = serde_json::json!( + { + "timestamp": clock.now().to_rfc3339_opts(chrono::SecondsFormat::Micros, true), + "level": "ERROR", + "message": "explicit message field", + "fields": { + "a": 3, + }, + "spans": { + "00":{ + "span_id": "0000000000000001", + "span_name": "span1", + "x": 42, + }, + "01": { + "span_id": "0000000000000002", + "span_name": "span2", + } + }, + "src": actual.as_object().unwrap().get("src").unwrap().as_str().unwrap(), + "target": "proxy::logging::tests", + "process_id": actual.as_object().unwrap().get("process_id").unwrap().as_number().unwrap(), + "thread_id": actual.as_object().unwrap().get("thread_id").unwrap().as_number().unwrap(), + "thread_name": "logging::tests::test_field_collection", + } + ); + + assert_json_eq!(actual, expected); + } +} diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index f3d281a26b59..25bcc81108b6 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -96,6 +96,16 @@ pub struct ProxyMetrics { #[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0]))] pub allowed_ips_number: Histogram<10>, + /// Number of cache hits/misses for VPC endpoint IDs. + pub vpc_endpoint_id_cache_stats: CounterVec>, + + /// Number of cache hits/misses for access blocker flags. 
+ pub access_blocker_flags_cache_stats: CounterVec>, + + /// Number of allowed VPC endpoints IDs + #[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0]))] + pub allowed_vpc_endpoint_ids: Histogram<10>, + /// Number of connections (per sni). pub accepted_connections_by_sni: CounterVec>, @@ -570,6 +580,9 @@ pub enum RedisEventsCount { CancelSession, PasswordUpdate, AllowedIpsUpdate, + AllowedVpcEndpointIdsUpdateForProjects, + AllowedVpcEndpointIdsUpdateForAllProjectsInOrg, + BlockPublicOrVpcAccessUpdate, } pub struct ThreadPoolWorkers(usize); diff --git a/proxy/src/proxy/copy_bidirectional.rs b/proxy/src/proxy/copy_bidirectional.rs index 3336a9556a5b..861f1766e84c 100644 --- a/proxy/src/proxy/copy_bidirectional.rs +++ b/proxy/src/proxy/copy_bidirectional.rs @@ -201,25 +201,26 @@ impl CopyBuffer { W: AsyncWrite + ?Sized, { loop { - // If our buffer is empty, then we need to read some data to - // continue. - if self.pos == self.cap && !self.read_done { - self.pos = 0; - self.cap = 0; - + // If there is some space left in our buffer, then we try to read some + // data to continue, thus maximizing the chances of a large write. + if self.cap < self.buf.len() && !self.read_done { match self.poll_fill_buf(cx, reader.as_mut()) { Poll::Ready(Ok(())) => (), Poll::Ready(Err(err)) => return Poll::Ready(Err(ErrorDirection::Read(err))), Poll::Pending => { - // Try flushing when the reader has no progress to avoid deadlock - // when the reader depends on buffered writer. - if self.need_flush { - ready!(writer.as_mut().poll_flush(cx)) - .map_err(ErrorDirection::Write)?; - self.need_flush = false; + // Ignore pending reads when our buffer is not empty, because + // we can try to write data immediately. + if self.pos == self.cap { + // Try flushing when the reader has no progress to avoid deadlock + // when the reader depends on buffered writer. + if self.need_flush { + ready!(writer.as_mut().poll_flush(cx)) + .map_err(ErrorDirection::Write)?; + self.need_flush = false; + } + + return Poll::Pending; } - - return Poll::Pending; } } } @@ -246,9 +247,13 @@ impl CopyBuffer { "writer returned length larger than input slice" ); + // All data has been written, the buffer can be considered empty again + self.pos = 0; + self.cap = 0; + // If we've written all the data and we've seen EOF, flush out the // data and finish the transfer. 
- if self.pos == self.cap && self.read_done { + if self.read_done { ready!(writer.as_mut().poll_flush(cx)).map_err(ErrorDirection::Write)?; return Poll::Ready(Ok(self.amt)); } diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index ab173bd0d052..8a407c811971 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -283,7 +283,8 @@ pub(crate) async fn handle_client( cancel_key_data, ctx, config.authentication_config.ip_allowlist_check_enabled, - auth_backend, + config.authentication_config.is_vpc_acccess_proxy, + auth_backend.get_api(), ) .await .inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok(); diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index 10db2bcb303f..d8c00a9b4177 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -26,7 +26,7 @@ use crate::config::{ComputeConfig, RetryConfig}; use crate::control_plane::client::{ControlPlaneClient, TestControlPlaneClient}; use crate::control_plane::messages::{ControlPlaneErrorMessage, Details, MetricsAuxInfo, Status}; use crate::control_plane::{ - self, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, NodeInfo, NodeInfoCache, + self, CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedNodeInfo, NodeInfo, NodeInfoCache, }; use crate::error::ErrorKind; use crate::tls::client_config::compute_client_config_with_certs; @@ -526,9 +526,19 @@ impl TestControlPlaneClient for TestConnectMechanism { } } - fn get_allowed_ips_and_secret( + fn get_allowed_ips(&self) -> Result { + unimplemented!("not used in tests") + } + + fn get_allowed_vpc_endpoint_ids( + &self, + ) -> Result { + unimplemented!("not used in tests") + } + + fn get_block_public_or_vpc_access( &self, - ) -> Result<(CachedAllowedIps, Option), control_plane::errors::GetAuthInfoError> + ) -> Result { unimplemented!("not used in tests") } diff --git a/proxy/src/rate_limiter/leaky_bucket.rs b/proxy/src/rate_limiter/leaky_bucket.rs index bff800f0a2f0..9645eaf725b1 100644 --- a/proxy/src/rate_limiter/leaky_bucket.rs +++ b/proxy/src/rate_limiter/leaky_bucket.rs @@ -2,7 +2,7 @@ use std::hash::Hash; use std::sync::atomic::{AtomicUsize, Ordering}; use ahash::RandomState; -use dashmap::DashMap; +use clashmap::ClashMap; use rand::{thread_rng, Rng}; use tokio::time::Instant; use tracing::info; @@ -14,7 +14,7 @@ use crate::intern::EndpointIdInt; pub type EndpointRateLimiter = LeakyBucketRateLimiter; pub struct LeakyBucketRateLimiter { - map: DashMap, + map: ClashMap, config: utils::leaky_bucket::LeakyBucketConfig, access_count: AtomicUsize, } @@ -27,7 +27,7 @@ impl LeakyBucketRateLimiter { pub fn new_with_shards(config: LeakyBucketConfig, shards: usize) -> Self { Self { - map: DashMap::with_hasher_and_shard_amount(RandomState::new(), shards), + map: ClashMap::with_hasher_and_shard_amount(RandomState::new(), shards), config: config.into(), access_count: AtomicUsize::new(0), } @@ -58,7 +58,7 @@ impl LeakyBucketRateLimiter { let shard = thread_rng().gen_range(0..n); self.map.shards()[shard] .write() - .retain(|_, value| !value.get().bucket_is_empty(now)); + .retain(|(_, value)| !value.bucket_is_empty(now)); } } diff --git a/proxy/src/rate_limiter/limiter.rs b/proxy/src/rate_limiter/limiter.rs index ec080f270b77..ef6c39f230f4 100644 --- a/proxy/src/rate_limiter/limiter.rs +++ b/proxy/src/rate_limiter/limiter.rs @@ -5,7 +5,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Mutex; use anyhow::bail; -use dashmap::DashMap; +use clashmap::ClashMap; use itertools::Itertools; use rand::rngs::StdRng; use 
rand::{Rng, SeedableRng}; @@ -62,7 +62,7 @@ impl GlobalRateLimiter { pub type WakeComputeRateLimiter = BucketRateLimiter; pub struct BucketRateLimiter { - map: DashMap, Hasher>, + map: ClashMap, Hasher>, info: Cow<'static, [RateBucketInfo]>, access_count: AtomicUsize, rand: Mutex, @@ -202,7 +202,7 @@ impl BucketRateLimiter { info!(buckets = ?info, "endpoint rate limiter"); Self { info, - map: DashMap::with_hasher_and_shard_amount(hasher, 64), + map: ClashMap::with_hasher_and_shard_amount(hasher, 64), access_count: AtomicUsize::new(1), // start from 1 to avoid GC on the first request rand: Mutex::new(rand), } diff --git a/proxy/src/redis/keys.rs b/proxy/src/redis/keys.rs index dddc7e2054db..dcb9a59f873b 100644 --- a/proxy/src/redis/keys.rs +++ b/proxy/src/redis/keys.rs @@ -1,7 +1,8 @@ +use std::io::ErrorKind; + use anyhow::Ok; use pq_proto::{id_to_cancel_key, CancelKeyData}; use serde::{Deserialize, Serialize}; -use std::io::ErrorKind; pub mod keyspace { pub const CANCEL_PREFIX: &str = "cancel"; diff --git a/proxy/src/redis/kv_ops.rs b/proxy/src/redis/kv_ops.rs index dcc6aac51bb7..3689bf7ae29b 100644 --- a/proxy/src/redis/kv_ops.rs +++ b/proxy/src/redis/kv_ops.rs @@ -1,7 +1,6 @@ use redis::{AsyncCommands, ToRedisArgs}; use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider; - use crate::rate_limiter::{GlobalRateLimiter, RateBucketInfo}; pub struct RedisKVClient { diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs index 19fdd3280dfc..1a7024588aa1 100644 --- a/proxy/src/redis/notifications.rs +++ b/proxy/src/redis/notifications.rs @@ -10,7 +10,7 @@ use uuid::Uuid; use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider; use crate::cache::project_info::ProjectInfoCache; -use crate::intern::{ProjectIdInt, RoleNameInt}; +use crate::intern::{AccountIdInt, ProjectIdInt, RoleNameInt}; use crate::metrics::{Metrics, RedisErrors, RedisEventsCount}; const CPLANE_CHANNEL_NAME: &str = "neondb-proxy-ws-updates"; @@ -86,9 +86,7 @@ pub(crate) struct BlockPublicOrVpcAccessUpdated { #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] pub(crate) struct AllowedVpcEndpointsUpdatedForOrg { - // TODO: change type once the implementation is more fully fledged. - // See e.g. https://github.com/neondatabase/neon/pull/10073. - account_id: ProjectIdInt, + account_id: AccountIdInt, } #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] @@ -205,6 +203,24 @@ impl MessageHandler { .proxy .redis_events_count .inc(RedisEventsCount::PasswordUpdate); + } else if matches!( + msg, + Notification::AllowedVpcEndpointsUpdatedForProjects { .. } + ) { + Metrics::get() + .proxy + .redis_events_count + .inc(RedisEventsCount::AllowedVpcEndpointIdsUpdateForProjects); + } else if matches!(msg, Notification::AllowedVpcEndpointsUpdatedForOrg { .. }) { + Metrics::get() + .proxy + .redis_events_count + .inc(RedisEventsCount::AllowedVpcEndpointIdsUpdateForAllProjectsInOrg); + } else if matches!(msg, Notification::BlockPublicOrVpcAccessUpdated { .. }) { + Metrics::get() + .proxy + .redis_events_count + .inc(RedisEventsCount::BlockPublicOrVpcAccessUpdate); } // TODO: add additional metrics for the other event types. 
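// Editor's sketch, not part of the patch: the hunk above bumps the new Redis event
// counters with a chain of `else if matches!(..)` arms, mirroring the existing
// AllowedIpsUpdate/PasswordUpdate handling. The same notification-to-counter mapping
// can be written as one exhaustive `match`, so a notification variant without a counter
// becomes a compile-time decision rather than a silent fall-through (see the TODO above).
// The enums below are simplified stand-ins for the crate's `Notification` and
// `RedisEventsCount` types; the real ones carry payloads and more variants.
enum NotificationKind {
    AllowedIpsUpdate,
    PasswordUpdate,
    AllowedVpcEndpointsUpdatedForProjects,
    AllowedVpcEndpointsUpdatedForOrg,
    BlockPublicOrVpcAccessUpdated,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RedisEvent {
    AllowedIpsUpdate,
    PasswordUpdate,
    AllowedVpcEndpointIdsUpdateForProjects,
    AllowedVpcEndpointIdsUpdateForAllProjectsInOrg,
    BlockPublicOrVpcAccessUpdate,
}

/// Maps every notification kind to the counter it should increment; exhaustive by construction.
fn redis_event_for(kind: &NotificationKind) -> RedisEvent {
    match kind {
        NotificationKind::AllowedIpsUpdate => RedisEvent::AllowedIpsUpdate,
        NotificationKind::PasswordUpdate => RedisEvent::PasswordUpdate,
        NotificationKind::AllowedVpcEndpointsUpdatedForProjects => {
            RedisEvent::AllowedVpcEndpointIdsUpdateForProjects
        }
        NotificationKind::AllowedVpcEndpointsUpdatedForOrg => {
            RedisEvent::AllowedVpcEndpointIdsUpdateForAllProjectsInOrg
        }
        NotificationKind::BlockPublicOrVpcAccessUpdated => RedisEvent::BlockPublicOrVpcAccessUpdate,
    }
}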
@@ -230,20 +246,26 @@ fn invalidate_cache(cache: Arc, msg: Notification) { Notification::AllowedIpsUpdate { allowed_ips_update } => { cache.invalidate_allowed_ips_for_project(allowed_ips_update.project_id); } + Notification::BlockPublicOrVpcAccessUpdated { + block_public_or_vpc_access_updated, + } => cache.invalidate_block_public_or_vpc_access_for_project( + block_public_or_vpc_access_updated.project_id, + ), + Notification::AllowedVpcEndpointsUpdatedForOrg { + allowed_vpc_endpoints_updated_for_org, + } => cache.invalidate_allowed_vpc_endpoint_ids_for_org( + allowed_vpc_endpoints_updated_for_org.account_id, + ), + Notification::AllowedVpcEndpointsUpdatedForProjects { + allowed_vpc_endpoints_updated_for_projects, + } => cache.invalidate_allowed_vpc_endpoint_ids_for_projects( + allowed_vpc_endpoints_updated_for_projects.project_ids, + ), Notification::PasswordUpdate { password_update } => cache .invalidate_role_secret_for_project( password_update.project_id, password_update.role_name, ), - Notification::BlockPublicOrVpcAccessUpdated { .. } => { - // https://github.com/neondatabase/neon/pull/10073 - } - Notification::AllowedVpcEndpointsUpdatedForOrg { .. } => { - // https://github.com/neondatabase/neon/pull/10073 - } - Notification::AllowedVpcEndpointsUpdatedForProjects { .. } => { - // https://github.com/neondatabase/neon/pull/10073 - } Notification::UnknownTopic => unreachable!(), } } diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 6d5fb13681e9..0fb4a8a6cc70 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -30,6 +30,7 @@ use crate::control_plane::locks::ApiLocks; use crate::control_plane::CachedNodeInfo; use crate::error::{ErrorKind, ReportableError, UserFacingError}; use crate::intern::EndpointIdInt; +use crate::protocol2::ConnectionInfoExtra; use crate::proxy::connect_compute::ConnectMechanism; use crate::proxy::retry::{CouldRetry, ShouldRetryWakeCompute}; use crate::rate_limiter::EndpointRateLimiter; @@ -57,23 +58,52 @@ impl PoolingBackend { let user_info = user_info.clone(); let backend = self.auth_backend.as_ref().map(|()| user_info.clone()); - let (allowed_ips, maybe_secret) = backend.get_allowed_ips_and_secret(ctx).await?; + let allowed_ips = backend.get_allowed_ips(ctx).await?; + if self.config.authentication_config.ip_allowlist_check_enabled && !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) { return Err(AuthError::ip_address_not_allowed(ctx.peer_addr())); } + + let access_blocker_flags = backend.get_block_public_or_vpc_access(ctx).await?; + if self.config.authentication_config.is_vpc_acccess_proxy { + if access_blocker_flags.vpc_access_blocked { + return Err(AuthError::NetworkNotAllowed); + } + + let extra = ctx.extra(); + let incoming_endpoint_id = match extra { + None => String::new(), + Some(ConnectionInfoExtra::Aws { vpce_id }) => { + // Convert the vcpe_id to a string + String::from_utf8(vpce_id.to_vec()).unwrap_or_default() + } + Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(), + }; + + if incoming_endpoint_id.is_empty() { + return Err(AuthError::MissingVPCEndpointId); + } + + let allowed_vpc_endpoint_ids = backend.get_allowed_vpc_endpoint_ids(ctx).await?; + // TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that. 
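// Editor's sketch, not part of the patch: the check below, stated in positive form. An
// empty allow-list currently means "no restriction" (see the TODO above); otherwise the
// incoming VPC endpoint ID must appear in the list. Standalone version over plain
// strings; the function and parameter names here are chosen for illustration only.
fn vpc_endpoint_allowed(allowed_vpc_endpoint_ids: &[String], incoming_endpoint_id: &str) -> bool {
    // Empty list: allow everything (temporary semantics per the TODO above).
    allowed_vpc_endpoint_ids.is_empty()
        // Otherwise the incoming ID has to be explicitly allowed.
        || allowed_vpc_endpoint_ids
            .iter()
            .any(|id| id.as_str() == incoming_endpoint_id)
}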
+ if !allowed_vpc_endpoint_ids.is_empty() + && !allowed_vpc_endpoint_ids.contains(&incoming_endpoint_id) + { + return Err(AuthError::vpc_endpoint_id_not_allowed(incoming_endpoint_id)); + } + } else if access_blocker_flags.public_access_blocked { + return Err(AuthError::NetworkNotAllowed); + } + if !self .endpoint_rate_limiter .check(user_info.endpoint.clone().into(), 1) { return Err(AuthError::too_many_connections()); } - let cached_secret = match maybe_secret { - Some(secret) => secret, - None => backend.get_role_secret(ctx).await?, - }; - + let cached_secret = backend.get_role_secret(ctx).await?; let secret = match cached_secret.value.clone() { Some(secret) => self.config.authentication_config.check_rate_limit( ctx, diff --git a/proxy/src/serverless/conn_pool_lib.rs b/proxy/src/serverless/conn_pool_lib.rs index 44eac77e8f94..a300198de449 100644 --- a/proxy/src/serverless/conn_pool_lib.rs +++ b/proxy/src/serverless/conn_pool_lib.rs @@ -5,7 +5,7 @@ use std::sync::atomic::{self, AtomicUsize}; use std::sync::{Arc, Weak}; use std::time::Duration; -use dashmap::DashMap; +use clashmap::ClashMap; use parking_lot::RwLock; use postgres_client::ReadyForQueryStatus; use rand::Rng; @@ -351,11 +351,11 @@ where // // That should be a fairly conteded map, so return reference to the per-endpoint // pool as early as possible and release the lock. - pub(crate) global_pool: DashMap>>, + pub(crate) global_pool: ClashMap>>, /// Number of endpoint-connection pools /// - /// [`DashMap::len`] iterates over all inner pools and acquires a read lock on each. + /// [`ClashMap::len`] iterates over all inner pools and acquires a read lock on each. /// That seems like far too much effort, so we're using a relaxed increment counter instead. /// It's only used for diagnostics. pub(crate) global_pool_size: AtomicUsize, @@ -396,7 +396,7 @@ where pub(crate) fn new(config: &'static crate::config::HttpConfig) -> Arc { let shards = config.pool_options.pool_shards; Arc::new(Self { - global_pool: DashMap::with_shard_amount(shards), + global_pool: ClashMap::with_shard_amount(shards), global_pool_size: AtomicUsize::new(0), config, global_connections_count: Arc::new(AtomicUsize::new(0)), @@ -442,10 +442,10 @@ where .start_timer(); let current_len = shard.len(); let mut clients_removed = 0; - shard.retain(|endpoint, x| { + shard.retain(|(endpoint, x)| { // if the current endpoint pool is unique (no other strong or weak references) // then it is currently not in use by any connections. - if let Some(pool) = Arc::get_mut(x.get_mut()) { + if let Some(pool) = Arc::get_mut(x) { let endpoints = pool.get_mut(); clients_removed = endpoints.clear_closed(); diff --git a/proxy/src/types.rs b/proxy/src/types.rs index 6e0bd61c9442..d5952d1d8b0a 100644 --- a/proxy/src/types.rs +++ b/proxy/src/types.rs @@ -97,6 +97,8 @@ smol_str_wrapper!(EndpointId); smol_str_wrapper!(BranchId); // 90% of project strings are 23 characters or less. smol_str_wrapper!(ProjectId); +// 90% of account strings are 23 characters or less. 
+smol_str_wrapper!(AccountId); // will usually equal endpoint ID smol_str_wrapper!(EndpointCacheKey); diff --git a/proxy/src/usage_metrics.rs b/proxy/src/usage_metrics.rs index e1cc7e87b4a4..d369e3742f82 100644 --- a/proxy/src/usage_metrics.rs +++ b/proxy/src/usage_metrics.rs @@ -10,9 +10,9 @@ use anyhow::{bail, Context}; use async_compression::tokio::write::GzipEncoder; use bytes::Bytes; use chrono::{DateTime, Datelike, Timelike, Utc}; +use clashmap::mapref::entry::Entry; +use clashmap::ClashMap; use consumption_metrics::{idempotency_key, Event, EventChunk, EventType, CHUNK_SIZE}; -use dashmap::mapref::entry::Entry; -use dashmap::DashMap; use once_cell::sync::Lazy; use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel}; use serde::{Deserialize, Serialize}; @@ -137,7 +137,7 @@ type FastHasher = std::hash::BuildHasherDefault; #[derive(Default)] pub(crate) struct Metrics { - endpoints: DashMap, FastHasher>, + endpoints: ClashMap, FastHasher>, } impl Metrics { @@ -213,7 +213,7 @@ pub async fn task_main(config: &MetricCollectionConfig) -> anyhow::Result( - endpoints: &DashMap, FastHasher>, + endpoints: &ClashMap, FastHasher>, ) -> Vec<(Ids, u64)> { let mut metrics_to_clear = Vec::new(); @@ -271,7 +271,7 @@ fn create_event_chunks<'a>( #[expect(clippy::too_many_arguments)] #[instrument(skip_all)] async fn collect_metrics_iteration( - endpoints: &DashMap, FastHasher>, + endpoints: &ClashMap, FastHasher>, client: &http::ClientWithMiddleware, metric_collection_endpoint: &reqwest::Url, storage: Option<&GenericRemoteStorage>, diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 06746d3e1dd5..38a7f202ba0a 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] -channel = "1.84.0" +channel = "1.84.1" profile = "default" # The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy. # https://rust-lang.github.io/rustup/concepts/profiles.html diff --git a/safekeeper/src/wal_backup_partial.rs b/safekeeper/src/wal_backup_partial.rs index 4e5b34a9bf65..5ecb23e8e04b 100644 --- a/safekeeper/src/wal_backup_partial.rs +++ b/safekeeper/src/wal_backup_partial.rs @@ -535,6 +535,10 @@ pub async fn main_task( // limit concurrent uploads let _upload_permit = tokio::select! 
{ acq = limiter.acquire_partial_backup() => acq, + _ = backup.tli.cancel.cancelled() => { + info!("timeline canceled"); + return None; + } _ = cancel.cancelled() => { info!("task canceled"); return None; diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml index 9860bd5d0e44..63f43cdf62e0 100644 --- a/storage_controller/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -32,6 +32,7 @@ postgres_connection.workspace = true rand.workspace = true reqwest = { workspace = true, features = ["stream"] } routerify.workspace = true +rustls-native-certs.workspace = true serde.workspace = true serde_json.workspace = true thiserror.workspace = true @@ -39,9 +40,12 @@ tokio.workspace = true tokio-util.workspace = true tracing.workspace = true measured.workspace = true +rustls.workspace = true scopeguard.workspace = true strum.workspace = true strum_macros.workspace = true +tokio-postgres.workspace = true +tokio-postgres-rustls.workspace = true diesel = { version = "2.2.6", features = [ "serde_json", diff --git a/storage_controller/src/compute_hook.rs b/storage_controller/src/compute_hook.rs index 3884a6df4601..5bc3c81f02c8 100644 --- a/storage_controller/src/compute_hook.rs +++ b/storage_controller/src/compute_hook.rs @@ -225,7 +225,7 @@ pub(crate) enum NotifyError { // We shutdown while sending #[error("Shutting down")] ShuttingDown, - // A response indicates we will never succeed, such as 400 or 404 + // A response indicates we will never succeed, such as 400 or 403 #[error("Non-retryable error {0}")] Fatal(StatusCode), diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index 35eb15b29791..c4e5b3958912 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -1,6 +1,7 @@ pub(crate) mod split_state; use std::collections::HashMap; use std::str::FromStr; +use std::sync::Arc; use std::time::Duration; use std::time::Instant; @@ -9,8 +10,11 @@ use diesel::prelude::*; use diesel_async::async_connection_wrapper::AsyncConnectionWrapper; use diesel_async::pooled_connection::bb8::Pool; use diesel_async::pooled_connection::AsyncDieselConnectionManager; +use diesel_async::pooled_connection::ManagerConfig; +use diesel_async::AsyncPgConnection; use diesel_async::RunQueryDsl; -use diesel_async::{AsyncConnection, AsyncPgConnection}; +use futures::future::BoxFuture; +use futures::FutureExt; use itertools::Itertools; use pageserver_api::controller_api::AvailabilityZone; use pageserver_api::controller_api::MetadataHealthRecord; @@ -23,6 +27,9 @@ use pageserver_api::shard::ShardConfigError; use pageserver_api::shard::ShardIdentity; use pageserver_api::shard::ShardStripeSize; use pageserver_api::shard::{ShardCount, ShardNumber, TenantShardId}; +use rustls::client::danger::{ServerCertVerified, ServerCertVerifier}; +use rustls::client::WebPkiServerVerifier; +use rustls::crypto::ring; use scoped_futures::ScopedBoxFuture; use serde::{Deserialize, Serialize}; use utils::generation::Generation; @@ -156,7 +163,13 @@ impl Persistence { const MAX_CONNECTION_LIFETIME: Duration = Duration::from_secs(60); pub async fn new(database_url: String) -> Self { - let manager = AsyncDieselConnectionManager::::new(database_url); + let mut mgr_config = ManagerConfig::default(); + mgr_config.custom_setup = Box::new(establish_connection_rustls); + + let manager = AsyncDieselConnectionManager::::new_with_config( + database_url, + mgr_config, + ); // We will use a connection pool: this is primarily to _limit_ our connection count, rather than to 
optimize time // to execute queries (database queries are not generally on latency-sensitive paths). @@ -181,8 +194,10 @@ impl Persistence { timeout: Duration, ) -> Result<(), diesel::ConnectionError> { let started_at = Instant::now(); + log_postgres_connstr_info(database_url) + .map_err(|e| diesel::ConnectionError::InvalidConnectionUrl(e.to_string()))?; loop { - match AsyncPgConnection::establish(database_url).await { + match establish_connection_rustls(database_url).await { Ok(_) => { tracing::info!("Connected to database."); return Ok(()); @@ -1256,6 +1271,130 @@ impl Persistence { } } +pub(crate) fn load_certs() -> anyhow::Result> { + let der_certs = rustls_native_certs::load_native_certs(); + + if !der_certs.errors.is_empty() { + anyhow::bail!("could not parse certificates: {:?}", der_certs.errors); + } + + let mut store = rustls::RootCertStore::empty(); + store.add_parsable_certificates(der_certs.certs); + Ok(Arc::new(store)) +} + +#[derive(Debug)] +/// A verifier that accepts all certificates (but logs an error still) +struct AcceptAll(Arc); +impl ServerCertVerifier for AcceptAll { + fn verify_server_cert( + &self, + end_entity: &rustls::pki_types::CertificateDer<'_>, + intermediates: &[rustls::pki_types::CertificateDer<'_>], + server_name: &rustls::pki_types::ServerName<'_>, + ocsp_response: &[u8], + now: rustls::pki_types::UnixTime, + ) -> Result { + let r = + self.0 + .verify_server_cert(end_entity, intermediates, server_name, ocsp_response, now); + if let Err(err) = r { + tracing::info!( + ?server_name, + "ignoring db connection TLS validation error: {err:?}" + ); + return Ok(ServerCertVerified::assertion()); + } + r + } + fn verify_tls12_signature( + &self, + message: &[u8], + cert: &rustls::pki_types::CertificateDer<'_>, + dss: &rustls::DigitallySignedStruct, + ) -> Result { + self.0.verify_tls12_signature(message, cert, dss) + } + fn verify_tls13_signature( + &self, + message: &[u8], + cert: &rustls::pki_types::CertificateDer<'_>, + dss: &rustls::DigitallySignedStruct, + ) -> Result { + self.0.verify_tls13_signature(message, cert, dss) + } + fn supported_verify_schemes(&self) -> Vec { + self.0.supported_verify_schemes() + } +} + +/// Loads the root certificates and constructs a client config suitable for connecting. +/// This function is blocking. +fn client_config_with_root_certs() -> anyhow::Result { + let client_config = + rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) + .with_safe_default_protocol_versions() + .expect("ring should support the default protocol versions"); + static DO_CERT_CHECKS: std::sync::OnceLock = std::sync::OnceLock::new(); + let do_cert_checks = + DO_CERT_CHECKS.get_or_init(|| std::env::var("STORCON_DB_CERT_CHECKS").is_ok()); + Ok(if *do_cert_checks { + client_config + .with_root_certificates(load_certs()?) + .with_no_client_auth() + } else { + let verifier = AcceptAll( + WebPkiServerVerifier::builder_with_provider( + load_certs()?, + Arc::new(ring::default_provider()), + ) + .build()?, + ); + client_config + .dangerous() + .with_custom_certificate_verifier(Arc::new(verifier)) + .with_no_client_auth() + }) +} + +fn establish_connection_rustls(config: &str) -> BoxFuture> { + let fut = async { + // We first set up the way we want rustls to work. 
+ let rustls_config = client_config_with_root_certs() + .map_err(|err| ConnectionError::BadConnection(format!("{err:?}")))?; + let tls = tokio_postgres_rustls::MakeRustlsConnect::new(rustls_config); + let (client, conn) = tokio_postgres::connect(config, tls) + .await + .map_err(|e| ConnectionError::BadConnection(e.to_string()))?; + + AsyncPgConnection::try_from_client_and_connection(client, conn).await + }; + fut.boxed() +} + +#[cfg_attr(test, test)] +fn test_config_debug_censors_password() { + let has_pw = + "host=/var/lib/postgresql,localhost port=1234 user=specialuser password='NOT ALLOWED TAG'"; + let has_pw_cfg = has_pw.parse::().unwrap(); + assert!(format!("{has_pw_cfg:?}").contains("specialuser")); + // Ensure that the password is not leaked by the debug impl + assert!(!format!("{has_pw_cfg:?}").contains("NOT ALLOWED TAG")); +} + +fn log_postgres_connstr_info(config_str: &str) -> anyhow::Result<()> { + let config = config_str + .parse::() + .map_err(|_e| anyhow::anyhow!("Couldn't parse config str"))?; + // We use debug formatting here, and use a unit test to ensure that we don't leak the password. + // To make extra sure the test gets ran, run it every time the function is called + // (this is rather cold code, we can afford it). + #[cfg(not(test))] + test_config_debug_censors_password(); + tracing::info!("database connection config: {config:?}"); + Ok(()) +} + /// Parts of [`crate::tenant_shard::TenantShard`] that are stored durably #[derive( QueryableByName, Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq, diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs index 03db94726315..58bc0ba1cd86 100644 --- a/storage_controller/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -115,6 +115,15 @@ impl ReconcilerConfigBuilder { } } + pub(crate) fn tenant_creation_hint(self, hint: bool) -> Self { + Self { + config: ReconcilerConfig { + tenant_creation_hint: hint, + ..self.config + }, + } + } + pub(crate) fn build(self) -> ReconcilerConfig { self.config } @@ -129,6 +138,10 @@ pub(crate) struct ReconcilerConfig { // During live migrations this is the amount of time that // the pagserver will hold our poll. secondary_download_request_timeout: Option, + + // A hint indicating whether this reconciliation is done on the + // creation of a new tenant. This only informs logging behaviour. + tenant_creation_hint: bool, } impl ReconcilerConfig { @@ -143,6 +156,10 @@ impl ReconcilerConfig { self.secondary_download_request_timeout .unwrap_or(SECONDARY_DOWNLOAD_REQUEST_TIMEOUT_DEFAULT) } + + pub(crate) fn tenant_creation_hint(&self) -> bool { + self.tenant_creation_hint + } } /// RAII resource units granted to a Reconciler, which it should keep alive until it finishes doing I/O @@ -934,16 +951,35 @@ impl Reconciler { ) .await; if let Err(e) = &result { + // Set this flag so that in our ReconcileResult we will set the flag on the shard that it + // needs to retry at some point. + self.compute_notify_failure = true; + // It is up to the caller whether they want to drop out on this error, but they don't have to: // in general we should avoid letting unavailability of the cloud control plane stop us from // making progress. - if !matches!(e, NotifyError::ShuttingDown) { - tracing::warn!("Failed to notify compute of attached pageserver {node}: {e}"); + match e { + // 404s from cplane during tenant creation are expected. + // Cplane only persists the shards to the database after + // creating the tenant and the timeline. 
If we notify before + // that, we'll get a 404. + // + // This is fine because tenant creations happen via /location_config + // and that returns the list of locations in the response. Hence, we + // silence the error and return Ok(()) here. Reconciliation will still + // be retried because we set [`Reconciler::compute_notify_failure`] above. + NotifyError::Unexpected(hyper::StatusCode::NOT_FOUND) + if self.reconciler_config.tenant_creation_hint() => + { + return Ok(()); + } + NotifyError::ShuttingDown => {} + _ => { + tracing::warn!( + "Failed to notify compute of attached pageserver {node}: {e}" + ); + } } - - // Set this flag so that in our ReconcileResult we will set the flag on the shard that it - // needs to retry at some point. - self.compute_notify_failure = true; } result } else { diff --git a/storage_controller/src/scheduler.rs b/storage_controller/src/scheduler.rs index f5cab9dd5746..f9e72862ae3d 100644 --- a/storage_controller/src/scheduler.rs +++ b/storage_controller/src/scheduler.rs @@ -774,8 +774,9 @@ impl Scheduler { if !matches!(context.mode, ScheduleMode::Speculative) { tracing::info!( - "scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?})", - scores.iter().map(|i| i.node_id().0).collect::>() + "scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?}, preferred_az: {:?})", + scores.iter().map(|i| i.node_id().0).collect::>(), + preferred_az, ); } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 9ac9ee17cad4..4028cd702343 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -2238,9 +2238,14 @@ impl Service { let waiters = { let mut locked = self.inner.write().unwrap(); let (nodes, tenants, _scheduler) = locked.parts_mut(); + let config = ReconcilerConfigBuilder::new() + .tenant_creation_hint(true) + .build(); tenants .range_mut(TenantShardId::tenant_range(tenant_id)) - .filter_map(|(_shard_id, shard)| self.maybe_reconcile_shard(shard, nodes)) + .filter_map(|(_shard_id, shard)| { + self.maybe_configured_reconcile_shard(shard, nodes, config) + }) .collect::>() }; diff --git a/storage_controller/src/service/chaos_injector.rs b/storage_controller/src/service/chaos_injector.rs index 98034421d6ea..91d7183fde8d 100644 --- a/storage_controller/src/service/chaos_injector.rs +++ b/storage_controller/src/service/chaos_injector.rs @@ -96,28 +96,37 @@ impl ChaosInjector { let batch_size = 128; let mut inner = self.service.inner.write().unwrap(); let (nodes, tenants, scheduler) = inner.parts_mut(); - let tenant_ids = tenants.keys().cloned().collect::>(); // Prefer to migrate tenants that are currently outside their home AZ. This avoids the chaos injector // continuously pushing tenants outside their home AZ: instead, we'll tend to cycle between picking some // random tenants to move, and then on next chaos iteration moving them back, then picking some new // random tenants on the next iteration. 
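// Editor's sketch, not part of the patch: the selection implemented below partitions the
// shards into "attached outside home AZ" and "in home AZ", prefers the former, and only
// tops the batch up with a random sample of the latter. The same policy over plain IDs,
// using the `rand` calls the surrounding code already relies on (`thread_rng` plus
// `SliceRandom::shuffle`); the function name is illustrative only.
use rand::seq::SliceRandom;
use rand::thread_rng;

fn pick_chaos_victims(
    mut out_of_home_az: Vec<u64>,
    mut in_home_az: Vec<u64>,
    batch_size: usize,
) -> Vec<u64> {
    let mut victims = Vec::with_capacity(batch_size);
    if out_of_home_az.len() >= batch_size {
        // Plenty of misplaced shards: migrate a random subset of them back home.
        out_of_home_az.shuffle(&mut thread_rng());
        victims.extend(out_of_home_az.into_iter().take(batch_size));
    } else {
        // Take every misplaced shard, then fill the rest of the batch at random.
        victims.extend(out_of_home_az);
        in_home_az.shuffle(&mut thread_rng());
        victims.extend(in_home_az.into_iter().take(batch_size - victims.len()));
    }
    victims
}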
- let mut victims = Vec::with_capacity(batch_size); - for shard in tenants.values() { - if shard.is_attached_outside_preferred_az(nodes) { - victims.push(shard.tenant_shard_id); - } + let (out_of_home_az, in_home_az): (Vec<_>, Vec<_>) = tenants + .values() + .map(|shard| { + ( + shard.tenant_shard_id, + shard.is_attached_outside_preferred_az(nodes), + ) + }) + .partition(|(_id, is_outside)| *is_outside); + + let mut out_of_home_az: Vec<_> = out_of_home_az.into_iter().map(|(id, _)| id).collect(); + let mut in_home_az: Vec<_> = in_home_az.into_iter().map(|(id, _)| id).collect(); - if victims.len() >= batch_size { - break; - } - } + let mut victims = Vec::with_capacity(batch_size); + if out_of_home_az.len() >= batch_size { + tracing::info!("Injecting chaos: found {batch_size} shards to migrate back to home AZ (total {} out of home AZ)", out_of_home_az.len()); - let choose_random = batch_size.saturating_sub(victims.len()); - tracing::info!("Injecting chaos: found {} shards to migrate back to home AZ, picking {choose_random} random shards to migrate", victims.len()); + out_of_home_az.shuffle(&mut thread_rng()); + victims.extend(out_of_home_az.into_iter().take(batch_size)); + } else { + tracing::info!("Injecting chaos: found {} shards to migrate back to home AZ, picking {} random shards to migrate", out_of_home_az.len(), std::cmp::min(batch_size - out_of_home_az.len(), in_home_az.len())); - let random_victims = tenant_ids.choose_multiple(&mut thread_rng(), choose_random); - victims.extend(random_victims); + victims.extend(out_of_home_az); + in_home_az.shuffle(&mut thread_rng()); + victims.extend(in_home_az.into_iter().take(batch_size - victims.len())); + } for victim in victims { self.maybe_migrate_to_secondary(victim, nodes, tenants, scheduler); diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index cbc2696b260d..219c0dffe7c6 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -707,10 +707,15 @@ impl TenantShard { if let Some(node_id) = self.intent.get_attached() { // Populate secondary by demoting the attached node self.intent.demote_attached(scheduler, *node_id); + modified = true; } else if self.intent.secondary.is_empty() { // Populate secondary by scheduling a fresh node - let node_id = scheduler.schedule_shard::( + // + // We use [`AttachedShardTag`] because when a secondary location is the only one + // a shard has, we expect that its next use will be as an attached location: we want + // the tenant to be ready to warm up and run fast in their preferred AZ. + let node_id = scheduler.schedule_shard::( &[], &self.intent.preferred_az_id, context, @@ -719,9 +724,17 @@ impl TenantShard { modified = true; } while self.intent.secondary.len() > 1 { - // We have no particular preference for one secondary location over another: just - // arbitrarily drop from the end - self.intent.pop_secondary(scheduler); + // If we have multiple secondaries (e.g. when transitioning from Attached to Secondary and + // having just demoted our attached location), then we should prefer to keep the location + // in our preferred AZ. Tenants in Secondary mode want to be in the preferred AZ so that + // they have a warm location to become attached when transitioning back into Attached. 
+ + let mut candidates = self.intent.get_secondary().clone(); + // Sort to get secondaries outside preferred AZ last + candidates + .sort_by_key(|n| scheduler.get_node_az(n).as_ref() != self.preferred_az()); + let secondary_to_remove = candidates.pop().unwrap(); + self.intent.remove_secondary(scheduler, secondary_to_remove); modified = true; } } @@ -967,24 +980,51 @@ impl TenantShard { ), ) }) - .collect::>(); + .collect::>(); if secondary_scores.iter().any(|score| score.1.is_none()) { - // Don't have full list of scores, so can't make a good decision about which to drop unless - // there is an obvious one in the wrong AZ - for secondary in self.intent.get_secondary() { - if scheduler.get_node_az(secondary) == self.intent.preferred_az_id { + // Trivial case: if we only have one secondary, drop that one + if self.intent.get_secondary().len() == 1 { + return Some(ScheduleOptimization { + sequence: self.sequence, + action: ScheduleOptimizationAction::RemoveSecondary( + *self.intent.get_secondary().first().unwrap(), + ), + }); + } + + // Try to find a "good" secondary to keep, without relying on scores (one or more nodes is in a state + // where its score can't be calculated), and drop the others. This enables us to make progress in + // most cases, even if some nodes are offline or have scheduling=pause set. + + debug_assert!(self.intent.attached.is_some()); // We should not make it here unless attached -- this + // logic presumes we are in a mode where we want secondaries to be in non-home AZ + if let Some(retain_secondary) = self.intent.get_secondary().iter().find(|n| { + let in_home_az = scheduler.get_node_az(n) == self.intent.preferred_az_id; + let is_available = secondary_scores + .get(n) + .expect("Built from same list of nodes") + .is_some(); + is_available && !in_home_az + }) { + // Great, we found one to retain. Pick some other to drop. + if let Some(victim) = self + .intent + .get_secondary() + .iter() + .find(|n| n != &retain_secondary) + { return Some(ScheduleOptimization { sequence: self.sequence, - action: ScheduleOptimizationAction::RemoveSecondary(*secondary), + action: ScheduleOptimizationAction::RemoveSecondary(*victim), }); } } // Fall through: we didn't identify one to remove. This ought to be rare. tracing::warn!("Keeping extra secondaries: can't determine which of {:?} to remove (some nodes offline?)", - self.intent.get_secondary() - ); + self.intent.get_secondary() + ); } else { let victim = secondary_scores .iter() @@ -993,7 +1033,7 @@ impl TenantShard { .0; return Some(ScheduleOptimization { sequence: self.sequence, - action: ScheduleOptimizationAction::RemoveSecondary(victim), + action: ScheduleOptimizationAction::RemoveSecondary(*victim), }); } } @@ -1079,12 +1119,31 @@ impl TenantShard { None => vec![], }; - let replacement = self.find_better_location::( - scheduler, - &schedule_context, - *secondary, - &exclude, - ); + let replacement = match &self.policy { + PlacementPolicy::Attached(_) => { + // Secondaries for an attached shard should be scheduled using `SecondaryShardTag` + // to avoid placing them in the preferred AZ. + self.find_better_location::( + scheduler, + &schedule_context, + *secondary, + &exclude, + ) + } + PlacementPolicy::Secondary => { + // In secondary-only mode, we want our secondary locations in the preferred AZ, + // so that they're ready to take over as an attached location when we transition + // into PlacementPolicy::Attached. 
+ self.find_better_location::( + scheduler, + &schedule_context, + *secondary, + &exclude, + ) + } + PlacementPolicy::Detached => None, + }; + assert!(replacement != Some(*secondary)); if let Some(replacement) = replacement { // We have found a candidate and confirmed that its score is preferable @@ -1806,7 +1865,7 @@ impl TenantShard { .get(&node_id) .expect("referenced node exists") .get_availability_zone_id(), - ) == self.intent.preferred_az_id.as_ref() + ) != self.intent.preferred_az_id.as_ref() }) .unwrap_or(false) } @@ -2348,6 +2407,110 @@ pub(crate) mod tests { Ok(()) } + /// Test how the optimisation code behaves with an extra secondary + #[test] + fn optimize_removes_secondary() -> anyhow::Result<()> { + let az_a_tag = AvailabilityZone("az-a".to_string()); + let az_b_tag = AvailabilityZone("az-b".to_string()); + let mut nodes = make_test_nodes( + 4, + &[ + az_a_tag.clone(), + az_b_tag.clone(), + az_a_tag.clone(), + az_b_tag.clone(), + ], + ); + let mut scheduler = Scheduler::new(nodes.values()); + + let mut schedule_context = ScheduleContext::default(); + + let mut shard_a = make_test_tenant_shard(PlacementPolicy::Attached(1)); + shard_a.intent.preferred_az_id = Some(az_a_tag.clone()); + shard_a + .schedule(&mut scheduler, &mut schedule_context) + .unwrap(); + + // Attached on node 1, secondary on node 2 + assert_eq!(shard_a.intent.get_attached(), &Some(NodeId(1))); + assert_eq!(shard_a.intent.get_secondary(), &vec![NodeId(2)]); + + // Initially optimiser is idle + assert_eq!( + shard_a.optimize_attachment(&mut scheduler, &schedule_context), + None + ); + assert_eq!( + shard_a.optimize_secondary(&mut scheduler, &schedule_context), + None + ); + + // A spare secondary in the home AZ: it should be removed -- this is the situation when we're midway through a graceful migration, after cutting over + // to our new location + shard_a.intent.push_secondary(&mut scheduler, NodeId(3)); + let optimization = shard_a.optimize_attachment(&mut scheduler, &schedule_context); + assert_eq!( + optimization, + Some(ScheduleOptimization { + sequence: shard_a.sequence, + action: ScheduleOptimizationAction::RemoveSecondary(NodeId(3)) + }) + ); + shard_a.apply_optimization(&mut scheduler, optimization.unwrap()); + + // A spare secondary in the non-home AZ, and one of them is offline + shard_a.intent.push_secondary(&mut scheduler, NodeId(4)); + nodes + .get_mut(&NodeId(4)) + .unwrap() + .set_availability(NodeAvailability::Offline); + scheduler.node_upsert(nodes.get(&NodeId(4)).unwrap()); + let optimization = shard_a.optimize_attachment(&mut scheduler, &schedule_context); + assert_eq!( + optimization, + Some(ScheduleOptimization { + sequence: shard_a.sequence, + action: ScheduleOptimizationAction::RemoveSecondary(NodeId(4)) + }) + ); + shard_a.apply_optimization(&mut scheduler, optimization.unwrap()); + + // A spare secondary when should have none + shard_a.policy = PlacementPolicy::Attached(0); + let optimization = shard_a.optimize_attachment(&mut scheduler, &schedule_context); + assert_eq!( + optimization, + Some(ScheduleOptimization { + sequence: shard_a.sequence, + action: ScheduleOptimizationAction::RemoveSecondary(NodeId(2)) + }) + ); + shard_a.apply_optimization(&mut scheduler, optimization.unwrap()); + assert_eq!(shard_a.intent.get_attached(), &Some(NodeId(1))); + assert_eq!(shard_a.intent.get_secondary(), &vec![]); + + // Check that in secondary mode, we preserve the secondary in the preferred AZ + let mut schedule_context = ScheduleContext::default(); // Fresh context, we're about to call 
schedule() + shard_a.policy = PlacementPolicy::Secondary; + shard_a + .schedule(&mut scheduler, &mut schedule_context) + .unwrap(); + assert_eq!(shard_a.intent.get_attached(), &None); + assert_eq!(shard_a.intent.get_secondary(), &vec![NodeId(1)]); + assert_eq!( + shard_a.optimize_attachment(&mut scheduler, &schedule_context), + None + ); + assert_eq!( + shard_a.optimize_secondary(&mut scheduler, &schedule_context), + None + ); + + shard_a.intent.clear(&mut scheduler); + + Ok(()) + } + // Optimize til quiescent: this emulates what Service::optimize_all does, when // called repeatedly in the background. // Returns the applied optimizations @@ -2687,4 +2850,108 @@ pub(crate) mod tests { } Ok(()) } + + /// Check how the shard's scheduling behaves when in PlacementPolicy::Secondary mode. + #[test] + fn tenant_secondary_scheduling() -> anyhow::Result<()> { + let az_a = AvailabilityZone("az-a".to_string()); + let nodes = make_test_nodes( + 3, + &[ + az_a.clone(), + AvailabilityZone("az-b".to_string()), + AvailabilityZone("az-c".to_string()), + ], + ); + + let mut scheduler = Scheduler::new(nodes.values()); + let mut context = ScheduleContext::default(); + + let mut tenant_shard = make_test_tenant_shard(PlacementPolicy::Secondary); + tenant_shard.intent.preferred_az_id = Some(az_a.clone()); + tenant_shard + .schedule(&mut scheduler, &mut context) + .expect("we have enough nodes, scheduling should work"); + assert_eq!(tenant_shard.intent.secondary.len(), 1); + assert!(tenant_shard.intent.attached.is_none()); + + // Should have scheduled into the preferred AZ + assert_eq!( + scheduler + .get_node_az(&tenant_shard.intent.secondary[0]) + .as_ref(), + tenant_shard.preferred_az() + ); + + // Optimizer should agree + assert_eq!( + tenant_shard.optimize_attachment(&mut scheduler, &context), + None + ); + assert_eq!( + tenant_shard.optimize_secondary(&mut scheduler, &context), + None + ); + + // Switch to PlacementPolicy::Attached + tenant_shard.policy = PlacementPolicy::Attached(1); + tenant_shard + .schedule(&mut scheduler, &mut context) + .expect("we have enough nodes, scheduling should work"); + assert_eq!(tenant_shard.intent.secondary.len(), 1); + assert!(tenant_shard.intent.attached.is_some()); + // Secondary should now be in non-preferred AZ + assert_ne!( + scheduler + .get_node_az(&tenant_shard.intent.secondary[0]) + .as_ref(), + tenant_shard.preferred_az() + ); + // Attached should be in preferred AZ + assert_eq!( + scheduler + .get_node_az(&tenant_shard.intent.attached.unwrap()) + .as_ref(), + tenant_shard.preferred_az() + ); + + // Optimizer should agree + assert_eq!( + tenant_shard.optimize_attachment(&mut scheduler, &context), + None + ); + assert_eq!( + tenant_shard.optimize_secondary(&mut scheduler, &context), + None + ); + + // Switch back to PlacementPolicy::Secondary + tenant_shard.policy = PlacementPolicy::Secondary; + tenant_shard + .schedule(&mut scheduler, &mut context) + .expect("we have enough nodes, scheduling should work"); + assert_eq!(tenant_shard.intent.secondary.len(), 1); + assert!(tenant_shard.intent.attached.is_none()); + // When we picked a location to keep, we should have kept the one in the preferred AZ + assert_eq!( + scheduler + .get_node_az(&tenant_shard.intent.secondary[0]) + .as_ref(), + tenant_shard.preferred_az() + ); + + // Optimizer should agree + assert_eq!( + tenant_shard.optimize_attachment(&mut scheduler, &context), + None + ); + assert_eq!( + tenant_shard.optimize_secondary(&mut scheduler, &context), + None + ); + + tenant_shard.intent.clear(&mut 
scheduler); + + Ok(()) + } } diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index fd7e193778bb..83a1a8761153 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -158,6 +158,9 @@ def counter(name: str) -> str: "pageserver_pitr_history_size", "pageserver_layer_bytes", "pageserver_layer_count", + "pageserver_layers_per_read_bucket", + "pageserver_layers_per_read_count", + "pageserver_layers_per_read_sum", "pageserver_visible_physical_size", "pageserver_storage_operations_seconds_count_total", "pageserver_storage_operations_seconds_sum_total", diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 7e3cc1982960..7c4991ffabda 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -2766,6 +2766,11 @@ def read_tenant_location_conf( log.error(f"Failed to decode LocationConf, raw content ({len(bytes)} bytes): {bytes}") raise + def heatmap_content(self, tenant_shard_id: TenantId | TenantShardId) -> Any: + path = self.tenant_dir(tenant_shard_id) / "heatmap-v1.json" + with open(path) as f: + return json.load(f) + def tenant_create( self, tenant_id: TenantId, @@ -4996,13 +5001,35 @@ def check_restored_datadir_content( assert (mismatch, error) == ([], []) +# wait for subscriber to catch up with publisher def logical_replication_sync( subscriber: PgProtocol, publisher: PgProtocol, + # pass subname explicitly to avoid confusion + # when multiple subscriptions are present + subname: str, sub_dbname: str | None = None, pub_dbname: str | None = None, -) -> Lsn: +): """Wait logical replication subscriber to sync with publisher.""" + + def initial_sync(): + # first check if the subscription is active `s`=`synchronized`, `r` = `ready` + query = f"""SELECT 1 FROM pg_subscription_rel join pg_catalog.pg_subscription + on pg_subscription_rel.srsubid = pg_subscription.oid + WHERE srsubstate NOT IN ('r', 's') and subname='{subname}'""" + + if sub_dbname is not None: + res = subscriber.safe_psql(query, dbname=sub_dbname) + else: + res = subscriber.safe_psql(query) + + assert (res is None) or (len(res) == 0) + + wait_until(initial_sync) + + # wait for the subscription to catch up with current state of publisher + # caller is responsible to call checkpoint before calling this function if pub_dbname is not None: publisher_lsn = Lsn( publisher.safe_psql("SELECT pg_current_wal_flush_lsn()", dbname=pub_dbname)[0][0] @@ -5010,23 +5037,23 @@ def logical_replication_sync( else: publisher_lsn = Lsn(publisher.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) - while True: + def subscriber_catch_up(): + query = f"select latest_end_lsn from pg_catalog.pg_stat_subscription where latest_end_lsn is NOT NULL and subname='{subname}'" + if sub_dbname is not None: - res = subscriber.safe_psql( - "select latest_end_lsn from pg_catalog.pg_stat_subscription", dbname=sub_dbname - )[0][0] + res = subscriber.safe_psql(query, dbname=sub_dbname) else: - res = subscriber.safe_psql( - "select latest_end_lsn from pg_catalog.pg_stat_subscription" - )[0][0] - - if res: - log.info(f"subscriber_lsn={res}") - subscriber_lsn = Lsn(res) - log.info(f"Subscriber LSN={subscriber_lsn}, publisher LSN={publisher_lsn}") - if subscriber_lsn >= publisher_lsn: - return subscriber_lsn - time.sleep(0.5) + res = subscriber.safe_psql(query) + + assert res is not None + + res_lsn = res[0][0] + log.info(f"subscriber_lsn={res_lsn}") + subscriber_lsn = Lsn(res_lsn) + log.info(f"Subscriber LSN={subscriber_lsn}, publisher 
LSN={publisher_lsn}") + assert subscriber_lsn >= publisher_lsn + + wait_until(subscriber_catch_up) def tenant_get_shards( diff --git a/test_runner/logical_repl/README.md b/test_runner/logical_repl/README.md index 8eca056dda23..449e56e21df6 100644 --- a/test_runner/logical_repl/README.md +++ b/test_runner/logical_repl/README.md @@ -1,13 +1,18 @@ # Logical replication tests +> [!NOTE] +> Neon project should have logical replication enabled: +> +> https://neon.tech/docs/guides/logical-replication-postgres#enable-logical-replication-in-the-source-neon-project + ## Clickhouse ```bash export BENCHMARK_CONNSTR=postgres://user:pass@ep-abc-xyz-123.us-east-2.aws.neon.build/neondb -docker compose -f clickhouse/docker-compose.yml up -d -pytest -m remote_cluster -k test_clickhouse -docker compose -f clickhouse/docker-compose.yml down +docker compose -f test_runner/logical_repl/clickhouse/docker-compose.yml up -d +./scripts/pytest -m remote_cluster -k test_clickhouse +docker compose -f test_runner/logical_repl/clickhouse/docker-compose.yml down ``` ## Debezium @@ -15,8 +20,7 @@ docker compose -f clickhouse/docker-compose.yml down ```bash export BENCHMARK_CONNSTR=postgres://user:pass@ep-abc-xyz-123.us-east-2.aws.neon.build/neondb -docker compose -f debezium/docker-compose.yml up -d -pytest -m remote_cluster -k test_debezium -docker compose -f debezium/docker-compose.yml down - -``` \ No newline at end of file +docker compose -f test_runner/logical_repl/debezium/docker-compose.yml up -d +./scripts/pytest -m remote_cluster -k test_debezium +docker compose -f test_runner/logical_repl/debezium/docker-compose.yml down +``` diff --git a/test_runner/performance/test_layer_map.py b/test_runner/performance/test_layer_map.py index efc7fa59dbea..6c009440058c 100644 --- a/test_runner/performance/test_layer_map.py +++ b/test_runner/performance/test_layer_map.py @@ -34,16 +34,20 @@ def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark): cur.execute("set log_statement = 'all'") cur.execute("create table t(x integer)") for _ in range(n_iters): - cur.execute(f"insert into t values (generate_series(1,{n_records}))") + with zenbenchmark.record_duration(f"insert into t values (generate_series(1,{n_records}))"): + cur.execute(f"insert into t values (generate_series(1,{n_records}))") time.sleep(1) - cur.execute("vacuum t") + with zenbenchmark.record_duration("vacuum t"): + cur.execute("vacuum t") - with zenbenchmark.record_duration("test_query"): + with zenbenchmark.record_duration("SELECT count(*) from t"): cur.execute("SELECT count(*) from t") assert cur.fetchone() == (n_iters * n_records,) - flush_ep_to_pageserver(env, endpoint, tenant, timeline) - env.pageserver.http_client().timeline_checkpoint( - tenant, timeline, compact=False, wait_until_uploaded=True - ) + with zenbenchmark.record_duration("flush_ep_to_pageserver"): + flush_ep_to_pageserver(env, endpoint, tenant, timeline) + with zenbenchmark.record_duration("timeline_checkpoint"): + env.pageserver.http_client().timeline_checkpoint( + tenant, timeline, compact=False, wait_until_uploaded=True + ) diff --git a/test_runner/performance/test_logical_replication.py b/test_runner/performance/test_logical_replication.py index 9d653d1a1ef8..fdc56cc496e5 100644 --- a/test_runner/performance/test_logical_replication.py +++ b/test_runner/performance/test_logical_replication.py @@ -44,13 +44,13 @@ def test_logical_replication(neon_simple_env: NeonEnv, pg_bin: PgBin, vanilla_pg vanilla_pg.safe_psql(f"create subscription sub1 connection '{connstr}' publication pub1") # 
Wait logical replication channel to be established - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") pg_bin.run_capture(["pgbench", "-c10", "-T100", "-Mprepared", endpoint.connstr()]) # Wait logical replication to sync start = time.time() - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") log.info(f"Sync with master took {time.time() - start} seconds") sum_master = cast("int", endpoint.safe_psql("select sum(abalance) from pgbench_accounts")[0][0]) diff --git a/test_runner/performance/test_sharding_autosplit.py b/test_runner/performance/test_sharding_autosplit.py index 76c3ad01a4a7..e5a9f17da8c2 100644 --- a/test_runner/performance/test_sharding_autosplit.py +++ b/test_runner/performance/test_sharding_autosplit.py @@ -247,7 +247,7 @@ def assert_all_split(): log.info(f"{shard_zero_id} timeline: {timeline_info}") # Run compaction for all tenants, restart endpoint so that on subsequent reads we will - # definitely hit pageserver for reads. This compaction passis expected to drop unwanted + # definitely hit pageserver for reads. This compaction pass is expected to drop unwanted # layers but not do any rewrites (we're still in the same generation) for tenant_id, tenant_state in tenants.items(): tenant_state.endpoint.stop() @@ -296,6 +296,16 @@ def assert_all_split(): for fut in pgbench_futs: fut.result() + # Run a full forced compaction, to detect any data corruption. + for tenant_id, tenant_state in tenants.items(): + for shard_id, shard_ps in tenant_get_shards(env, tenant_id): + shard_ps.http_client().timeline_compact( + shard_id, + tenant_state.timeline_id, + force_image_layer_creation=True, + force_l0_compaction=True, + ) + # Assert that some rewrites happened # TODO: uncomment this after https://github.com/neondatabase/neon/pull/7531 is merged # assert any(ps.log_contains(".*Rewriting layer after shard split.*") for ps in env.pageservers) diff --git a/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock b/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock index 354fc1574510..0b138bf1677a 100644 --- a/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock +++ b/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock @@ -421,9 +421,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "openssl" -version = "0.10.66" +version = "0.10.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" +checksum = "61cfb4e166a8bb8c9b55c500bc2308550148ece889be90f609377e58140f42c6" dependencies = [ "bitflags 2.6.0", "cfg-if", @@ -453,9 +453,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.103" +version = "0.9.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" +checksum = "8b22d5b84be05a8d6947c7cb71f7c849aa0f112acd4bf51c2a7c1c988ac0a9dc" dependencies = [ "cc", "libc", diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py index e88d245c8ff4..a4b9eabf8e63 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -184,6 +184,7 @@ def test_fully_custom_config(positive_env: NeonEnv): "gc_compaction_enabled": True, "gc_compaction_initial_threshold_kb": 1024000, 
"gc_compaction_ratio_percent": 200, + "image_creation_preempt_threshold": 5, } vps_http = env.storage_controller.pageserver_api() diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index 2edfc884add6..f3347b594e32 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -29,6 +29,21 @@ # "lsn_lease_length": "0s", -- TODO: would cause branch creation errors, should fix later } +PREEMPT_COMPACTION_TENANT_CONF = { + "gc_period": "5s", + "compaction_period": "5s", + # Small checkpoint distance to create many layers + "checkpoint_distance": 1024**2, + # Compact small layers + "compaction_target_size": 1024**2, + "image_creation_threshold": 1, + "image_creation_preempt_threshold": 1, + # compact more frequently + "compaction_threshold": 3, + "compaction_upper_limit": 6, + "lsn_lease_length": "0s", +} + @skip_in_debug_build("only run with release build") @pytest.mark.parametrize( @@ -36,7 +51,8 @@ [PageserverWalReceiverProtocol.VANILLA, PageserverWalReceiverProtocol.INTERPRETED], ) def test_pageserver_compaction_smoke( - neon_env_builder: NeonEnvBuilder, wal_receiver_protocol: PageserverWalReceiverProtocol + neon_env_builder: NeonEnvBuilder, + wal_receiver_protocol: PageserverWalReceiverProtocol, ): """ This is a smoke test that compaction kicks in. The workload repeatedly churns @@ -54,7 +70,8 @@ def test_pageserver_compaction_smoke( page_cache_size=10 """ - env = neon_env_builder.init_start(initial_tenant_conf=AGGRESSIVE_COMPACTION_TENANT_CONF) + conf = AGGRESSIVE_COMPACTION_TENANT_CONF.copy() + env = neon_env_builder.init_start(initial_tenant_conf=conf) tenant_id = env.initial_tenant timeline_id = env.initial_timeline @@ -86,9 +103,9 @@ def test_pageserver_compaction_smoke( log.info("Checking layer access metrics ...") layer_access_metric_names = [ - "pageserver_layers_visited_per_vectored_read_global_sum", - "pageserver_layers_visited_per_vectored_read_global_count", - "pageserver_layers_visited_per_vectored_read_global_bucket", + "pageserver_layers_per_read_global_sum", + "pageserver_layers_per_read_global_count", + "pageserver_layers_per_read_global_bucket", ] metrics = env.pageserver.http_client().get_metrics() @@ -96,8 +113,8 @@ def test_pageserver_compaction_smoke( layer_access_metrics = metrics.query_all(name) log.info(f"Got metrics: {layer_access_metrics}") - vectored_sum = metrics.query_one("pageserver_layers_visited_per_vectored_read_global_sum") - vectored_count = metrics.query_one("pageserver_layers_visited_per_vectored_read_global_count") + vectored_sum = metrics.query_one("pageserver_layers_per_read_global_sum") + vectored_count = metrics.query_one("pageserver_layers_per_read_global_count") if vectored_count.value > 0: assert vectored_sum.value > 0 vectored_average = vectored_sum.value / vectored_count.value @@ -113,6 +130,41 @@ def test_pageserver_compaction_smoke( assert vectored_average < 8 +@skip_in_debug_build("only run with release build") +def test_pageserver_compaction_preempt( + neon_env_builder: NeonEnvBuilder, +): + # Ideally we should be able to do unit tests for this, but we need real Postgres + # WALs in order to do unit testing... 
+ + conf = PREEMPT_COMPACTION_TENANT_CONF.copy() + env = neon_env_builder.init_start(initial_tenant_conf=conf) + + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + row_count = 200000 + churn_rounds = 10 + + ps_http = env.pageserver.http_client() + + workload = Workload(env, tenant_id, timeline_id) + workload.init(env.pageserver.id) + + log.info("Writing initial data ...") + workload.write_rows(row_count, env.pageserver.id) + + for i in range(1, churn_rounds + 1): + log.info(f"Running churn round {i}/{churn_rounds} ...") + workload.churn_rows(row_count, env.pageserver.id, upload=False) + workload.validate(env.pageserver.id) + ps_http.timeline_compact(tenant_id, timeline_id, wait_until_uploaded=True) + log.info("Validating at workload end ...") + workload.validate(env.pageserver.id) + # ensure image layer creation gets preempted and then resumed + env.pageserver.assert_log_contains("resuming image layer creation") + + @skip_in_debug_build("only run with release build") @pytest.mark.parametrize( "with_branches", @@ -250,6 +302,9 @@ def compaction_finished(): workload.churn_rows(row_count, env.pageserver.id) # compact 3 times if mode is before_restart n_compactions = 3 if compaction_mode == "before_restart" else 1 + ps_http.timeline_compact( + tenant_id, timeline_id, force_l0_compaction=True, wait_until_uploaded=True + ) for _ in range(n_compactions): # Force refresh gc info to have gc_cutoff generated ps_http.timeline_gc(tenant_id, timeline_id, None) diff --git a/test_runner/regress/test_compute_catalog.py b/test_runner/regress/test_compute_catalog.py index f0878b2631d1..50a922a616b0 100644 --- a/test_runner/regress/test_compute_catalog.py +++ b/test_runner/regress/test_compute_catalog.py @@ -183,6 +183,7 @@ def test_dropdb_with_subscription(neon_simple_env: NeonEnv): cursor.execute("select pg_catalog.pg_create_logical_replication_slot('mysub', 'pgoutput');") cursor.execute("CREATE TABLE t(a int)") cursor.execute("INSERT INTO t VALUES (1)") + cursor.execute("CHECKPOINT") # connect to the subscriber_db and create a subscription # Note that we need to create subscription with @@ -195,7 +196,11 @@ def test_dropdb_with_subscription(neon_simple_env: NeonEnv): # wait for the subscription to be active logical_replication_sync( - endpoint, endpoint, sub_dbname="subscriber_db", pub_dbname="publisher_db" + endpoint, + endpoint, + "mysub", + sub_dbname="subscriber_db", + pub_dbname="publisher_db", ) # Check that replication is working diff --git a/test_runner/regress/test_download_extensions.py b/test_runner/regress/test_download_extensions.py index d7e6e9de5642..7f12c140731c 100644 --- a/test_runner/regress/test_download_extensions.py +++ b/test_runner/regress/test_download_extensions.py @@ -95,6 +95,8 @@ def endpoint_handler_build_tag(request: Request) -> Response: # mock remote_extensions spec spec: dict[str, Any] = { + "public_extensions": ["anon"], + "custom_extensions": None, "library_index": { "anon": "anon", }, diff --git a/test_runner/regress/test_import_pgdata.py b/test_runner/regress/test_import_pgdata.py index 182f715b0ef4..6b35f3c6bb37 100644 --- a/test_runner/regress/test_import_pgdata.py +++ b/test_runner/regress/test_import_pgdata.py @@ -59,6 +59,9 @@ def handler(request: Request) -> Response: neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) env = neon_env_builder.init_start() + # The test needs LocalFs support, which is only built in testing mode. 
+ env.pageserver.is_testing_enabled_or_skip() + env.pageserver.patch_config_toml_nonrecursive( { "import_pgdata_upcall_api": f"http://{cplane_mgmt_api_server.host}:{cplane_mgmt_api_server.port}/path/to/mgmt/api" @@ -67,6 +70,12 @@ def handler(request: Request) -> Response: env.pageserver.stop() env.pageserver.start() + # By default our tests run with a tiny shared_buffers=1MB setting. That + # doesn't allow any prefetching on v17 and above, where the new streaming + # read machinery keeps buffers pinned while prefetching them. Use a higher + # setting to enable prefetching and speed up the tests + ep_config = ["shared_buffers=64MB"] + # # Put data in vanilla pg # @@ -243,7 +252,11 @@ def validate_vanilla_equivalence(ep): # ro_endpoint = env.endpoints.create_start( - branch_name=import_branch_name, endpoint_id="ro", tenant_id=tenant_id, lsn=last_record_lsn + branch_name=import_branch_name, + endpoint_id="ro", + tenant_id=tenant_id, + lsn=last_record_lsn, + config_lines=ep_config, ) validate_vanilla_equivalence(ro_endpoint) @@ -273,7 +286,10 @@ def validate_vanilla_equivalence(ep): # validate that we can write # rw_endpoint = env.endpoints.create_start( - branch_name=import_branch_name, endpoint_id="rw", tenant_id=tenant_id + branch_name=import_branch_name, + endpoint_id="rw", + tenant_id=tenant_id, + config_lines=ep_config, ) rw_endpoint.safe_psql("create table othertable(values text)") rw_lsn = Lsn(rw_endpoint.safe_psql_scalar("select pg_current_wal_flush_lsn()")) @@ -293,7 +309,7 @@ def validate_vanilla_equivalence(ep): ancestor_start_lsn=rw_lsn, ) br_tip_endpoint = env.endpoints.create_start( - branch_name="br-tip", endpoint_id="br-tip-ro", tenant_id=tenant_id + branch_name="br-tip", endpoint_id="br-tip-ro", tenant_id=tenant_id, config_lines=ep_config ) validate_vanilla_equivalence(br_tip_endpoint) br_tip_endpoint.safe_psql("select * from othertable") @@ -306,7 +322,10 @@ def validate_vanilla_equivalence(ep): ancestor_start_lsn=initdb_lsn, ) br_initdb_endpoint = env.endpoints.create_start( - branch_name="br-initdb", endpoint_id="br-initdb-ro", tenant_id=tenant_id + branch_name="br-initdb", + endpoint_id="br-initdb-ro", + tenant_id=tenant_id, + config_lines=ep_config, ) validate_vanilla_equivalence(br_initdb_endpoint) with pytest.raises(psycopg2.errors.UndefinedTable): diff --git a/test_runner/regress/test_layer_bloating.py b/test_runner/regress/test_layer_bloating.py index d9043fef7fb3..0260704ebf47 100644 --- a/test_runner/regress/test_layer_bloating.py +++ b/test_runner/regress/test_layer_bloating.py @@ -63,7 +63,7 @@ def test_layer_bloating(neon_env_builder: NeonEnvBuilder, vanilla_pg): cur.execute("set statement_timeout=0") cur.execute("select create_snapshots(10000)") # Wait logical replication to sync - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, timeline) env.pageserver.http_client().timeline_checkpoint(env.initial_tenant, timeline, compact=False) diff --git a/test_runner/regress/test_logical_replication.py b/test_runner/regress/test_logical_replication.py index 89087631092d..3a92f0d1d1f1 100644 --- a/test_runner/regress/test_logical_replication.py +++ b/test_runner/regress/test_logical_replication.py @@ -55,13 +55,13 @@ def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgr vanilla_pg.safe_psql(f"create subscription sub1 connection '{connstr}' publication pub1") # Wait logical replication channel to be established - 
logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") # insert some data cur.execute("insert into t values (generate_series(1,1000), 0)") # Wait logical replication to sync - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") assert vanilla_pg.safe_psql("select count(*) from t")[0][0] == 1000 # now stop subscriber... @@ -78,7 +78,7 @@ def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgr vanilla_pg.start() # Wait logical replication to sync - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") # Check that subscribers receives all data assert vanilla_pg.safe_psql("select count(*) from t")[0][0] == 2000 @@ -148,7 +148,7 @@ def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgr endpoint.start() vanilla_pg.start() - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") eq_q = "select testcolumn1, testcolumn2, testcolumn3 from replication_example order by 1, 2, 3" assert vanilla_pg.safe_psql(eq_q) == endpoint.safe_psql(eq_q) log.info("rewriteheap synced") @@ -285,7 +285,7 @@ def test_lr_with_slow_safekeeper(neon_env_builder: NeonEnvBuilder, vanilla_pg: V vanilla_pg.safe_psql("create table t(a int)") connstr = endpoint.connstr().replace("'", "''") vanilla_pg.safe_psql(f"create subscription sub1 connection '{connstr}' publication pub") - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") vanilla_pg.stop() @@ -321,13 +321,13 @@ def test_lr_with_slow_safekeeper(neon_env_builder: NeonEnvBuilder, vanilla_pg: V sk_http = sk.http_client() sk_http.configure_failpoints([("sk-pause-send", "off")]) - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") assert [r[0] for r in vanilla_pg.safe_psql("select * from t")] == [1, 2] # Check that local reads also work with endpoint.connect().cursor() as cur: cur.execute("insert into t values (3)") - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") assert [r[0] for r in vanilla_pg.safe_psql("select * from t")] == [1, 2, 3] log_path = vanilla_pg.pgdatadir / "pg.log" @@ -365,7 +365,7 @@ def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgres) log.info(f"ep connstr is {endpoint.connstr()}, subscriber connstr {vanilla_pg.connstr()}") connstr = endpoint.connstr().replace("'", "''") vanilla_pg.safe_psql(f"create subscription sub1 connection '{connstr}' publication pub1") - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") vanilla_pg.stop() wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) @@ -375,7 +375,7 @@ def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgres) # this should flush current wal page cur.execute("insert into replication_example values (3, 4)") vanilla_pg.start() - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") assert vanilla_pg.safe_psql( "select sum(somedata) from replication_example" ) == endpoint.safe_psql("select sum(somedata) from replication_example") @@ -409,18 +409,18 @@ def test_large_records(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgres): # Test simple insert, update, delete. 
But with very large values value = random_string(10_000_000) cur.execute(f"INSERT INTO reptbl VALUES (1, '{value}')") - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") assert vanilla_pg.safe_psql("select id, largeval from reptbl") == [(1, value)] # Test delete, and reinsert another value cur.execute("DELETE FROM reptbl WHERE id = 1") cur.execute(f"INSERT INTO reptbl VALUES (2, '{value}')") - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") assert vanilla_pg.safe_psql("select id, largeval from reptbl") == [(2, value)] value = random_string(10_000_000) cur.execute(f"UPDATE reptbl SET largeval='{value}'") - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") assert vanilla_pg.safe_psql("select id, largeval from reptbl") == [(2, value)] endpoint.stop() @@ -428,7 +428,7 @@ def test_large_records(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgres): cur = endpoint.connect().cursor() value = random_string(10_000_000) cur.execute(f"UPDATE reptbl SET largeval='{value}'") - logical_replication_sync(vanilla_pg, endpoint) + logical_replication_sync(vanilla_pg, endpoint, "sub1") assert vanilla_pg.safe_psql("select id, largeval from reptbl") == [(2, value)] @@ -608,7 +608,7 @@ def test_subscriber_synchronous_commit(neon_simple_env: NeonEnv, vanilla_pg: Van for i in range(0, 1000): pcur.execute("INSERT into t values (%s, random()*100000)", (i,)) # wait until sub receives all data - logical_replication_sync(sub, vanilla_pg) + logical_replication_sync(sub, vanilla_pg, "sub") # Update confirmed_flush_lsn of the slot. If subscriber ack'ed recevied data # as flushed we'll now lose it if subscriber restars. 
That's why # logical_replication_wait_flush_lsn_sync is expected to hang while diff --git a/test_runner/regress/test_ondemand_download.py b/test_runner/regress/test_ondemand_download.py index 028d1c2e49b8..c344f30f4d9c 100644 --- a/test_runner/regress/test_ondemand_download.py +++ b/test_runner/regress/test_ondemand_download.py @@ -27,6 +27,7 @@ ) from fixtures.remote_storage import RemoteStorageKind, S3Storage, s3_storage from fixtures.utils import query_scalar, wait_until +from urllib3 import Retry if TYPE_CHECKING: from typing import Any @@ -676,16 +677,14 @@ def test_layer_download_cancelled_by_config_location(neon_env_builder: NeonEnvBu "compaction_period": "0s", } ) - client = env.pageserver.http_client() + + # Disable retries, because we'll hit code paths that can give us + # 503 and want to see that directly + client = env.pageserver.http_client(retries=Retry(status=0)) + failpoint = "before-downloading-layer-stream-pausable" client.configure_failpoints((failpoint, "pause")) - env.pageserver.allowed_errors.extend( - [ - ".*downloading failed, possibly for shutdown.*", - ] - ) - info = client.layer_map_info(env.initial_tenant, env.initial_timeline) assert len(info.delta_layers()) == 1 @@ -720,13 +719,9 @@ def test_layer_download_cancelled_by_config_location(neon_env_builder: NeonEnvBu client.configure_failpoints((failpoint, "off")) - with pytest.raises( - PageserverApiException, match="downloading failed, possibly for shutdown" - ): + with pytest.raises(PageserverApiException, match="Shutting down"): download.result() - env.pageserver.assert_log_contains(".*downloading failed, possibly for shutdown.*") - detach.result() client.configure_failpoints((failpoint, "pause")) diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index 1292682f9e3d..590093d23c37 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -443,7 +443,7 @@ def test_heatmap_uploads(neon_env_builder: NeonEnvBuilder): workload.write_rows(256, env.pageservers[0].id) env.pageserver.http_client().tenant_heatmap_upload(tenant_id) - def validate_heatmap(heatmap): + def validate_heatmap(heatmap, on_disk_heatmap): assert len(heatmap["timelines"]) == 1 assert heatmap["timelines"][0]["timeline_id"] == str(timeline_id) assert len(heatmap["timelines"][0]["layers"]) > 0 @@ -452,10 +452,13 @@ def validate_heatmap(heatmap): # Each layer appears at most once assert len(set(layer["name"] for layer in layers)) == len(layers) + assert heatmap == on_disk_heatmap + # Download and inspect the heatmap that the pageserver uploaded heatmap_first = env.pageserver_remote_storage.heatmap_content(tenant_id) + heatmap_first_on_disk = env.pageserver.heatmap_content(tenant_id) log.info(f"Read back heatmap: {heatmap_first}") - validate_heatmap(heatmap_first) + validate_heatmap(heatmap_first, heatmap_first_on_disk) # Do some more I/O to generate more layers workload.churn_rows(64, env.pageservers[0].id) @@ -463,9 +466,10 @@ def validate_heatmap(heatmap): # Ensure that another heatmap upload includes the new layers heatmap_second = env.pageserver_remote_storage.heatmap_content(tenant_id) + heatmap_second_on_disk = env.pageserver.heatmap_content(tenant_id) log.info(f"Read back heatmap: {heatmap_second}") assert heatmap_second != heatmap_first - validate_heatmap(heatmap_second) + validate_heatmap(heatmap_second, heatmap_second_on_disk) def list_elegible_layers( diff --git 
a/test_runner/regress/test_physical_and_logical_replicaiton.py b/test_runner/regress/test_physical_and_logical_replicaiton.py index 3f9824ee677a..229439106b82 100644 --- a/test_runner/regress/test_physical_and_logical_replicaiton.py +++ b/test_runner/regress/test_physical_and_logical_replicaiton.py @@ -43,7 +43,7 @@ def test_physical_and_logical_replication_slot_not_copied(neon_simple_env: NeonE s_cur.execute("select count(*) from t") assert s_cur.fetchall()[0][0] == n_records - logical_replication_sync(vanilla_pg, primary) + logical_replication_sync(vanilla_pg, primary, "sub1") assert vanilla_pg.safe_psql("select count(*) from t")[0][0] == n_records # Check that LR slot is not copied to replica @@ -87,7 +87,7 @@ def test_aux_not_logged_at_replica(neon_simple_env: NeonEnv, vanilla_pg): s_con = secondary.connect() s_cur = s_con.cursor() - logical_replication_sync(vanilla_pg, primary) + logical_replication_sync(vanilla_pg, primary, "sub1") assert vanilla_pg.safe_psql("select count(*) from t")[0][0] == n_records s_cur.execute("select count(*) from t") diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 350fe31099fb..11a4d0920210 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -373,6 +373,7 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up but imports the generation number. """ + neon_env_builder.num_azs = 3 env, origin_ps, tenant_id, generation = prepare_onboarding_env(neon_env_builder) virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True) @@ -409,6 +410,9 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up "node_secondary" ][0] + # Check that the secondary's scheduling is stable + assert env.storage_controller.reconcile_all() == 0 + # Call into storage controller to onboard the tenant generation += 1 r = virtual_ps_http.tenant_location_conf( @@ -460,6 +464,9 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up ) assert len(r["shards"]) == 1 + # Check that onboarding did not result in an unstable scheduling state + assert env.storage_controller.reconcile_all() == 0 + # We should see the tenant is now attached to the pageserver managed # by the sharding service origin_tenants = origin_ps.http_client().tenant_list() diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index 1304d302b79c..0f4e5688a957 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -32,6 +32,12 @@ def test_scrubber_tenant_snapshot(neon_env_builder: NeonEnvBuilder, shard_count: neon_env_builder.num_pageservers = shard_count if shard_count is not None else 1 env = neon_env_builder.init_start() + # We restart pageserver(s), which will cause storage controller + # requests to fail and warn. 
+ env.storage_controller.allowed_errors.append(".*management API still failed.*") + env.storage_controller.allowed_errors.append( ".*Reconcile error.*error sending request for url.*" ) tenant_id = env.initial_tenant timeline_id = env.initial_timeline branch = "main" @@ -65,6 +71,10 @@ def test_scrubber_tenant_snapshot(neon_env_builder: NeonEnvBuilder, shard_count: else: tenant_shard_ids = [TenantShardId(tenant_id, 0, 0)] + # Let shards finish rescheduling to other pageservers: this makes the rest of the test more stable + # as it won't overlap with migrations + env.storage_controller.reconcile_until_idle(max_interval=0.1, timeout_secs=120) + output_path = neon_env_builder.test_output_dir / "snapshot" os.makedirs(output_path) diff --git a/test_runner/regress/test_subscriber_branching.py b/test_runner/regress/test_subscriber_branching.py index 645572da8e79..849d4f024d65 100644 --- a/test_runner/regress/test_subscriber_branching.py +++ b/test_runner/regress/test_subscriber_branching.py @@ -3,7 +3,7 @@ import time from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, logical_replication_sync +from fixtures.neon_fixtures import NeonEnv from fixtures.utils import query_scalar, wait_until @@ -208,7 +208,6 @@ def insert_data(pub, start): # wake the sub and ensure that it catches up with the new data sub.start(create_test_user=True) with sub.cursor(dbname="neondb", user="test", password="testpwd") as scur: - logical_replication_sync(sub, pub) wait_until(check_that_changes_propagated) scur.execute("SELECT count(*) FROM t") res = scur.fetchall() diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index bec827058270..306e9716578e 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -582,12 +582,12 @@ def worker(): # This is expected: we are injecting chaos, API calls will sometimes fail. # TODO: can we narrow this to assert we are getting friendly 503s? 
log.info(f"Iteration error, will retry: {e}") - shutdown.wait(random.random()) + shutdown.wait(random.random() * 0.5) except requests.exceptions.RetryError as e: # Retryable error repeated more times than `requests` is configured to tolerate, this # is expected when a pageserver remains unavailable for a couple seconds log.info(f"Iteration error, will retry: {e}") - shutdown.wait(random.random()) + shutdown.wait(random.random() * 0.5) except Exception as e: log.warning( f"Unexpected worker exception (current timeline {state.timeline_id}): {e}" @@ -632,7 +632,7 @@ def worker(): # Make sure we're up for as long as we spent restarting, to ensure operations can make progress log.info(f"Staying alive for {restart_duration}s") - time.sleep(restart_duration) + time.sleep(restart_duration * 2) else: # Migrate our tenant between pageservers origin_ps = env.get_tenant_pageserver(tenant_shard_id) @@ -651,7 +651,7 @@ def worker(): # Sanity check that during our run we did exercise some full timeline lifecycles, in case # one of our workers got stuck - assert len(timelines_deleted) > 10 + assert len(timelines_deleted) > 5 # That no invariant-violations were reported by workers assert violations == [] diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index 3cf7ce1afab7..86d9ea96ebb9 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit 3cf7ce1afab75027716d14223f95ddb300754162 +Subproject commit 86d9ea96ebb9088eac62f57f1f5ace68e70e0d1c diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index b654fa88b6fd..8dfd5a7030d3 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit b654fa88b6fd2ad24a03a14a7cd417ec66e518f9 +Subproject commit 8dfd5a7030d3e8a98b60265ebe045788892ac7f3 diff --git a/vendor/revisions.json b/vendor/revisions.json index 982f53769238..efddaef46a4f 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,11 +1,11 @@ { "v17": [ "17.2", - "b654fa88b6fd2ad24a03a14a7cd417ec66e518f9" + "8dfd5a7030d3e8a98b60265ebe045788892ac7f3" ], "v16": [ "16.6", - "3cf7ce1afab75027716d14223f95ddb300754162" + "86d9ea96ebb9088eac62f57f1f5ace68e70e0d1c" ], "v15": [ "15.10", diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index a3dffa8f195a..2c65401154a8 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -92,6 +92,7 @@ tonic = { version = "0.12", default-features = false, features = ["codegen", "pr tower = { version = "0.4", default-features = false, features = ["balance", "buffer", "limit", "util"] } tracing = { version = "0.1", features = ["log"] } tracing-core = { version = "0.1" } +tracing-log = { version = "0.2" } url = { version = "2", features = ["serde"] } zerocopy = { version = "0.7", features = ["derive", "simd"] } zeroize = { version = "1", features = ["derive", "serde"] }