Skip to content

Commit cda1c64

Browse files
authored
Implement matrix sharding (#492)
Since we've encountered the GitHub matrix size limit of 256 entries a couple of times now (recently in #388), this adds sharding to our matrix generation script. There's now a `--max-shards` option which splits the matrix into (up to) the requested number of shards. We don't split it evenly; instead, we eagerly fill each shard up to the 256-entry limit before moving on to the next one, so later shards may be empty. To consume the shards, we just copy the `build` job into two identical child jobs (`build-0` and `build-1`), each of which consumes one matrix shard. Ideally, we'd avoid code duplication here by using a reusable action or something similar for the build steps — but that's overkill at this point. We only need this for Linux for the foreseeable future.
1 parent e1deaa1 commit cda1c64

File tree

2 files changed

+124
-13
lines changed

2 files changed

+124
-13
lines changed

.github/workflows/linux.yml

+88-5
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ jobs:
125125
generate-matrix:
126126
runs-on: ubuntu-latest
127127
outputs:
128-
matrix: ${{ steps.set-matrix.outputs.matrix }}
128+
matrix-0: ${{ steps.set-matrix.outputs.matrix-0 }}
129+
matrix-1: ${{ steps.set-matrix.outputs.matrix-1 }}
129130
any_builds: ${{ steps.set-matrix.outputs.any_builds }}
130131
pythonbuild_changed: ${{ steps.changed.outputs.pythonbuild_any_changed }}
131132
steps:
@@ -144,11 +145,19 @@ jobs:
144145
- name: Generate build matrix
145146
id: set-matrix
146147
run: |
147-
uv run ci-matrix.py --platform linux --labels '${{ steps.get-labels.outputs.labels }}' > matrix.json && echo "matrix=$(cat matrix.json)" >> $GITHUB_OUTPUT
148+
uv run ci-matrix.py \
149+
--platform linux \
150+
--labels '${{ steps.get-labels.outputs.labels }}' \
151+
--max-shards 2 \
152+
> matrix.json
153+
154+
echo "matrix-0=$(jq -c '.["0"]' matrix.json)" >> $GITHUB_OUTPUT
155+
echo "matrix-1=$(jq -c '.["1"]' matrix.json)" >> $GITHUB_OUTPUT
156+
148157
# Display the matrix for debugging too
149158
cat matrix.json | jq
150159
151-
if jq -e '.include | length > 0' matrix.json > /dev/null; then
160+
if jq -e '.["0"].include | length > 0' matrix.json > /dev/null; then
152161
# Build matrix has entries
153162
echo "any_builds=true" >> $GITHUB_OUTPUT
154163
else
@@ -163,7 +172,7 @@ jobs:
163172
pythonbuild:
164173
- "src/*.rs"
165174
166-
build:
175+
build-0:
167176
needs:
168177
- generate-matrix
169178
- pythonbuild
@@ -174,7 +183,81 @@ jobs:
174183
attestations: write
175184
runs-on: ${{ matrix.runner }}
176185
strategy:
177-
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
186+
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix-0) }}
187+
fail-fast: false
188+
name: ${{ matrix.target_triple }} / ${{ matrix.python }} / ${{ matrix.build_options }}
189+
steps:
190+
- uses: actions/checkout@v4
191+
with:
192+
fetch-depth: 0
193+
194+
- name: Install Python
195+
uses: actions/setup-python@v5
196+
with:
197+
python-version: '3.11'
198+
199+
- name: Download pythonbuild
200+
uses: actions/download-artifact@v4
201+
with:
202+
name: pythonbuild
203+
path: build
204+
205+
- name: Download images
206+
uses: actions/download-artifact@v4
207+
with:
208+
pattern: image-*
209+
path: build
210+
merge-multiple: true
211+
212+
- name: Load Docker Images
213+
run: |
214+
for f in build/image-*.tar.zst; do
215+
echo "decompressing $f"
216+
zstd -d --rm ${f}
217+
done
218+
219+
for f in build/image-*.tar; do
220+
echo "loading $f"
221+
docker load --input $f
222+
done
223+
224+
- name: Build
225+
if: ${{ ! matrix.dry-run }}
226+
run: |
227+
# Do empty target so all generated files are touched.
228+
./build-linux.py --make-target empty
229+
230+
# Touch mtimes of all images so they are newer than autogenerated files above.
231+
touch build/image-*
232+
233+
./build-linux.py --target-triple ${{ matrix.target_triple }} --python cpython-${{ matrix.python }} --options ${{ matrix.build_options }}
234+
235+
- name: Validate Distribution
236+
if: ${{ ! matrix.dry-run }}
237+
run: |
238+
chmod +x build/pythonbuild
239+
240+
if [ "${{ matrix.run }}" == "true" ]; then
241+
EXTRA_ARGS="--run"
242+
fi
243+
244+
build/pythonbuild validate-distribution ${EXTRA_ARGS} dist/*.tar.zst
245+
246+
- name: Upload Distribution
247+
if: ${{ ! matrix.dry-run }}
248+
uses: actions/upload-artifact@v4
249+
with:
250+
name: cpython-${{ matrix.python }}-${{ matrix.target_triple }}-${{ matrix.build_options }}
251+
path: dist/*
252+
253+
build-1:
254+
needs:
255+
- generate-matrix
256+
- pythonbuild
257+
- image
258+
runs-on: ${{ matrix.runner }}
259+
strategy:
260+
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix-1) }}
178261
fail-fast: false
179262
name: ${{ matrix.target_triple }} / ${{ matrix.python }} / ${{ matrix.build_options }}
180263
steps:

ci-matrix.py

+36-8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import argparse
1010
import json
11+
import sys
1112
from typing import Any, Optional
1213

1314
import yaml
@@ -16,6 +17,7 @@
1617
CI_TARGETS_YAML = "ci-targets.yaml"
1718
CI_RUNNERS_YAML = "ci-runners.yaml"
1819
CI_EXTRA_SKIP_LABELS = ["documentation"]
20+
CI_MATRIX_SIZE_LIMIT = 256 # The maximum size of a matrix in GitHub Actions
1921

2022

2123
def meets_conditional_version(version: str, min_version: str) -> bool:
@@ -216,6 +218,12 @@ def parse_args() -> argparse.Namespace:
216218
choices=["darwin", "linux", "windows"],
217219
help="Filter matrix entries by platform",
218220
)
221+
parser.add_argument(
222+
"--max-shards",
223+
type=int,
224+
default=0,
225+
help="The maximum number of shards allowed; set to zero to disable ",
226+
)
219227
parser.add_argument(
220228
"--labels",
221229
help="Comma-separated list of labels to filter by (e.g., 'platform:darwin,python:3.13,build:debug'), all must match.",
@@ -246,14 +254,34 @@ def main() -> None:
246254
if runner_config.get("free")
247255
}
248256

249-
matrix = {
250-
"include": generate_matrix_entries(
251-
config,
252-
runners,
253-
args.platform,
254-
labels,
255-
)
256-
}
257+
entries = generate_matrix_entries(
258+
config,
259+
runners,
260+
args.platform,
261+
labels,
262+
)
263+
264+
if args.max_shards:
265+
matrix = {}
266+
shards = (len(entries) // CI_MATRIX_SIZE_LIMIT) + 1
267+
if shards > args.max_shards:
268+
print(
269+
f"error: matrix of size {len(entries)} requires {shards} shards, but the maximum is {args.max_shards}; consider increasing `--max-shards`",
270+
file=sys.stderr,
271+
)
272+
sys.exit(1)
273+
for shard in range(args.max_shards):
274+
shard_entries = entries[
275+
shard * CI_MATRIX_SIZE_LIMIT : (shard + 1) * CI_MATRIX_SIZE_LIMIT
276+
]
277+
matrix[str(shard)] = {"include": shard_entries}
278+
else:
279+
if len(entries) > CI_MATRIX_SIZE_LIMIT:
280+
print(
281+
f"warning: matrix of size {len(entries)} exceeds limit of {CI_MATRIX_SIZE_LIMIT} but sharding is not enabled; consider setting `--max-shards`",
282+
file=sys.stderr,
283+
)
284+
matrix = {"include": entries}
257285

258286
print(json.dumps(matrix))
259287

0 commit comments

Comments
 (0)