Skip to content

Commit bb69b8f

Browse files
committed
ci : update build-self-hosted.yml
1 parent 5d246a7 commit bb69b8f

4 files changed

Lines changed: 167 additions & 100 deletions

File tree

.github/workflows/build-self-hosted.yml

Lines changed: 76 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ env:
5757
jobs:
5858
determine-tag:
5959
name: Determine tag name
60-
runs-on: ubuntu-slim
60+
runs-on: [self-hosted, fast]
6161
outputs:
6262
tag_name: ${{ steps.tag.outputs.name }}
6363
steps:
@@ -86,7 +86,7 @@ jobs:
8686
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
8787
run: |
8888
nvidia-smi
89-
GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
89+
GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
9090
9191
ggml-ci-nvidia-vulkan-cm:
9292
needs: determine-tag
@@ -103,7 +103,7 @@ jobs:
103103
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
104104
run: |
105105
vulkaninfo --summary
106-
GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
106+
GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
107107
108108
ggml-ci-nvidia-vulkan-cm2:
109109
needs: determine-tag
@@ -120,10 +120,11 @@ jobs:
120120
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
121121
run: |
122122
vulkaninfo --summary
123-
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
123+
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
124124
125125
ggml-ci-nvidia-webgpu:
126-
runs-on: [self-hosted, Linux, NVIDIA]
126+
needs: determine-tag
127+
runs-on: [self-hosted, Linux, NVIDIA, X64]
127128

128129
steps:
129130
- name: Clone
@@ -149,10 +150,11 @@ jobs:
149150
GG_BUILD_WEBGPU=1 \
150151
GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
151152
GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
152-
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
153+
bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
153154
154155
# TODO: provision AMX-compatible machine
155156
#ggml-ci-cpu-amx:
157+
# needs: determine-tag
156158
# runs-on: [self-hosted, Linux, CPU, AMX]
157159

158160
# steps:
@@ -163,10 +165,11 @@ jobs:
163165
# - name: Test
164166
# id: ggml-ci
165167
# run: |
166-
# bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
168+
# bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
167169

168170
# TODO: provision AMD GPU machine
169171
# ggml-ci-amd-vulkan:
172+
# needs: determine-tag
170173
# runs-on: [self-hosted, Linux, AMD]
171174

172175
# steps:
@@ -178,10 +181,11 @@ jobs:
178181
# id: ggml-ci
179182
# run: |
180183
# vulkaninfo --summary
181-
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
184+
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
182185

183186
# TODO: provision AMD GPU machine
184187
# ggml-ci-amd-rocm:
188+
# needs: determine-tag
185189
# runs-on: [self-hosted, Linux, AMD]
186190

187191
# steps:
@@ -193,7 +197,7 @@ jobs:
193197
# id: ggml-ci
194198
# run: |
195199
# amd-smi static
196-
# GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
200+
# GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
197201

198202
ggml-ci-mac-metal:
199203
needs: determine-tag
@@ -337,4 +341,66 @@ jobs:
337341
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
338342
run: |
339343
source ./openvino_toolkit/setupvars.sh
340-
GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
344+
GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
345+
346+
ggml-ci-arm64-cpu-low-perf:
347+
needs: determine-tag
348+
runs-on: [self-hosted, Linux, ARM64]
349+
350+
steps:
351+
- name: Clone
352+
id: checkout
353+
uses: actions/checkout@v6
354+
355+
- name: Test
356+
id: ggml-ci
357+
run: |
358+
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
359+
360+
ggml-ci-arm64-cpu-high-perf:
361+
needs: determine-tag
362+
runs-on: [self-hosted, Linux, ARM64]
363+
364+
steps:
365+
- name: Clone
366+
id: checkout
367+
uses: actions/checkout@v6
368+
369+
- name: Test
370+
id: ggml-ci
371+
run: |
372+
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
373+
374+
# TODO: not sure how to detect ARM flags on DGX Spark. Currently we get this warning during cmake:
375+
# CMake Warning at ggml/src/ggml-cpu/CMakeLists.txt:147 (message):
376+
# ARM -march/-mcpu not found, -mcpu=native will be used
377+
#
378+
# If we resolve this, we should be able to offload the commented-out jobs below to the self-hosted runners.
379+
#
380+
# ggml-ci-arm64-cpu-high-perf-sve:
381+
# needs: determine-tag
382+
# runs-on: [self-hosted, Linux, NVIDIA, ARM64]
383+
#
384+
# steps:
385+
# - name: Clone
386+
# id: checkout
387+
# uses: actions/checkout@v6
388+
#
389+
# - name: Test
390+
# id: ggml-ci
391+
# run: |
392+
# LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
393+
#
394+
# ggml-ci-arm64-cpu-kleidiai:
395+
# needs: determine-tag
396+
# runs-on: [self-hosted, Linux, NVIDIA, ARM64]
397+
#
398+
# steps:
399+
# - name: Clone
400+
# id: checkout
401+
# uses: actions/checkout@v6
402+
#
403+
# - name: Test
404+
# id: ggml-ci
405+
# run: |
406+
# GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

.github/workflows/build.yml

Lines changed: 52 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -931,31 +931,32 @@ jobs:
931931
run: |
932932
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
933933
934-
ggml-ci-arm64-cpu-low-perf:
935-
runs-on: ubuntu-22.04-arm
936-
937-
steps:
938-
- name: Clone
939-
id: checkout
940-
uses: actions/checkout@v6
941-
942-
- name: ccache
943-
uses: ggml-org/ccache-action@v1.2.21
944-
with:
945-
key: ggml-ci-arm64-cpu-low-perf
946-
evict-old-files: 1d
947-
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
948-
949-
- name: Dependencies
950-
id: depends
951-
run: |
952-
sudo apt-get update
953-
sudo apt-get install build-essential
954-
955-
- name: Test
956-
id: ggml-ci
957-
run: |
958-
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
934+
# note: this job was moved to build-self-hosted.yml - it can be removed from here once everything is stable
935+
# ggml-ci-arm64-cpu-low-perf:
936+
# runs-on: ubuntu-22.04-arm
937+
#
938+
# steps:
939+
# - name: Clone
940+
# id: checkout
941+
# uses: actions/checkout@v6
942+
#
943+
# - name: ccache
944+
# uses: ggml-org/ccache-action@v1.2.21
945+
# with:
946+
# key: ggml-ci-arm64-cpu-low-perf
947+
# evict-old-files: 1d
948+
# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
949+
#
950+
# - name: Dependencies
951+
# id: depends
952+
# run: |
953+
# sudo apt-get update
954+
# sudo apt-get install build-essential
955+
#
956+
# - name: Test
957+
# id: ggml-ci
958+
# run: |
959+
# LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
959960

960961
ggml-ci-x64-cpu-high-perf:
961962
runs-on: ubuntu-22.04
@@ -983,31 +984,32 @@ jobs:
983984
run: |
984985
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
985986
986-
ggml-ci-arm64-cpu-high-perf:
987-
runs-on: ubuntu-22.04-arm
988-
989-
steps:
990-
- name: Clone
991-
id: checkout
992-
uses: actions/checkout@v6
993-
994-
- name: ccache
995-
uses: ggml-org/ccache-action@v1.2.21
996-
with:
997-
key: ggml-ci-arm64-cpu-high-perf
998-
evict-old-files: 1d
999-
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
1000-
1001-
- name: Dependencies
1002-
id: depends
1003-
run: |
1004-
sudo apt-get update
1005-
sudo apt-get install build-essential
1006-
1007-
- name: Test
1008-
id: ggml-ci
1009-
run: |
1010-
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
987+
# note: this job was moved to build-self-hosted.yml - it can be removed from here once everything is stable
988+
# ggml-ci-arm64-cpu-high-perf:
989+
# runs-on: ubuntu-22.04-arm
990+
#
991+
# steps:
992+
# - name: Clone
993+
# id: checkout
994+
# uses: actions/checkout@v6
995+
#
996+
# - name: ccache
997+
# uses: ggml-org/ccache-action@v1.2.21
998+
# with:
999+
# key: ggml-ci-arm64-cpu-high-perf
1000+
# evict-old-files: 1d
1001+
# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
1002+
#
1003+
# - name: Dependencies
1004+
# id: depends
1005+
# run: |
1006+
# sudo apt-get update
1007+
# sudo apt-get install build-essential
1008+
#
1009+
# - name: Test
1010+
# id: ggml-ci
1011+
# run: |
1012+
# LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
10111013

10121014
ggml-ci-arm64-cpu-high-perf-sve:
10131015
runs-on: ubuntu-22.04-arm

.github/workflows/server-self-hosted.yml

Lines changed: 38 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -91,45 +91,44 @@ jobs:
9191
export ${{ matrix.extra_args }}
9292
pytest -v -x -m "not slow"
9393
94-
# TODO: provision CUDA runner
95-
# server-cuda:
96-
# runs-on: [self-hosted, llama-server, Linux, NVIDIA]
97-
#
98-
# name: server-cuda (${{ matrix.wf_name }})
99-
# strategy:
100-
# matrix:
101-
# build_type: [Release]
102-
# wf_name: ["GPUx1"]
103-
# include:
104-
# - build_type: Release
105-
# extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
106-
# wf_name: "GPUx1, backend-sampling"
107-
# fail-fast: false
108-
#
109-
# steps:
110-
# - name: Clone
111-
# id: checkout
112-
# uses: actions/checkout@v6
113-
# with:
114-
# fetch-depth: 0
115-
# ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
116-
#
117-
# - name: Build
118-
# id: cmake_build
119-
# run: |
120-
# cmake -B build -DGGML_SCHED_NO_REALLOC=ON
121-
# cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n hw.logicalcpu) --target llama-server
122-
#
123-
# - name: Tests
124-
# id: server_integration_tests
125-
# if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
126-
# run: |
127-
# cd tools/server/tests
128-
# python3 -m venv venv
129-
# source venv/bin/activate
130-
# pip install -r requirements.txt
131-
# export ${{ matrix.extra_args }}
132-
# pytest -v -x -m "not slow"
94+
server-cuda:
95+
runs-on: [self-hosted, llama-server, Linux, NVIDIA]
96+
97+
name: server-cuda (${{ matrix.wf_name }})
98+
strategy:
99+
matrix:
100+
build_type: [Release]
101+
wf_name: ["GPUx1"]
102+
include:
103+
- build_type: Release
104+
extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
105+
wf_name: "GPUx1, backend-sampling"
106+
fail-fast: false
107+
108+
steps:
109+
- name: Clone
110+
id: checkout
111+
uses: actions/checkout@v6
112+
with:
113+
fetch-depth: 0
114+
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
115+
116+
- name: Build
117+
id: cmake_build
118+
run: |
119+
cmake -B build -DGGML_CUDA=ON -DGGML_SCHED_NO_REALLOC=ON
120+
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
121+
122+
- name: Tests
123+
id: server_integration_tests
124+
if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
125+
run: |
126+
cd tools/server/tests
127+
python3 -m venv venv
128+
source venv/bin/activate
129+
pip install -r requirements.txt
130+
export ${{ matrix.extra_args }}
131+
pytest -v -x -m "not slow"
133132
134133
server-kleidiai:
135134
runs-on: ah-ubuntu_22_04-c8g_8x

ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ function gg_run_ctest_debug {
238238
(cmake -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
239239
(time cmake --build . --config Debug -j$(nproc)) 2>&1 | tee -a $OUT/${ci}-make.log
240240

241-
(time ctest -C Debug --output-on-failure -L main -E "test-opt|test-backend-ops" ${CTEST_EXTRA}) 2>&1 | tee -a $OUT/${ci}-ctest.log
241+
(time ctest -C Debug --output-on-failure -L main -E "test-opt|test-backend-ops|test-llama-archs" ${CTEST_EXTRA}) 2>&1 | tee -a $OUT/${ci}-ctest.log
242242

243243
set +e
244244
}

0 commit comments

Comments
 (0)