Skip to content

Commit fa9f88c

Browse files
committed
Build quantize binary in setup.py
- remove pre-built binaries from git
- create a proper sdist with sources
- add `setup.py` to compile the `quantize` binary for the current platform
- update to the latest release of `llama.cpp`

Signed-off-by: Christian Heimes <[email protected]>
1 parent 8ed18f8 commit fa9f88c

File tree

12 files changed

+230
-26
lines changed

12 files changed

+230
-26
lines changed

.github/workflows/pypi.yaml

+52-10
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,65 @@ on:
1414
types:
1515
- published
1616

17-
permissions:
18-
# allow gh release upload
19-
contents: write
20-
# see https://docs.pypi.org/trusted-publishers/
21-
id-token: write
22-
2317
jobs:
2418
build-package:
2519
name: Build and check packages
2620
runs-on: ubuntu-latest
21+
if: 0
2722
steps:
2823
- uses: actions/checkout@v4
2924
with:
3025
# for setuptools-scm
3126
fetch-depth: 0
27+
submodules: true
3228

3329
- uses: hynek/build-and-inspect-python-package@v2
3430

31+
build_wheels:
32+
name: Build wheels on ${{ matrix.os }}
33+
runs-on: ${{ matrix.os }}
34+
strategy:
35+
matrix:
36+
# macos-13 is an intel runner, macos-14 is apple silicon
37+
os: [ubuntu-latest, ubuntu-24.04-arm, macos-14]
38+
39+
steps:
40+
- uses: actions/checkout@v4
41+
with:
42+
# for setuptools-scm
43+
fetch-depth: 0
44+
submodules: true
45+
46+
- name: Build wheels
47+
uses: pypa/[email protected]
48+
49+
- uses: actions/upload-artifact@v4
50+
with:
51+
name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
52+
path: ./wheelhouse/*.whl
53+
54+
build_sdist:
55+
name: Build source distribution
56+
runs-on: ubuntu-latest
57+
steps:
58+
- uses: actions/checkout@v4
59+
with:
60+
# for setuptools-scm
61+
fetch-depth: 0
62+
submodules: true
63+
64+
- name: Build sdist
65+
run: pipx run build --sdist
66+
67+
- uses: actions/upload-artifact@v4
68+
with:
69+
name: cibw-sdist
70+
path: dist/*.tar.gz
71+
3572
publish-test-pypi:
3673
name: Publish packages to test.pypi.org
74+
permissions:
75+
id-token: write
3776
# environment: publish-test-pypi
3877
# TODO: move to instructlab
3978
if: |
@@ -48,8 +87,9 @@ jobs:
4887
- name: Fetch build artifacts
4988
uses: actions/download-artifact@v4
5089
with:
51-
name: Packages
90+
pattern: cibw-*
5291
path: dist
92+
merge-multiple: true
5393

5494
- name: Upload to Test PyPI
5595
uses: pypa/gh-action-pypi-publish@release/v1
@@ -60,17 +100,19 @@ jobs:
60100
name: Publish release to pypi.org
61101
# environment: publish-pypi
62102
# TODO: move to instructlab
63-
if: |
64-
github.repository_owner == 'tiran' && github.event.action == 'published'
103+
#if: |
104+
# github.repository_owner == 'tiran' && github.event.action == 'published'
105+
if: 0
65106
runs-on: ubuntu-latest
66107
needs: build-package
67108

68109
steps:
69110
- name: Fetch build artifacts
70111
uses: actions/download-artifact@v4
71112
with:
72-
name: Packages
113+
pattern: cibw-*
73114
path: dist
115+
merge-multiple: true
74116

75117
- uses: sigstore/[email protected]
76118
with:

.github/workflows/tests.yml

+6-4
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,21 @@ jobs:
2626
- "3.10"
2727
- "3.11"
2828
- "3.12"
29-
- "3.13-dev"
29+
- "3.13"
3030
steps:
3131
- uses: "actions/checkout@v4"
3232
with:
33-
submodules: true
33+
submodules: true
34+
# for setuptools-scm
35+
fetch-depth: 0
3436

3537
- uses: "actions/setup-python@v5"
3638
with:
3739
python-version: "${{ matrix.python-version }}"
3840
allow-prereleases: true
3941

4042
- name: "Update pip"
41-
run: python -m pip install --upgrade pip setuptools wheel
43+
run: python -m pip install --upgrade pip
4244

4345
- name: "Install tox dependencies"
4446
run: python -m pip install --upgrade tox tox-gh-actions
@@ -55,7 +57,7 @@ jobs:
5557
submodules: true
5658

5759
- name: "Update pip"
58-
run: python -m pip install --upgrade pip setuptools wheel
60+
run: python -m pip install --upgrade pip
5961

6062
- name: "Install tox dependencies"
6163
run: python -m pip install --upgrade tox

MANIFEST.in

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
include tox.ini tests.py .pylintrc
2+
recursive-include llama.cpp *
3+
exclude llama.cpp/.git
4+
5+
global-exclude gguf.inp gguf.out
6+
exclude llama.cpp/models/ggml-vocab-aquila.gguf
7+
exclude llama.cpp/models/ggml-vocab-baichuan.gguf
8+
exclude llama.cpp/models/ggml-vocab-bert-bge.gguf
9+
exclude llama.cpp/models/ggml-vocab-command-r.gguf
10+
exclude llama.cpp/models/ggml-vocab-deepseek-coder.gguf
11+
exclude llama.cpp/models/ggml-vocab-deepseek-llm.gguf
12+
exclude llama.cpp/models/ggml-vocab-falcon.gguf
13+
exclude llama.cpp/models/ggml-vocab-gpt2.gguf
14+
exclude llama.cpp/models/ggml-vocab-gpt-neox.gguf
15+
# used in tests.py
16+
# exclude llama.cpp/models/ggml-vocab-llama.gguf
17+
exclude llama.cpp/models/ggml-vocab-mpt.gguf
18+
exclude llama.cpp/models/ggml-vocab-llama-bpe.gguf
19+
exclude llama.cpp/models/ggml-vocab-llama-spm.gguf
20+
exclude llama.cpp/models/ggml-vocab-phi-3.gguf
21+
exclude llama.cpp/models/ggml-vocab-qwen2.gguf
22+
exclude llama.cpp/models/ggml-vocab-refact.gguf
23+
exclude llama.cpp/models/ggml-vocab-stablelm-3b-4e1t.gguf
24+
exclude llama.cpp/models/ggml-vocab-starcoder.gguf

Makefile

+17-9
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
# SPDX-License-Identifier: Apache-2.0
22

3-
CMAKE_ARGS ?=
3+
CMAKE_ARGS ?= -GNinja \
4+
-DCMAKE_BUILD_TYPE=Release \
5+
-DBUILD_SHARED_LIBS=OFF \
6+
-DGGML_NATIVE=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF \
7+
-DLLAMA_BUILD_TESTS=OFF \
8+
-DLLAMA_BUILD_SERVER=OFF
49

510
UNAME_MACHINE = $(shell uname -m | tr A-Z a-z)
611
UNAME_OS = $(shell uname -s | tr A-Z a-z)
712
QUANTIZE = build/quantize-$(UNAME_MACHINE)-$(UNAME_OS)
813
LLAMA_BUILDDIR = build/llama.cpp-$(UNAME_MACHINE)-$(UNAME_OS)
914
LLAMA_DIR = llama.cpp
1015

11-
1216
.PHONY: all
13-
all: test $(QUANTIZE)
17+
all:
1418

1519
.PHONY: test
1620
test:
@@ -23,17 +27,21 @@ fix:
2327

2428
.PHONY: clean
2529
clean:
26-
rm -rf .tox .ruff_cache dist build
30+
rm -rf dist build
31+
32+
.PHONY: realclean
33+
realclean: clean
34+
rm -rf .tox .ruff_cache .mypy_cache
2735

28-
$(LLAMA_BUILDDIR)/Makefile: $(LLAMA_DIR)/CMakeLists.txt
36+
$(LLAMA_BUILDDIR)/Makefile: $(LLAMA_DIR)/CMakeLists.txt $(MAKEFILE_LIST)
2937
@mkdir -p $(dir $@)
30-
CMAKE_ARGS="$(CMAKE_ARGS)" cmake -S $(dir $<) -B $(dir $@)
38+
cmake -S $(dir $<) -B $(dir $@) $(CMAKE_ARGS)
3139

32-
$(LLAMA_BUILDDIR)/bin/quantize: $(LLAMA_BUILDDIR)/Makefile
33-
cmake --build $(dir $<) --parallel 2 --config Release --target quantize
40+
$(LLAMA_BUILDDIR)/bin/llama-quantize: $(LLAMA_BUILDDIR)/Makefile
41+
cmake --build $(dir $<) --config Release --target llama-quantize
3442

3543
.PHONY: quantize
3644
quantize: $(QUANTIZE)
3745

38-
$(QUANTIZE): $(LLAMA_BUILDDIR)/bin/quantize
46+
$(QUANTIZE): $(LLAMA_BUILDDIR)/bin/llama-quantize
3947
cp -a $< $@

llama.cpp

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
[build-system]
4-
requires = ["setuptools>=64", "setuptools_scm>=8"]
4+
requires = ["setuptools>=64", "setuptools_scm>=8", "wheel"]
55
build-backend = "setuptools.build_meta"
66

77
[project]

setup.py

+117
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import os
2+
import platform
3+
import subprocess
4+
import sys
5+
6+
from setuptools import setup
7+
from setuptools.command.build_py import build_py
8+
from setuptools.dist import Distribution
9+
from wheel.bdist_wheel import bdist_wheel as bdist_wheel
10+
11+
# Options passed to every CMake configure run.
CMAKE_ARGS = [
    "-GNinja",
    "-DCMAKE_BUILD_TYPE=Release",
    "-DBUILD_SHARED_LIBS=OFF",
    # build with base ISA
    "-DGGML_NATIVE=OFF",
    "-DLLAMA_NATIVE=OFF",
    "-DLLAMA_BUILD_TESTS=OFF",
    "-DLLAMA_BUILD_SERVER=OFF",
]

# force x86_64-v2 ISA: each extension is switched off under both the GGML_
# and the LLAMA_ option prefix (all GGML_ entries first, then all LLAMA_).
CMAKE_ARGS_X86_64 = [
    f"-D{prefix}_{extension}=OFF"
    for prefix in ("GGML", "LLAMA")
    for extension in ("AVX", "AVX2", "FMA", "F16C")
]

# build and embed METAL on Apple M, again under both option prefixes.
CMAKE_ARGS_DARWIN_AARCH64 = [
    f"-D{prefix}_{option}=ON"
    for prefix in ("GGML", "LLAMA")
    for option in ("METAL", "METAL_EMBED_LIBRARY")
]

# Name of the CMake target to build; also the file name looked up under
# the build tree's "bin" directory.
QUANTIZE_BINARY = "llama-quantize"
40+
41+
42+
class Py3NoneBdistWheel(bdist_wheel):
    """bdist_wheel variant that tags the wheel as py3-none-{platform}."""

    def finalize_options(self) -> None:
        """Finalize options as usual, then flag the wheel root as not pure."""
        super().finalize_options()
        self.root_is_pure = False

    def get_tag(self) -> tuple[str, str, str]:
        """Return ("py3", "none", platform), keeping only the parent's platform tag."""
        _, _, platform_tag = super().get_tag()
        return ("py3", "none", platform_tag)
52+
53+
54+
class QuantizeBuildPy(build_py):
    """Hack to build the quantize binary and copy it alongside the Python files.

    ``run()`` first configures and builds the ``QUANTIZE_BINARY`` CMake target
    from the ``llama.cpp`` source tree, copies the result into the first
    package of the distribution, and then continues with the regular
    ``build_py`` step.
    """

    def build_quantize(self) -> None:
        """Configure, build and install the quantize binary into build_lib.

        Raises:
            subprocess.CalledProcessError: if the CMake configure or build
                step exits with a non-zero status.
        """
        # Switch to scikit-build-core? I have not found an example how to
        # ship a program with scikit-build-core.
        arch = platform.uname().machine
        build_cmd = self.get_finalized_command("build")
        package_name = self.distribution.packages[0]
        build_temp = build_cmd.build_temp

        cmake_args = [
            "cmake",
            "-S",
            "llama.cpp",
            "-B",
            build_temp,
        ]
        cmake_args.extend(CMAKE_ARGS)
        # macOS reports Apple Silicon as "arm64", not "aarch64". Accept both
        # spellings so the Metal options are actually applied there.
        if sys.platform == "darwin" and arch in ("arm64", "aarch64"):
            cmake_args.extend(CMAKE_ARGS_DARWIN_AARCH64)
        elif arch == "x86_64":
            cmake_args.extend(CMAKE_ARGS_X86_64)
        print(f"Run {' '.join(cmake_args)}")
        subprocess.check_call(cmake_args)

        build_args = [
            "cmake",
            "--build",
            build_temp,
            "--config",
            "Release",
            "--target",
            QUANTIZE_BINARY,
        ]
        print(f"Run {' '.join(build_args)}")
        subprocess.check_call(build_args)

        # The installed file name keeps the raw machine string reported by
        # the platform (e.g. "arm64" on macOS), followed by sys.platform.
        infile = os.path.join(build_temp, "bin", QUANTIZE_BINARY)
        outname = f"quantize-{arch}-{sys.platform}"
        outfile = os.path.join(self.build_lib, package_name, outname)
        directory = os.path.dirname(outfile)
        os.makedirs(directory, exist_ok=True)
        self.copy_file(infile, outfile, preserve_mode=True)
        # Register the binary as package data of the package it was copied into.
        self.package_data[package_name] = [outname]

    def run(self) -> None:
        """Build the quantize binary, then run the standard build_py step."""
        self.build_quantize()
        return super().run()
102+
103+
104+
class BinaryDistribution(Distribution):
    """Mark the package as containing platform-specific (platlib) content."""

    def has_ext_modules(self) -> bool:
        """Always report extension modules, although none are declared."""
        return True
109+
110+
111+
# Register the custom distribution class and the overridden commands.
setup(
    cmdclass=dict(
        bdist_wheel=Py3NoneBdistWheel,
        build_py=QuantizeBuildPy,
    ),
    distclass=BinaryDistribution,
)
-1.39 MB
Binary file not shown.
-1.58 MB
Binary file not shown.
-1.59 MB
Binary file not shown.

tests.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77
import sys
88
from unittest import mock
99

10-
import instructlab_quantize
1110
import pytest
1211

12+
import instructlab_quantize
13+
1314
PKG_DIR = pathlib.Path(instructlab_quantize.__file__).absolute().parent
1415

1516

tox.ini

+10
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,16 @@ deps =
4141
commands =
4242
ruff format {posargs:--check}
4343

44+
[testenv:fix]
45+
description = fix code with Ruff
46+
skip_install = True
47+
skipsdist = true
48+
deps =
49+
ruff
50+
commands =
51+
ruff format
52+
ruff check --fix
53+
4454
[gh-actions]
4555
python =
4656
3.9: py39

0 commit comments

Comments
 (0)