Skip to content

Commit f24f85e

Browse files
committed
Build quantize for Linux
Signed-off-by: Christian Heimes <[email protected]>
1 parent c27cdd6 commit f24f85e

File tree

10 files changed

+95
-20
lines changed

10 files changed

+95
-20
lines changed

.github/workflows/build.yaml

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,70 @@ jobs:
3030
- name: make build/quantize from llama.cpp sources
3131
env:
3232
CMAKE_ARGS: "-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON"
33-
run: make build/quantize
33+
run: make quantize
3434

3535
- name: file info
36-
run: file build/quantize
36+
run: file build/quantize*
3737

3838
- uses: actions/upload-artifact@v4
3939
with:
40-
name: "quantize-macos"
41-
path: build/quantize
40+
name: "quantize-macos-arm64"
41+
path: build/quantize*
42+
43+
linux-build:
44+
name: "Build quantize on Linux for ${{ matrix.arch }}"
45+
runs-on: "ubuntu-latest"
46+
strategy:
47+
fail-fast: true
48+
matrix:
49+
arch:
50+
- "amd64"
51+
- "arm64"
52+
# - "ppc64le"
53+
# - "s390x"
54+
image:
55+
- registry.access.redhat.com/ubi9/python-312
56+
steps:
57+
- uses: "actions/checkout@v4"
58+
with:
59+
submodules: true
60+
61+
- name: Set up QEMU
62+
uses: docker/setup-qemu-action@v3
63+
64+
- name: Set up Docker Buildx
65+
uses: docker/setup-buildx-action@v3
66+
67+
- name: Pull ${{ matrix.image }} for linux/${{ matrix.arch }}
68+
run: |
69+
docker pull --platform linux/${{ matrix.arch }} ${{ matrix.image }}
70+
71+
- name: make build/quantize from llama.cpp sources
72+
run: |
73+
set -e
74+
docker run --platform linux/${{ matrix.arch }} ${{ matrix.image }} uname -a
75+
docker run --platform linux/${{ matrix.arch }} \
76+
-v .:/opt/app-root/src \
77+
-e CMAKE_ARGS="-DLLAMA_FATAL_WARNINGS=ON" \
78+
${{ matrix.image }} \
79+
make quantize
80+
81+
- name: file info
82+
run: file build/quantize*
83+
84+
- uses: actions/upload-artifact@v4
85+
with:
86+
name: "quantize-linux-${{ matrix.arch }}"
87+
path: build/quantize*
88+
89+
merge-artifacts:
90+
name: Merge artifacts
91+
runs-on: ubuntu-latest
92+
needs:
93+
- macos-build
94+
- linux-build
95+
steps:
96+
- name: Merge artifacts
97+
uses: actions/upload-artifact/merge@v4
98+
with:
99+
name: quantize

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
steps:
3131
- uses: "actions/checkout@v4"
3232
with:
33-
submodules: false
33+
submodules: true
3434

3535
- uses: "actions/setup-python@v5"
3636
with:

Makefile

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
CMAKE_ARGS ?=
4-
QUANTIZE = build/quantize
5-
LLAMA_BUILDDIR = build/llama.cpp
4+
5+
UNAME_MACHINE = $(shell uname -m | tr A-Z a-z)
6+
UNAME_OS = $(shell uname -s | tr A-Z a-z)
7+
QUANTIZE = build/quantize-$(UNAME_MACHINE)-$(UNAME_OS)
8+
LLAMA_BUILDDIR = build/llama.cpp-$(UNAME_MACHINE)-$(UNAME_OS)
69
LLAMA_DIR = llama.cpp
710

811

@@ -27,7 +30,10 @@ $(LLAMA_BUILDDIR)/Makefile: $(LLAMA_DIR)/CMakeLists.txt
2730
CMAKE_ARGS="$(CMAKE_ARGS)" cmake -S $(dir $<) -B $(dir $@)
2831

2932
$(LLAMA_BUILDDIR)/bin/quantize: $(LLAMA_BUILDDIR)/Makefile
30-
cmake --build $(dir $<) --config Release --target quantize
33+
cmake --build $(dir $<) --parallel 2 --config Release --target quantize
34+
35+
.PHONY: quantize
36+
quantize: $(QUANTIZE)
3137

3238
$(QUANTIZE): $(LLAMA_BUILDDIR)/bin/quantize
3339
cp -a $< $@

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ classifiers = [
1818
"Environment :: Console",
1919
"License :: OSI Approved :: Apache Software License",
2020
"License :: OSI Approved :: MIT License",
21+
"Operating System :: POSIX :: Linux",
2122
"Operating System :: MacOS :: MacOS X",
2223
"Topic :: Scientific/Engineering :: Artificial Intelligence",
2324
"Programming Language :: Python :: 3",
@@ -41,6 +42,9 @@ issues = "https://github.com/tiran/instructlab-quantize/issues"
4142
[tool.setuptools]
4243
package-dir = {"" = "src"}
4344

45+
[tool.setuptools.package-data]
46+
"instructlab_quantize" = ["quantize-*"]
47+
4448
[tool.check-wheel-contents]
4549

4650
[tool.ruff]

src/instructlab_quantize/__init__.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,27 @@
22
"""Run the platform-specific quantize binary (macOS and Linux)"""
33

44
import os
5+
import platform
56
import subprocess
7+
import sys
68
from importlib import resources
79

8-
__all__ = (
9-
"QUANTIZE",
10-
"run_quantize",
11-
)
12-
13-
QUANTIZE = resources.files("instructlab_quantize").joinpath("quantize")
10+
__all__ = ("run_quantize",)
1411

1512

1613
def run_quantize(*quantizeargs, **kwargs):
1714
"""Run quantize with subprocess.check_output
1815
1916
stdout = run_quantize("extra", "arguments")
2017
"""
21-
with resources.as_file(QUANTIZE) as quantize:
18+
machine = platform.machine().lower()
19+
quantize_bin = f"quantize-{machine}-{sys.platform}"
20+
21+
files = resources.files("instructlab_quantize")
22+
23+
with resources.as_file(files.joinpath(quantize_bin)) as quantize:
24+
if not quantize.exists():
25+
raise FileNotFoundError(quantize)
2226
args = [os.fspath(quantize)]
2327
args.extend(quantizeargs)
2428
return subprocess.check_output(args, **kwargs)

src/instructlab_quantize/__main__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
import sys
3+
4+
from . import run_quantize
5+
6+
print(run_quantize(*sys.argv[1:]))
1.43 MB
Binary file not shown.
1.65 MB
Binary file not shown.

tests.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ def m_check_output():
2020

2121

2222
def test_mock_run_quantize(m_check_output: mock.Mock):
23-
quantize = os.fspath(PKG_DIR.joinpath("quantize"))
23+
machine = platform.machine().lower()
24+
quantize = os.fspath(PKG_DIR.joinpath(f"quantize-{machine}-{sys.platform}"))
2425
instructlab_quantize.run_quantize("egg", "spam")
2526
m_check_output.assert_called_with([quantize, "egg", "spam"])
2627
m_check_output.reset_mock()
@@ -29,10 +30,6 @@ def test_mock_run_quantize(m_check_output: mock.Mock):
2930
m_check_output.assert_called_with([quantize, "--help"], stderr=subprocess.STDOUT)
3031

3132

32-
@pytest.mark.skipif(
33-
sys.platform != "darwin" and platform.machine() != "arm64",
34-
reason="binary is Apple M1-only",
35-
)
3633
def test_run_quantize(tmp_path: pathlib.Path):
3734
with pytest.raises(subprocess.CalledProcessError) as exc_info:
3835
instructlab_quantize.run_quantize("--help", stderr=subprocess.STDOUT, text=True)

0 commit comments

Comments
 (0)