Skip to content

Commit 6c857c5

Browse files
philiptaron authored and SomeoneSerge committed
flake.nix : rewrite (ggml-org#4605)
* flake.lock: update to hotfix CUDA::cuda_driver Required to support ggml-org#4606 * flake.nix: rewrite 1. Split into separate files per output. 2. Added overlays, so that this flake can be integrated into others. The names in the overlay are `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm` so that they fit into the broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs). 3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/) rather than `with pkgs;` so that there's dependency injection rather than dependency lookup. 4. Add a description and meta information for each package. The description includes a bit about what's trying to accelerate each one. 5. Use specific CUDA packages instead of cudatoolkit on the advice of SomeoneSerge. 6. Format with `serokell/nixfmt` for a consistent style. 7. Update `flake.lock` with the latest goods. * flake.nix: use finalPackage instead of passing it manually * nix: unclutter darwin support * nix: pass most darwin frameworks unconditionally ...for simplicity * *.nix: nixfmt nix shell github:piegamesde/nixfmt/rfc101-style --command \ nixfmt flake.nix .devops/nix/*.nix * flake.nix: add maintainers * nix: move meta down to follow Nixpkgs style more closely * nix: add missing meta attributes nix: clarify the interpretation of meta.maintainers nix: clarify the meaning of "broken" and "badPlatforms" nix: passthru: expose the use* flags for inspection E.g.: ``` ❯ nix eval .#cuda.useCuda true ``` * flake.nix: avoid re-evaluating nixpkgs too many times * flake.nix: use flake-parts * nix: migrate to pname+version * flake.nix: overlay: expose both the namespace and the default attribute * ci: add the (Nix) flakestry workflow * nix: cmakeFlags: explicit OFF bools * nix: cuda: reduce runtime closure * nix: fewer rebuilds * nix: respect config.cudaCapabilities * nix: add the impure driver's location to the DT_RUNPATHs * nix: clean sources more thoroughly ...this way 
outPaths change less frequently, and so there are fewer rebuilds * nix: explicit mpi support * nix: explicit jetson support * flake.nix: darwin: only expose the default --------- Co-authored-by: Someone Serge <[email protected]>
1 parent 53e1438 commit 6c857c5

File tree

9 files changed

+524
-159
lines changed

9 files changed

+524
-159
lines changed

.devops/nix/apps.nix

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
perSystem =
3+
{ config, lib, ... }:
4+
{
5+
apps =
6+
let
7+
inherit (config.packages) default;
8+
binaries = [
9+
"llama"
10+
"llama-embedding"
11+
"llama-server"
12+
"quantize"
13+
"train-text-from-scratch"
14+
];
15+
mkApp = name: {
16+
type = "app";
17+
program = "${default}/bin/${name}";
18+
};
19+
in
20+
lib.genAttrs binaries mkApp;
21+
};
22+
}

.devops/nix/devshells.nix

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
perSystem =
3+
{ config, lib, ... }:
4+
{
5+
devShells =
6+
lib.concatMapAttrs
7+
(name: package: {
8+
${name} = package.passthru.shell;
9+
${name + "-extra"} = package.passthru.shell-extra;
10+
})
11+
config.packages;
12+
};
13+
}

.devops/nix/jetson-support.nix

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
{ inputs, ... }:
2+
{
3+
perSystem =
4+
{
5+
config,
6+
system,
7+
lib,
8+
pkgsCuda,
9+
...
10+
}:
11+
lib.optionalAttrs (system == "aarch64-linux") {
12+
packages =
13+
let
14+
caps.jetson-xavier = "7.2";
15+
caps.jetson-orin = "8.7";
16+
caps.jetson-nano = "5.3";
17+
18+
pkgsFor =
19+
cap:
20+
import inputs.nixpkgs {
21+
inherit system;
22+
config = {
23+
cudaSupport = true;
24+
cudaCapabilities = [ cap ];
25+
cudaEnableForwardCompat = false;
26+
inherit (pkgsCuda.config) allowUnfreePredicate;
27+
};
28+
};
29+
in
30+
builtins.mapAttrs (name: cap: ((pkgsFor cap).callPackage ./scope.nix { }).llama-cpp) caps;
31+
};
32+
}

.devops/nix/nixpkgs-instances.nix

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{ inputs, ... }:
2+
{
3+
# The _module.args definitions are passed on to modules as arguments. E.g.
4+
# the module `{ pkgs ... }: { /* config */ }` implicitly uses
5+
# `_module.args.pkgs` (defined in this case by flake-parts).
6+
perSystem =
7+
{ system, ... }:
8+
{
9+
_module.args = {
10+
pkgsCuda = import inputs.nixpkgs {
11+
inherit system;
12+
# Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
13+
# and ucx are built with CUDA support)
14+
config.cudaSupport = true;
15+
config.allowUnfreePredicate =
16+
p:
17+
builtins.all
18+
(
19+
license:
20+
license.free
21+
|| builtins.elem license.shortName [
22+
"CUDA EULA"
23+
"cuDNN EULA"
24+
]
25+
)
26+
(p.meta.licenses or [ p.meta.license ]);
27+
};
28+
# Ensure dependencies use ROCm consistently
29+
pkgsRocm = import inputs.nixpkgs {
30+
inherit system;
31+
config.rocmSupport = true;
32+
};
33+
};
34+
};
35+
}

.devops/nix/package.nix

+265
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
{
2+
lib,
3+
config,
4+
stdenv,
5+
mkShell,
6+
cmake,
7+
ninja,
8+
pkg-config,
9+
git,
10+
python3,
11+
mpi,
12+
openblas, # TODO: Use the generic `blas` so users could switch betwen alternative implementations
13+
cudaPackages,
14+
darwin,
15+
rocmPackages,
16+
clblast,
17+
useBlas ? builtins.all (x: !x) [
18+
useCuda
19+
useMetalKit
20+
useOpenCL
21+
useRocm
22+
],
23+
useCuda ? config.cudaSupport,
24+
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
25+
useMpi ? false, # Increases the runtime closure size by ~700M
26+
useOpenCL ? false,
27+
useRocm ? config.rocmSupport,
28+
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
29+
}@inputs:
30+
31+
let
32+
inherit (lib)
33+
cmakeBool
34+
cmakeFeature
35+
optionals
36+
strings
37+
versionOlder
38+
;
39+
40+
# It's necessary to consistently use backendStdenv when building with CUDA support,
41+
# otherwise we get libstdc++ errors downstream.
42+
stdenv = throw "Use effectiveStdenv instead";
43+
effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;
44+
45+
suffices =
46+
lib.optionals useBlas [ "BLAS" ]
47+
++ lib.optionals useCuda [ "CUDA" ]
48+
++ lib.optionals useMetalKit [ "MetalKit" ]
49+
++ lib.optionals useMpi [ "MPI" ]
50+
++ lib.optionals useOpenCL [ "OpenCL" ]
51+
++ lib.optionals useRocm [ "ROCm" ];
52+
53+
pnameSuffix =
54+
strings.optionalString (suffices != [ ])
55+
"-${strings.concatMapStringsSep "-" strings.toLower suffices}";
56+
descriptionSuffix =
57+
strings.optionalString (suffices != [ ])
58+
", accelerated with ${strings.concatStringsSep ", " suffices}";
59+
60+
# TODO: package the Python in this repository in a Nix-like way.
61+
# It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
62+
# is PEP 517-compatible, and ensure the correct .dist-info is generated.
63+
# https://peps.python.org/pep-0517/
64+
llama-python = python3.withPackages (
65+
ps: [
66+
ps.numpy
67+
ps.sentencepiece
68+
]
69+
);
70+
71+
# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
72+
llama-python-extra = python3.withPackages (
73+
ps: [
74+
ps.numpy
75+
ps.sentencepiece
76+
ps.torchWithoutCuda
77+
ps.transformers
78+
]
79+
);
80+
81+
# apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
82+
# separately
83+
darwinBuildInputs =
84+
with darwin.apple_sdk.frameworks;
85+
[
86+
Accelerate
87+
CoreVideo
88+
CoreGraphics
89+
]
90+
++ optionals useMetalKit [ MetalKit ];
91+
92+
cudaBuildInputs = with cudaPackages; [
93+
cuda_cccl.dev # <nv/target>
94+
95+
# A temporary hack for reducing the closure size, remove once cudaPackages
96+
# have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
97+
cuda_cudart.dev
98+
cuda_cudart.lib
99+
cuda_cudart.static
100+
libcublas.dev
101+
libcublas.lib
102+
libcublas.static
103+
];
104+
105+
rocmBuildInputs = with rocmPackages; [
106+
clr
107+
hipblas
108+
rocblas
109+
];
110+
in
111+
112+
effectiveStdenv.mkDerivation (
113+
finalAttrs: {
114+
pname = "llama-cpp${pnameSuffix}";
115+
version = llamaVersion;
116+
117+
src = lib.cleanSourceWith {
118+
filter =
119+
name: type:
120+
!(builtins.any (_: _) [
121+
(lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
122+
(name == "README.md") # Ignore *.md changes whe computing outPaths
123+
(lib.hasPrefix "." name) # Skip hidden files and directories
124+
]);
125+
src = lib.cleanSource ../../.;
126+
};
127+
128+
postPatch = ''
129+
substituteInPlace ./ggml-metal.m \
130+
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
131+
132+
# TODO: Package up each Python script or service appropriately.
133+
# If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
134+
# we could make those *.py into setuptools' entrypoints
135+
substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
136+
'';
137+
138+
nativeBuildInputs =
139+
[
140+
cmake
141+
ninja
142+
pkg-config
143+
git
144+
]
145+
++ optionals useCuda [
146+
cudaPackages.cuda_nvcc
147+
148+
# TODO: Replace with autoAddDriverRunpath
149+
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
150+
cudaPackages.autoAddOpenGLRunpathHook
151+
];
152+
153+
buildInputs =
154+
optionals effectiveStdenv.isDarwin darwinBuildInputs
155+
++ optionals useCuda cudaBuildInputs
156+
++ optionals useMpi [ mpi ]
157+
++ optionals useOpenCL [ clblast ]
158+
++ optionals useRocm rocmBuildInputs;
159+
160+
cmakeFlags =
161+
[
162+
(cmakeBool "LLAMA_NATIVE" true)
163+
(cmakeBool "LLAMA_BUILD_SERVER" true)
164+
(cmakeBool "BUILD_SHARED_LIBS" true)
165+
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
166+
(cmakeBool "LLAMA_BLAS" useBlas)
167+
(cmakeBool "LLAMA_CLBLAST" useOpenCL)
168+
(cmakeBool "LLAMA_CUBLAS" useCuda)
169+
(cmakeBool "LLAMA_HIPBLAS" useRocm)
170+
(cmakeBool "LLAMA_METAL" useMetalKit)
171+
(cmakeBool "LLAMA_MPI" useMpi)
172+
]
173+
++ optionals useCuda [
174+
(
175+
with cudaPackages.flags;
176+
cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
177+
builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
178+
)
179+
)
180+
]
181+
++ optionals useRocm [
182+
(cmakeFeature "CMAKE_C_COMPILER" "hipcc")
183+
(cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
184+
185+
# Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
186+
# in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
187+
# and select the line that matches the current nixpkgs version of rocBLAS.
188+
# Should likely use `rocmPackages.clr.gpuTargets`.
189+
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
190+
]
191+
++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
192+
++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
193+
194+
# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
195+
# if they haven't been added yet.
196+
postInstall = ''
197+
mv $out/bin/main $out/bin/llama
198+
mv $out/bin/server $out/bin/llama-server
199+
mkdir -p $out/include
200+
cp $src/llama.h $out/include/
201+
'';
202+
203+
# Define the shells here, but don't add in the inputsFrom to avoid recursion.
204+
passthru = {
205+
inherit
206+
useBlas
207+
useCuda
208+
useMetalKit
209+
useMpi
210+
useOpenCL
211+
useRocm
212+
;
213+
214+
shell = mkShell {
215+
name = "shell-${finalAttrs.finalPackage.name}";
216+
description = "contains numpy and sentencepiece";
217+
buildInputs = [ llama-python ];
218+
inputsFrom = [ finalAttrs.finalPackage ];
219+
};
220+
221+
shell-extra = mkShell {
222+
name = "shell-extra-${finalAttrs.finalPackage.name}";
223+
description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
224+
buildInputs = [ llama-python-extra ];
225+
inputsFrom = [ finalAttrs.finalPackage ];
226+
};
227+
};
228+
229+
meta = {
230+
# Configurations we don't want even the CI to evaluate. Results in the
231+
# "unsupported platform" messages. This is mostly a no-op, because
232+
# cudaPackages would've refused to evaluate anyway.
233+
badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
234+
235+
# Configurations that are known to result in build failures. Can be
236+
# overridden by importing Nixpkgs with `allowBroken = true`.
237+
broken = (useMetalKit && !effectiveStdenv.isDarwin);
238+
239+
description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
240+
homepage = "https://github.com/ggerganov/llama.cpp/";
241+
license = lib.licenses.mit;
242+
243+
# Accommodates `nix run` and `lib.getExe`
244+
mainProgram = "llama";
245+
246+
# These people might respond, on the best effort basis, if you ping them
247+
# in case of Nix-specific regressions or for reviewing Nix-specific PRs.
248+
# Consider adding yourself to this list if you want to ensure this flake
249+
# stays maintained and you're willing to invest your time. Do not add
250+
# other people without their consent. Consider removing people after
251+
# they've been unreachable for long periods of time.
252+
253+
# Note that lib.maintainers is defined in Nixpkgs, but you may just add
254+
# an attrset following the same format as in
255+
# https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
256+
maintainers = with lib.maintainers; [
257+
philiptaron
258+
SomeoneSerge
259+
];
260+
261+
# Extend `badPlatforms` instead
262+
platforms = lib.platforms.all;
263+
};
264+
}
265+
)

.devops/nix/scope.nix

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
lib,
3+
newScope,
4+
llamaVersion ? "0.0.0",
5+
}:
6+
7+
lib.makeScope newScope (
8+
self: {
9+
inherit llamaVersion;
10+
llama-cpp = self.callPackage ./package.nix { };
11+
}
12+
)

0 commit comments

Comments
 (0)