# Package arguments. Feature toggles (useBlas/useCuda/…) default from the
# nixpkgs `config` attrset and the host platform; the flake overrides
# `llamaVersion` at instantiation time.
{
  lib,
  config,
  stdenv,
  mkShell,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
  cudaPackages,
  darwin,
  rocmPackages,
  clblast,
  # BLAS is the fallback backend: enabled only when no accelerated backend is.
  useBlas ? builtins.all (x: !x) [
    useCuda
    useMetalKit
    useOpenCL
    useRocm
  ],
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useOpenCL ? false,
  useRocm ? config.rocmSupport,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
}@inputs:
let
  # Only inherit what is actually used below; `versionOlder` was inherited
  # but never referenced, so it has been dropped.
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    ;

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # Shadow `stdenv` with a throw so any accidental use inside this file fails
  # loudly instead of silently picking the wrong toolchain.
  stdenv = throw "Use effectiveStdenv instead";
  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;

  # Human-readable backend tags, used for the pname and meta.description.
  suffices =
    optionals useBlas [ "BLAS" ]
    ++ optionals useCuda [ "CUDA" ]
    ++ optionals useMetalKit [ "MetalKit" ]
    ++ optionals useMpi [ "MPI" ]
    ++ optionals useOpenCL [ "OpenCL" ]
    ++ optionals useRocm [ "ROCm" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix =
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";

  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.torchWithoutCuda
      ps.transformers
    ]
  );

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart.dev
    cuda_cudart.lib
    cuda_cudart.static
    libcublas.dev
    libcublas.lib
    libcublas.static
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];
in
| 111 | + |
effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;

    # Exclude files that do not affect the build so outPaths stay stable
    # across documentation-only changes.
    src = lib.cleanSourceWith {
      filter =
        name: type:
        !(builtins.any (_: _) [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
          (name == "README.md") # Ignore *.md changes when computing outPaths
          (lib.hasPrefix "." name) # Skip hidden files and directories
        ]);
      src = lib.cleanSource ../../.;
    };

    postPatch = ''
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"

      # TODO: Package up each Python script or service appropriately.
      # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
      # we could make those *.py into setuptools' entrypoints
      substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
    '';

    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc

        # TODO: Replace with autoAddDriverRunpath
        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
        cudaPackages.autoAddOpenGLRunpathHook
      ];

    buildInputs =
      optionals effectiveStdenv.isDarwin darwinBuildInputs
      ++ optionals useCuda cudaBuildInputs
      ++ optionals useMpi [ mpi ]
      ++ optionals useOpenCL [ clblast ]
      ++ optionals useRocm rocmBuildInputs;

    cmakeFlags =
      [
        (cmakeBool "LLAMA_NATIVE" true)
        (cmakeBool "LLAMA_BUILD_SERVER" true)
        (cmakeBool "BUILD_SHARED_LIBS" true)
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_BLAS" useBlas)
        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
        (cmakeBool "LLAMA_CUBLAS" useCuda)
        (cmakeBool "LLAMA_HIPBLAS" useRocm)
        (cmakeBool "LLAMA_METAL" useMetalKit)
        (cmakeBool "LLAMA_MPI" useMpi)
      ]
      ++ optionals useCuda [
        (
          with cudaPackages.flags;
          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
          )
        )
      ]
      ++ optionals useRocm [
        (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
        (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

        # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
        # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
        # and select the line that matches the current nixpkgs version of rocBLAS.
        # Should likely use `rocmPackages.clr.gpuTargets`.
        "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
      ]
      ++ optionals useMetalKit [ (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
      ++ optionals useBlas [ (cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];

    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
    postInstall = ''
      mv $out/bin/main $out/bin/llama
      mv $out/bin/server $out/bin/llama-server
      mkdir -p $out/include
      cp $src/llama.h $out/include/
    '';

    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
    passthru = {
      inherit
        useBlas
        useCuda
        useMetalKit
        useMpi
        useOpenCL
        useRocm
        ;

      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };

      shell-extra = mkShell {
        name = "shell-extra-${finalAttrs.finalPackage.name}";
        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
        buildInputs = [ llama-python-extra ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };
    };

    meta = {
      # Configurations we don't want even the CI to evaluate. Results in the
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin);

      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
      homepage = "https://github.com/ggerganov/llama.cpp/";
      license = lib.licenses.mit;

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama";

      # These people might respond, on the best effort basis, if you ping them
      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
      # Consider adding yourself to this list if you want to ensure this flake
      # stays maintained and you're willing to invest your time. Do not add
      # other people without their consent. Consider removing people after
      # they've been unreachable for long periods of time.

      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
      # an attrset following the same format as in
      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
      maintainers = with lib.maintainers; [
        philiptaron
        SomeoneSerge
      ];

      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
)
0 commit comments