glibc,
config,
stdenv,
- mkShell,
runCommand,
cmake,
ninja,
pkg-config,
git,
- python3,
mpi,
blas,
cudaPackages,
vulkan-loader,
curl,
shaderc,
- useBlas ? builtins.all (x: !x) [
- useCuda
- useMetalKit
- useRocm
- useVulkan
- ] && blas.meta.available,
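+  # Default to BLAS only when no GPU backend (CUDA, Metal, ROCm, Vulkan) is requested and BLAS is available.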
+ useBlas ?
+ builtins.all (x: !x) [
+ useCuda
+ useMetalKit
+ useRocm
+ useVulkan
+ ]
+ && blas.meta.available,
useCuda ? config.cudaSupport,
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
- useMpi ? false, # Increases the runtime closure size by ~700M
+ # Increases the runtime closure size by ~700M
+ useMpi ? false,
useRocm ? config.rocmSupport,
enableCurl ? true,
useVulkan ? false,
  # It's necessary to consistently use backendStdenv when building with CUDA support;
  # otherwise we get libstdc++ errors downstream.
effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
enableStatic ? effectiveStdenv.hostPlatform.isStatic,
- precompileMetalShaders ? false
-}@inputs:
+ precompileMetalShaders ? false,
+}:
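+# Backends are selected with the flags above; a consumer can flip them via
+# `.override`, e.g. (hypothetical): llama-cpp.override { useVulkan = true; }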
let
inherit (lib)
cmakeBool
cmakeFeature
optionals
strings
- versionOlder
;
stdenv = throw "Use effectiveStdenv instead";
pnameSuffix =
strings.optionalString (suffices != [ ])
"-${strings.concatMapStringsSep "-" strings.toLower suffices}";
- descriptionSuffix =
- strings.optionalString (suffices != [ ])
- ", accelerated with ${strings.concatStringsSep ", " suffices}";
-
- executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
-
- # TODO: package the Python in this repository in a Nix-like way.
- # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
- # is PEP 517-compatible, and ensure the correct .dist-info is generated.
- # https://peps.python.org/pep-0517/
- #
- # TODO: Package up each Python script or service appropriately, by making
- # them into "entrypoints"
- llama-python = python3.withPackages (
- ps: [
- ps.numpy
- ps.sentencepiece
- ]
- );
+ descriptionSuffix = strings.optionalString (
+ suffices != [ ]
+ ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
- # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
- llama-python-extra = python3.withPackages (
- ps: [
- ps.numpy
- ps.sentencepiece
- ps.tiktoken
- ps.torchWithoutCuda
- ps.transformers
-
- # server bench
- ps.matplotlib
-
- # server tests
- ps.openai
- ps.behave
- ps.prometheus-client
-
- # for examples/pydantic-models-to-grammar-examples.py
- ps.docstring-parser
- ps.pydantic
-
- # for scripts/compare-llama-bench.py
- ps.gitpython
- ps.tabulate
- ]
- );
-
- xcrunHost = runCommand "xcrunHost" {} ''
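+  # Symlink the system xcrun into the build so the (sandbox-escaped) Metal
+  # shader precompilation step can locate the Metal compiler.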
+ xcrunHost = runCommand "xcrunHost" { } ''
mkdir -p $out/bin
ln -s /usr/bin/xcrun $out/bin
'';
];
in
-effectiveStdenv.mkDerivation (
- finalAttrs: {
- pname = "llama-cpp${pnameSuffix}";
- version = llamaVersion;
-
- # Note: none of the files discarded here are visible in the sandbox or
- # affect the output hash. This also means they can be modified without
- # triggering a rebuild.
- src = lib.cleanSourceWith {
- filter =
- name: type:
- let
- noneOf = builtins.all (x: !x);
- baseName = baseNameOf name;
- in
- noneOf [
- (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
- (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
- (lib.hasPrefix "." baseName) # Skip hidden files and directories
- (baseName == "flake.lock")
- ];
- src = lib.cleanSource ../../.;
- };
-
- postPatch = ''
- substituteInPlace ./ggml/src/ggml-metal.m \
- --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
- substituteInPlace ./ggml/src/ggml-metal.m \
- --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
- '';
-
- # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
- # `default.metallib` may be compiled with Metal compiler from XCode
- # and we need to escape sandbox on MacOS to access Metal compiler.
- # `xcrun` is used find the path of the Metal compiler, which is varible
- # and not on $PATH
- # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
- __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
-
- nativeBuildInputs =
- [
- cmake
- ninja
- pkg-config
- git
- ]
- ++ optionals useCuda [
- cudaPackages.cuda_nvcc
- autoAddDriverRunpath
- ]
- ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
- glibc.static
- ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
- xcrunHost
- ];
-
- buildInputs =
- optionals effectiveStdenv.isDarwin darwinBuildInputs
- ++ optionals useCuda cudaBuildInputs
- ++ optionals useMpi [ mpi ]
- ++ optionals useRocm rocmBuildInputs
- ++ optionals useBlas [ blas ]
- ++ optionals useVulkan vulkanBuildInputs
- ++ optionals enableCurl [ curl ];
-
- cmakeFlags =
- [
- (cmakeBool "LLAMA_BUILD_SERVER" true)
- (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
- (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
- (cmakeBool "LLAMA_CURL" enableCurl)
- (cmakeBool "GGML_NATIVE" false)
- (cmakeBool "GGML_BLAS" useBlas)
- (cmakeBool "GGML_CUDA" useCuda)
- (cmakeBool "GGML_HIPBLAS" useRocm)
- (cmakeBool "GGML_METAL" useMetalKit)
- (cmakeBool "GGML_VULKAN" useVulkan)
- (cmakeBool "GGML_STATIC" enableStatic)
- ]
- ++ optionals useCuda [
- (
- with cudaPackages.flags;
- cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
- builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
- )
- )
- ]
- ++ optionals useRocm [
- (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
- (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
- ]
- ++ optionals useMetalKit [
- (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
- (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+effectiveStdenv.mkDerivation (finalAttrs: {
+ pname = "llama-cpp${pnameSuffix}";
+ version = llamaVersion;
+
+ # Note: none of the files discarded here are visible in the sandbox or
+ # affect the output hash. This also means they can be modified without
+ # triggering a rebuild.
+ src = lib.cleanSourceWith {
+ filter =
+ name: type:
+ let
+ noneOf = builtins.all (x: !x);
+ baseName = baseNameOf name;
+ in
+ noneOf [
+ (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
+ (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
+ (lib.hasPrefix "." baseName) # Skip hidden files and directories
+ (baseName == "flake.lock")
];
+ src = lib.cleanSource ../../.;
+ };
+
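+  # Point the Metal shader lookups at $out/bin instead of the app-bundle resources.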
+ postPatch = ''
+ substituteInPlace ./ggml/src/ggml-metal.m \
+ --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+ substituteInPlace ./ggml/src/ggml-metal.m \
+ --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
+ '';
- # Environment variables needed for ROCm
- env = optionals useRocm {
- ROCM_PATH = "${rocmPackages.clr}";
- HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
- };
-
- # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
- # if they haven't been added yet.
- postInstall = ''
- mkdir -p $out/include
- cp $src/include/llama.h $out/include/
- '';
-
- # Define the shells here, but don't add in the inputsFrom to avoid recursion.
- passthru = {
- inherit
- useBlas
- useCuda
- useMetalKit
- useMpi
- useRocm
- useVulkan
- ;
-
- shell = mkShell {
- name = "shell-${finalAttrs.finalPackage.name}";
- description = "contains numpy and sentencepiece";
- buildInputs = [ llama-python ];
- inputsFrom = [ finalAttrs.finalPackage ];
- shellHook = ''
- addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
- '';
- };
-
- shell-extra = mkShell {
- name = "shell-extra-${finalAttrs.finalPackage.name}";
- description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
- buildInputs = [ llama-python-extra ];
- inputsFrom = [ finalAttrs.finalPackage ];
- };
- };
-
- meta = {
- # Configurations we don't want even the CI to evaluate. Results in the
- # "unsupported platform" messages. This is mostly a no-op, because
- # cudaPackages would've refused to evaluate anyway.
- badPlatforms = optionals useCuda lib.platforms.darwin;
-
- # Configurations that are known to result in build failures. Can be
- # overridden by importing Nixpkgs with `allowBroken = true`.
- broken = (useMetalKit && !effectiveStdenv.isDarwin);
-
- description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
- homepage = "https://github.com/ggerganov/llama.cpp/";
- license = lib.licenses.mit;
-
- # Accommodates `nix run` and `lib.getExe`
- mainProgram = "llama-cli";
+  # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
+  # `default.metallib` may be compiled with the Metal compiler from Xcode,
+  # and we need to escape the sandbox on macOS to access it.
+  # `xcrun` is used to find the path of the Metal compiler, which is variable
+  # and not on $PATH.
+  # See https://github.com/ggerganov/llama.cpp/pull/6118 for discussion.
+ __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
- # These people might respond, on the best effort basis, if you ping them
- # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
- # Consider adding yourself to this list if you want to ensure this flake
- # stays maintained and you're willing to invest your time. Do not add
- # other people without their consent. Consider removing people after
- # they've been unreachable for long periods of time.
+ nativeBuildInputs =
+ [
+ cmake
+ ninja
+ pkg-config
+ git
+ ]
+ ++ optionals useCuda [
+ cudaPackages.cuda_nvcc
- # Note that lib.maintainers is defined in Nixpkgs, but you may just add
- # an attrset following the same format as in
- # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
- maintainers = with lib.maintainers; [
- philiptaron
- SomeoneSerge
- ];
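+      # autoAddDriverRunpath patches the outputs so they can locate the NVIDIA
+      # driver libraries at run time.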
+ autoAddDriverRunpath
+ ]
+ ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
+ ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
+
+ buildInputs =
+ optionals effectiveStdenv.isDarwin darwinBuildInputs
+ ++ optionals useCuda cudaBuildInputs
+ ++ optionals useMpi [ mpi ]
+ ++ optionals useRocm rocmBuildInputs
+ ++ optionals useBlas [ blas ]
+ ++ optionals useVulkan vulkanBuildInputs
+ ++ optionals enableCurl [ curl ];
+
+ cmakeFlags =
+ [
+ (cmakeBool "LLAMA_BUILD_SERVER" true)
+ (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
+ (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+ (cmakeBool "LLAMA_CURL" enableCurl)
+ (cmakeBool "GGML_NATIVE" false)
+ (cmakeBool "GGML_BLAS" useBlas)
+ (cmakeBool "GGML_CUDA" useCuda)
+ (cmakeBool "GGML_HIPBLAS" useRocm)
+ (cmakeBool "GGML_METAL" useMetalKit)
+ (cmakeBool "GGML_VULKAN" useVulkan)
+ (cmakeBool "GGML_STATIC" enableStatic)
+ ]
+ ++ optionals useCuda [
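+      # dropDot maps capabilities like "8.6" to the "86" form CMake expects.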
+ (
+ with cudaPackages.flags;
+ cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+ builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+ )
+ )
+ ]
+ ++ optionals useRocm [
+ (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+ (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
+ ]
+ ++ optionals useMetalKit [
+      (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+ (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+ ];
+
+ # Environment variables needed for ROCm
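+  # optionalAttrs keeps `env` an attribute set even when useRocm is false.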
+  env = lib.optionalAttrs useRocm {
+ ROCM_PATH = "${rocmPackages.clr}";
+ HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
+ };
+
+ # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
+ # if they haven't been added yet.
+ postInstall = ''
+ mkdir -p $out/include
+ cp $src/include/llama.h $out/include/
+ '';
- # Extend `badPlatforms` instead
- platforms = lib.platforms.all;
- };
- }
-)
+ meta = {
+ # Configurations we don't want even the CI to evaluate. Results in the
+ # "unsupported platform" messages. This is mostly a no-op, because
+ # cudaPackages would've refused to evaluate anyway.
+ badPlatforms = optionals useCuda lib.platforms.darwin;
+
+ # Configurations that are known to result in build failures. Can be
+ # overridden by importing Nixpkgs with `allowBroken = true`.
+ broken = (useMetalKit && !effectiveStdenv.isDarwin);
+
+ description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+ homepage = "https://github.com/ggerganov/llama.cpp/";
+ license = lib.licenses.mit;
+
+ # Accommodates `nix run` and `lib.getExe`
+ mainProgram = "llama-cli";
+
+    # These people might respond, on a best-effort basis, if you ping them
+ # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
+ # Consider adding yourself to this list if you want to ensure this flake
+ # stays maintained and you're willing to invest your time. Do not add
+ # other people without their consent. Consider removing people after
+ # they've been unreachable for long periods of time.
+
+ # Note that lib.maintainers is defined in Nixpkgs, but you may just add
+ # an attrset following the same format as in
+ # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
+ maintainers = with lib.maintainers; [
+ philiptaron
+ SomeoneSerge
+ ];
+
+ # Extend `badPlatforms` instead
+ platforms = lib.platforms.all;
+ };
+})