--- /dev/null
+{
+ perSystem =
+ { config, lib, ... }:
+ {
+ apps =
+ let
+ inherit (config.packages) default;
+ binaries = [
+ "llama"
+ "llama-embedding"
+ "llama-server"
+ "quantize"
+ "train-text-from-scratch"
+ ];
+ mkApp = name: {
+ type = "app";
+ program = "${default}/bin/${name}";
+ };
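+          # Purely illustrative: `lib.genAttrs binaries mkApp` below evaluates to
+          #   { llama = { type = "app"; program = "${default}/bin/llama"; }; ... }
+          # i.e. one flake app per binary, usable as `nix run .#llama`.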
+ in
+ lib.genAttrs binaries mkApp;
+ };
+}
--- /dev/null
+{
+ perSystem =
+ { config, lib, ... }:
+ {
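+      # For each package in `config.packages` this exposes `devShells.<name>`
+      # and `devShells.<name>-extra` (taken from the package's passthru),
+      # so e.g. `nix develop .#default-extra` is expected to work (illustrative).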
+ devShells =
+ lib.concatMapAttrs
+ (name: package: {
+ ${name} = package.passthru.shell;
+ ${name + "-extra"} = package.passthru.shell-extra;
+ })
+ config.packages;
+ };
+}
--- /dev/null
+{ inputs, ... }:
+{
+ perSystem =
+ {
+ config,
+ system,
+ lib,
+ pkgsCuda,
+ ...
+ }:
+ lib.optionalAttrs (system == "aarch64-linux") {
+ packages =
+ let
+ caps.jetson-xavier = "7.2";
+ caps.jetson-orin = "8.7";
+ caps.jetson-nano = "5.3";
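+          # Illustrative usage (assuming an aarch64-linux build host):
+          #   nix build .#jetson-orin
+          # builds llama-cpp with CUDA restricted to compute capability 8.7.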
+
+ pkgsFor =
+ cap:
+ import inputs.nixpkgs {
+ inherit system;
+ config = {
+ cudaSupport = true;
+ cudaCapabilities = [ cap ];
+ cudaEnableForwardCompat = false;
+ inherit (pkgsCuda.config) allowUnfreePredicate;
+ };
+ };
+ in
+ builtins.mapAttrs (name: cap: ((pkgsFor cap).callPackage ./scope.nix { }).llama-cpp) caps;
+ };
+}
--- /dev/null
+{ inputs, ... }:
+{
+ # The _module.args definitions are passed on to modules as arguments. E.g.
+ # the module `{ pkgs ... }: { /* config */ }` implicitly uses
+ # `_module.args.pkgs` (defined in this case by flake-parts).
+ perSystem =
+ { system, ... }:
+ {
+ _module.args = {
+ pkgsCuda = import inputs.nixpkgs {
+ inherit system;
+ # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
+ # and ucx are built with CUDA support)
+ config.cudaSupport = true;
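+          # Accept a package only if every one of its licenses is either free
+          # or one of the whitelisted CUDA/cuDNN EULAs below.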
+ config.allowUnfreePredicate =
+ p:
+ builtins.all
+ (
+ license:
+ license.free
+ || builtins.elem license.shortName [
+ "CUDA EULA"
+ "cuDNN EULA"
+ ]
+ )
+ (p.meta.licenses or [ p.meta.license ]);
+ };
+ # Ensure dependencies use ROCm consistently
+ pkgsRocm = import inputs.nixpkgs {
+ inherit system;
+ config.rocmSupport = true;
+ };
+ };
+ };
+}
--- /dev/null
+{
+ lib,
+ config,
+ stdenv,
+ mkShell,
+ cmake,
+ ninja,
+ pkg-config,
+ git,
+ python3,
+ mpi,
+  openblas, # TODO: Use the generic `blas` so users can switch between alternative implementations
+ cudaPackages,
+ darwin,
+ rocmPackages,
+ clblast,
+ useBlas ? builtins.all (x: !x) [
+ useCuda
+ useMetalKit
+ useOpenCL
+ useRocm
+ ],
+ useCuda ? config.cudaSupport,
+ useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
+ useMpi ? false, # Increases the runtime closure size by ~700M
+ useOpenCL ? false,
+ useRocm ? config.rocmSupport,
+ llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
+}@inputs:
+
+let
+ inherit (lib)
+ cmakeBool
+ cmakeFeature
+ optionals
+ strings
+ versionOlder
+ ;
+
+ # It's necessary to consistently use backendStdenv when building with CUDA support,
+ # otherwise we get libstdc++ errors downstream.
+ stdenv = throw "Use effectiveStdenv instead";
+ effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;
+
+ suffices =
+ lib.optionals useBlas [ "BLAS" ]
+ ++ lib.optionals useCuda [ "CUDA" ]
+ ++ lib.optionals useMetalKit [ "MetalKit" ]
+ ++ lib.optionals useMpi [ "MPI" ]
+ ++ lib.optionals useOpenCL [ "OpenCL" ]
+ ++ lib.optionals useRocm [ "ROCm" ];
+
+ pnameSuffix =
+ strings.optionalString (suffices != [ ])
+ "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
+ descriptionSuffix =
+ strings.optionalString (suffices != [ ])
+ ", accelerated with ${strings.concatStringsSep ", " suffices}";
+
+ # TODO: package the Python in this repository in a Nix-like way.
+ # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
+ # is PEP 517-compatible, and ensure the correct .dist-info is generated.
+ # https://peps.python.org/pep-0517/
+ llama-python = python3.withPackages (
+ ps: [
+ ps.numpy
+ ps.sentencepiece
+ ]
+ );
+
+  # TODO(Green-Sky): find a better way to opt into the heavy ML python runtime
+ llama-python-extra = python3.withPackages (
+ ps: [
+ ps.numpy
+ ps.sentencepiece
+ ps.torchWithoutCuda
+ ps.transformers
+ ]
+ );
+
+  # apple_sdk is supposed to choose sane defaults, so there's no need to
+  # handle isAarch64 separately
+ darwinBuildInputs =
+ with darwin.apple_sdk.frameworks;
+ [
+ Accelerate
+ CoreVideo
+ CoreGraphics
+ ]
+ ++ optionals useMetalKit [ MetalKit ];
+
+ cudaBuildInputs = with cudaPackages; [
+ cuda_cccl.dev # <nv/target>
+
+ # A temporary hack for reducing the closure size, remove once cudaPackages
+ # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
+ cuda_cudart.dev
+ cuda_cudart.lib
+ cuda_cudart.static
+ libcublas.dev
+ libcublas.lib
+ libcublas.static
+ ];
+
+ rocmBuildInputs = with rocmPackages; [
+ clr
+ hipblas
+ rocblas
+ ];
+in
+
+effectiveStdenv.mkDerivation (
+ finalAttrs: {
+ pname = "llama-cpp${pnameSuffix}";
+ version = llamaVersion;
+
+ src = lib.cleanSourceWith {
+ filter =
+ name: type:
+ !(builtins.any (_: _) [
+ (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
+          (name == "README.md") # Ignore *.md changes when computing outPaths
+ (lib.hasPrefix "." name) # Skip hidden files and directories
+ ]);
+ src = lib.cleanSource ../../.;
+ };
+
+ postPatch = ''
+ substituteInPlace ./ggml-metal.m \
+ --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+
+ # TODO: Package up each Python script or service appropriately.
+ # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
+ # we could make those *.py into setuptools' entrypoints
+ substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
+ '';
+
+ nativeBuildInputs =
+ [
+ cmake
+ ninja
+ pkg-config
+ git
+ ]
+ ++ optionals useCuda [
+ cudaPackages.cuda_nvcc
+
+ # TODO: Replace with autoAddDriverRunpath
+ # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
+ cudaPackages.autoAddOpenGLRunpathHook
+ ];
+
+ buildInputs =
+ optionals effectiveStdenv.isDarwin darwinBuildInputs
+ ++ optionals useCuda cudaBuildInputs
+ ++ optionals useMpi [ mpi ]
+ ++ optionals useOpenCL [ clblast ]
+ ++ optionals useRocm rocmBuildInputs;
+
+ cmakeFlags =
+ [
+ (cmakeBool "LLAMA_NATIVE" true)
+ (cmakeBool "LLAMA_BUILD_SERVER" true)
+ (cmakeBool "BUILD_SHARED_LIBS" true)
+ (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+ (cmakeBool "LLAMA_BLAS" useBlas)
+ (cmakeBool "LLAMA_CLBLAST" useOpenCL)
+ (cmakeBool "LLAMA_CUBLAS" useCuda)
+ (cmakeBool "LLAMA_HIPBLAS" useRocm)
+ (cmakeBool "LLAMA_METAL" useMetalKit)
+ (cmakeBool "LLAMA_MPI" useMpi)
+ ]
+ ++ optionals useCuda [
+ (
+ with cudaPackages.flags;
+ cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+ builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+ )
+ )
+ ]
+ ++ optionals useRocm [
+ (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
+ (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
+
+ # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
+ # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
+ # and select the line that matches the current nixpkgs version of rocBLAS.
+ # Should likely use `rocmPackages.clr.gpuTargets`.
+ "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
+ ]
+      ++ optionals useMetalKit [ (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
+      ++ optionals useBlas [ (cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
+
+ # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
+ # if they haven't been added yet.
+ postInstall = ''
+ mv $out/bin/main $out/bin/llama
+ mv $out/bin/server $out/bin/llama-server
+ mkdir -p $out/include
+ cp $src/llama.h $out/include/
+ '';
+
+ # Define the shells here, but don't add in the inputsFrom to avoid recursion.
+ passthru = {
+ inherit
+ useBlas
+ useCuda
+ useMetalKit
+ useMpi
+ useOpenCL
+ useRocm
+ ;
+
+ shell = mkShell {
+ name = "shell-${finalAttrs.finalPackage.name}";
+ description = "contains numpy and sentencepiece";
+ buildInputs = [ llama-python ];
+ inputsFrom = [ finalAttrs.finalPackage ];
+ };
+
+ shell-extra = mkShell {
+ name = "shell-extra-${finalAttrs.finalPackage.name}";
+ description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
+ buildInputs = [ llama-python-extra ];
+ inputsFrom = [ finalAttrs.finalPackage ];
+ };
+ };
+
+ meta = {
+      # Configurations we don't want even the CI to evaluate; these result in
+      # the "unsupported platform" messages. This is mostly a no-op, because
+      # cudaPackages would've refused to evaluate anyway.
+ badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
+
+ # Configurations that are known to result in build failures. Can be
+ # overridden by importing Nixpkgs with `allowBroken = true`.
+ broken = (useMetalKit && !effectiveStdenv.isDarwin);
+
+ description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+ homepage = "https://github.com/ggerganov/llama.cpp/";
+ license = lib.licenses.mit;
+
+ # Accommodates `nix run` and `lib.getExe`
+ mainProgram = "llama";
+
+ # These people might respond, on the best effort basis, if you ping them
+ # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
+ # Consider adding yourself to this list if you want to ensure this flake
+ # stays maintained and you're willing to invest your time. Do not add
+ # other people without their consent. Consider removing people after
+ # they've been unreachable for long periods of time.
+
+ # Note that lib.maintainers is defined in Nixpkgs, but you may just add
+ # an attrset following the same format as in
+ # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
+ maintainers = with lib.maintainers; [
+ philiptaron
+ SomeoneSerge
+ ];
+
+ # Extend `badPlatforms` instead
+ platforms = lib.platforms.all;
+ };
+ }
+)
--- /dev/null
+{
+ lib,
+ newScope,
+ llamaVersion ? "0.0.0",
+}:
+
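+# A self-contained package set. The flake's overlay exposes it as
+# `llamaPackages`, and flake.nix instantiates it directly via
+# `(pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp`.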
+lib.makeScope newScope (
+ self: {
+ inherit llamaVersion;
+ llama-cpp = self.callPackage ./package.nix { };
+ }
+)
--- /dev/null
+# Make the flake discoverable on https://flakestry.dev
+name: "Publish a flake to flakestry"
+on:
+ push:
+ tags:
+ - "v?[0-9]+.[0-9]+.[0-9]+"
+ - "v?[0-9]+.[0-9]+"
+ workflow_dispatch:
+ inputs:
+ tag:
+ description: "The existing tag to publish"
+ type: "string"
+ required: true
+jobs:
+ publish-flake:
+ runs-on: ubuntu-latest
+ permissions:
+ id-token: "write"
+ contents: "read"
+ steps:
+ - uses: flakestry/flakestry-publish@main
+ with:
+ version: "${{ inputs.tag || github.ref_name }}"
{
"nodes": {
- "flake-utils": {
+ "flake-parts": {
"inputs": {
- "systems": "systems"
+ "nixpkgs-lib": "nixpkgs-lib"
},
"locked": {
- "lastModified": 1694529238,
- "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=",
- "owner": "numtide",
- "repo": "flake-utils",
- "rev": "ff7b65b44d01cf9ba6a71320833626af21126384",
+ "lastModified": 1701473968,
+ "narHash": "sha256-YcVE5emp1qQ8ieHUnxt1wCZCC3ZfAS+SRRWZ2TMda7E=",
+ "owner": "hercules-ci",
+ "repo": "flake-parts",
+ "rev": "34fed993f1674c8d06d58b37ce1e0fe5eebcb9f5",
"type": "github"
},
"original": {
- "owner": "numtide",
- "repo": "flake-utils",
+ "owner": "hercules-ci",
+ "repo": "flake-parts",
"type": "github"
}
},
"nixpkgs": {
"locked": {
- "lastModified": 1698318101,
- "narHash": "sha256-gUihHt3yPD7bVqg+k/UVHgngyaJ3DMEBchbymBMvK1E=",
+ "lastModified": 1703559957,
+ "narHash": "sha256-x9PUuMEPGUOMB51zNxrDr2QoHbYWlCS2xhFedm9MC5Q=",
"owner": "NixOS",
"repo": "nixpkgs",
- "rev": "63678e9f3d3afecfeafa0acead6239cdb447574c",
+ "rev": "75dd68c36f458c6593c5bbb48abfd3e59bfed380",
"type": "github"
},
"original": {
"type": "github"
}
},
- "root": {
- "inputs": {
- "flake-utils": "flake-utils",
- "nixpkgs": "nixpkgs"
- }
- },
- "systems": {
+ "nixpkgs-lib": {
"locked": {
- "lastModified": 1681028828,
- "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
- "owner": "nix-systems",
- "repo": "default",
- "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+ "dir": "lib",
+ "lastModified": 1701253981,
+ "narHash": "sha256-ztaDIyZ7HrTAfEEUt9AtTDNoCYxUdSd6NrRHaYOIxtk=",
+ "owner": "NixOS",
+ "repo": "nixpkgs",
+ "rev": "e92039b55bcd58469325ded85d4f58dd5a4eaf58",
"type": "github"
},
"original": {
- "owner": "nix-systems",
- "repo": "default",
+ "dir": "lib",
+ "owner": "NixOS",
+ "ref": "nixos-unstable",
+ "repo": "nixpkgs",
"type": "github"
}
+ },
+ "root": {
+ "inputs": {
+ "flake-parts": "flake-parts",
+ "nixpkgs": "nixpkgs"
+ }
}
},
"root": "root",
{
+ description = "Port of Facebook's LLaMA model in C/C++";
+
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
- flake-utils.url = "github:numtide/flake-utils";
+ flake-parts.url = "github:hercules-ci/flake-parts";
};
- outputs = { self, nixpkgs, flake-utils }:
- flake-utils.lib.eachDefaultSystem (system:
- let
- name = "llama.cpp";
- src = ./.;
- meta.mainProgram = "llama";
- inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin;
- buildInputs = with pkgs; [ openmpi ];
- osSpecific = with pkgs; buildInputs ++ (
- if isAarch64 && isDarwin then
- with pkgs.darwin.apple_sdk_11_0.frameworks; [
- Accelerate
- MetalKit
- ]
- else if isAarch32 && isDarwin then
- with pkgs.darwin.apple_sdk.frameworks; [
- Accelerate
- CoreGraphics
- CoreVideo
- ]
- else if isDarwin then
- with pkgs.darwin.apple_sdk.frameworks; [
- Accelerate
- CoreGraphics
- CoreVideo
- ]
- else
- with pkgs; [ openblas ]
- );
- pkgs = import nixpkgs { inherit system; };
- nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ];
- cudatoolkit_joined = with pkgs; symlinkJoin {
- # HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit
- # see https://github.com/NixOS/nixpkgs/issues/224291
- # copied from jaxlib
- name = "${cudaPackages.cudatoolkit.name}-merged";
- paths = [
- cudaPackages.cudatoolkit.lib
- cudaPackages.cudatoolkit.out
- ] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [
- # for some reason some of the required libs are in the targets/x86_64-linux
- # directory; not sure why but this works around it
- "${cudaPackages.cudatoolkit}/targets/${system}"
- ];
- };
- llama-python =
- pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]);
- # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
- llama-python-extra =
- pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]);
- postPatch = ''
- substituteInPlace ./ggml-metal.m \
- --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
- substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python'
- '';
- postInstall = ''
- mv $out/bin/main $out/bin/llama
- mv $out/bin/server $out/bin/llama-server
- mkdir -p $out/include
- cp ${src}/llama.h $out/include/
- '';
- cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ];
- in
+
+ # For inspection, use `nix flake show github:ggerganov/llama.cpp` or the nix repl:
+ #
+ # ```bash
+ # ❯ nix repl
+ # nix-repl> :lf github:ggerganov/llama.cpp
+ # Added 13 variables.
+ # nix-repl> outputs.apps.x86_64-linux.quantize
+ # { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/quantize"; type = "app"; }
+ # ```
+ outputs =
+ { self, flake-parts, ... }@inputs:
+ let
+ # We could include the git revisions in the package names but those would
+ # needlessly trigger rebuilds:
+ # llamaVersion = self.dirtyShortRev or self.shortRev;
+
+ # Nix already uses cryptographic hashes for versioning, so we'll just fix
+ # the fake semver for now:
+ llamaVersion = "0.0.0";
+ in
+ flake-parts.lib.mkFlake { inherit inputs; }
+
{
- packages.default = pkgs.stdenv.mkDerivation {
- inherit name src meta postPatch nativeBuildInputs postInstall;
- buildInputs = osSpecific;
- cmakeFlags = cmakeFlags
- ++ (if isAarch64 && isDarwin then [
- "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
- "-DLLAMA_METAL=ON"
- ] else [
- "-DLLAMA_BLAS=ON"
- "-DLLAMA_BLAS_VENDOR=OpenBLAS"
- ]);
- };
- packages.opencl = pkgs.stdenv.mkDerivation {
- inherit name src meta postPatch nativeBuildInputs postInstall;
- buildInputs = with pkgs; buildInputs ++ [ clblast ];
- cmakeFlags = cmakeFlags ++ [
- "-DLLAMA_CLBLAST=ON"
- ];
- };
- packages.cuda = pkgs.stdenv.mkDerivation {
- inherit name src meta postPatch nativeBuildInputs postInstall;
- buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ];
- cmakeFlags = cmakeFlags ++ [
- "-DLLAMA_CUBLAS=ON"
- ];
- };
- packages.rocm = pkgs.stdenv.mkDerivation {
- inherit name src meta postPatch nativeBuildInputs postInstall;
- buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ];
- cmakeFlags = cmakeFlags ++ [
- "-DLLAMA_HIPBLAS=1"
- "-DCMAKE_C_COMPILER=hipcc"
- "-DCMAKE_CXX_COMPILER=hipcc"
- # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
- # in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
- # and select the line that matches the current nixpkgs version of rocBLAS.
- "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
- ];
- };
- apps.llama-server = {
- type = "app";
- program = "${self.packages.${system}.default}/bin/llama-server";
- };
- apps.llama-embedding = {
- type = "app";
- program = "${self.packages.${system}.default}/bin/embedding";
- };
- apps.llama = {
- type = "app";
- program = "${self.packages.${system}.default}/bin/llama";
- };
- apps.quantize = {
- type = "app";
- program = "${self.packages.${system}.default}/bin/quantize";
- };
- apps.train-text-from-scratch = {
- type = "app";
- program = "${self.packages.${system}.default}/bin/train-text-from-scratch";
- };
- apps.default = self.apps.${system}.llama;
- devShells.default = pkgs.mkShell {
- buildInputs = [ llama-python ];
- packages = nativeBuildInputs ++ osSpecific;
- };
- devShells.extra = pkgs.mkShell {
- buildInputs = [ llama-python-extra ];
- packages = nativeBuildInputs ++ osSpecific;
- };
- });
+
+ imports = [
+ .devops/nix/nixpkgs-instances.nix
+ .devops/nix/apps.nix
+ .devops/nix/devshells.nix
+ .devops/nix/jetson-support.nix
+ ];
+
+      # An overlay can be used to have more granular control over llama-cpp's
+      # dependencies and configuration than the `.override` mechanism offers.
+      # Cf. https://nixos.org/manual/nixpkgs/stable/#chap-overlays.
+ #
+ # E.g. in a flake:
+ # ```
+ # { nixpkgs, llama-cpp, ... }:
+ # let pkgs = import nixpkgs {
+ # overlays = [ (llama-cpp.overlays.default) ];
+ # system = "aarch64-linux";
+ # config.allowUnfree = true;
+ # config.cudaSupport = true;
+ # config.cudaCapabilities = [ "7.2" ];
+ # config.cudaEnableForwardCompat = false;
+ # }; in {
+ # packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp;
+ # }
+ # ```
+ #
+ # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format
+ flake.overlays.default =
+ (final: prev: {
+ llamaPackages = final.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
+ inherit (final.llamaPackages) llama-cpp;
+ });
+
+ systems = [
+ "aarch64-darwin"
+ "aarch64-linux"
+ "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant)
+ "x86_64-linux"
+ ];
+
+ perSystem =
+ {
+ config,
+ lib,
+ pkgs,
+ pkgsCuda,
+ pkgsRocm,
+ ...
+ }:
+ {
+ # We don't use the overlay here so as to avoid making too many instances of nixpkgs,
+ # cf. https://zimbatm.com/notes/1000-instances-of-nixpkgs
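+          #
+          # Illustrative: on Linux, `nix build .#cuda` or `nix build .#rocm`
+          # builds against the pkgsCuda/pkgsRocm instances defined in
+          # nixpkgs-instances.nix.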
+ packages =
+ {
+ default = (pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
+ }
+ // lib.optionalAttrs pkgs.stdenv.isLinux {
+ opencl = config.packages.default.override { useOpenCL = true; };
+ cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
+ rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
+
+ mpi-cpu = config.packages.default.override { useMpi = true; };
+            mpi-cuda = config.packages.cuda.override { useMpi = true; };
+ };
+ };
+ };
}