COPY . .
-RUN mkdir build && \
- cd build && \
- if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
echo "LLAMA_SYCL_F16 is set" && \
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
fi && \
- cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
- cmake --build . --config Release --target main
+ cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+ cmake --build build --config Release --target main
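The `cmake -B build` form used throughout this change also removes a failure mode: `mkdir build` aborts if the directory already exists, while `-B` creates it on demand and simply reconfigures in place, so these steps are safe to re-run. A minimal illustration in plain shell:

```sh
mkdir build      # a second invocation fails: "mkdir: cannot create directory 'build': File exists"
cmake -B build   # creates build/ if needed, then configures
cmake -B build   # re-running is harmless: it reconfigures in place
```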
FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
# Build it
WORKDIR /app
COPY . .
-RUN mkdir build && \
- cd build && \
- cmake .. -DLLAMA_VULKAN=1 && \
- cmake --build . --config Release --target main
+RUN cmake -B build -DLLAMA_VULKAN=1 && \
+ cmake --build build --config Release --target main
# Clean up
WORKDIR /
COPY . .
-RUN mkdir build && \
- cd build && \
- if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
echo "LLAMA_SYCL_F16 is set" && \
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
fi && \
- cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
- cmake --build . --config Release --target server
+ cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
+ cmake --build build --config Release --target server
FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
# Build it
WORKDIR /app
COPY . .
-RUN mkdir build && \
- cd build && \
- cmake .. -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
- cmake --build . --config Release --target server
+RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
+ cmake --build build --config Release --target server
# Clean up
WORKDIR /
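Images built from these Dockerfiles are produced the usual way; for example, the SYCL server image (a sketch: it assumes the Dockerfile lives at `.devops/server-intel.Dockerfile`, with FP16 optionally enabled through the build arg shown above):

```sh
docker build -t llama-cpp-server-sycl \
    --build-arg LLAMA_SYCL_F16=ON \
    -f .devops/server-intel.Dockerfile .
```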
id: cmake_build
run: |
set -eux
- mkdir build
- cd build
- cmake .. \
+ cmake -B build \
-DLLAMA_NATIVE=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DLLAMA_CURL=ON \
-DLLAMA_FATAL_WARNINGS=OFF \
-DLLAMA_ALL_WARNINGS=OFF \
-DCMAKE_BUILD_TYPE=Release;
- cmake --build . --config Release -j $(nproc) --target server
+ cmake --build build --config Release -j $(nproc) --target server
- name: Download the dataset
id: download_dataset
- name: Build
id: cmake_build
run: |
- mkdir build
- cd build
- cmake .. \
+ cmake -B build \
-DLLAMA_NATIVE=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DLLAMA_CURL=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
- cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target server
- name: Tests
- name: Build
id: cmake_build
run: |
- mkdir build
- cd build
- cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
- cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+ cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
+ cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
- name: Python setup
id: setup_python
```sh
git clone https://github.com/oneapi-src/oneMKL
cd oneMKL
-mkdir -p buildWithCublas && cd buildWithCublas
-cmake ../ -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
-make
+cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
+cmake --build buildWithCublas --config Release
```
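Optionally, the cuBLAS-backed oneMKL build can then be installed to a prefix instead of being referenced from its build tree (a sketch; the prefix is arbitrary and assumes oneMKL's standard CMake install rules):

```sh
cmake --install buildWithCublas --prefix /opt/oneMKL
```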
source /opt/intel/oneapi/setvars.sh
# Build LLAMA with MKL BLAS acceleration for intel GPU
-mkdir -p build && cd build
# Option 1: Use FP32 (recommended for better performance in most cases)
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
# Option 2: Use FP16
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
-#build all binary
-cmake --build . --config Release -j -v
+# build all binaries
+cmake --build build --config Release -j -v
```
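After the build, the SYCL device enumeration can be sanity-checked (a sketch; it assumes the `ls-sycl-device` example was built along with the other targets, and follows the new `build/bin` layout):

```sh
source /opt/intel/oneapi/setvars.sh
./build/bin/ls-sycl-device
```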
#### Nvidia GPU
export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
# Build LLAMA with Nvidia BLAS acceleration through SYCL
-mkdir -p build && cd build
# Option 1: Use FP32 (recommended for better performance in most cases)
-cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
# Option 2: Use FP16
-cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
-#build all binary
-cmake --build . --config Release -j -v
+# build all binaries
+cmake --build build --config Release -j -v
```
On the oneAPI command line window, step into the llama.cpp main directory and run the following:
```
-mkdir -p build
-cd build
@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
# Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
+cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
# Option 2: Or FP16
-cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
+cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
-make -j
+cmake --build build --config Release -j
```
Otherwise, run the `win-build-sycl.bat` wrapper, which encapsulates the instructions above:
make
```
+ **Note**: for `Debug` builds, run `make LLAMA_DEBUG=1`
+
- On Windows:
1. Download the latest Fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
- Using `CMake`:
```bash
- mkdir build
- cd build
- cmake ..
- cmake --build . --config Release
+ cmake -B build
+ cmake --build build --config Release
```
+ **Note**: for `Debug` builds, there are two cases:
+
+ - Single-config generators (e.g. default = `Unix Makefiles`; note that they just ignore the `--config` flag):
+
+ ```bash
+ cmake -B build -DCMAKE_BUILD_TYPE=Debug
+ cmake --build build
+ ```
+
+ - Multi-config generators (`-G` param set to Visual Studio, XCode...):
+
+ ```bash
+ cmake -B build -G "Xcode"
+ cmake --build build --config Debug
+ ```
+
- Using `Zig` (version 0.11 or later):
Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C,
- Using `CMake` on Linux:
```bash
- mkdir build
- cd build
- cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
- cmake --build . --config Release
+ cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+ cmake --build build --config Release
```
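To confirm that BLAS was actually picked up, check the `system_info` line `main` prints at startup; it should report `BLAS = 1` (a sketch; the model path is a placeholder):

```sh
./build/bin/main -m models/7B/ggml-model-q4_0.gguf -p "test" -n 16 2>&1 | grep "BLAS = 1"
```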
- #### BLIS
- Using manual oneAPI installation:
By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you have already sourced the Intel environment script and pass `-DLLAMA_BLAS=ON` to cmake, the MKL version of BLAS will automatically be selected. Otherwise, please install oneAPI and follow the steps below:
```bash
- mkdir build
- cd build
source /opt/intel/oneapi/setvars.sh # You can skip this step if in oneapi-basekit docker image, only required for manual installation
- cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
- cmake --build . --config Release
+ cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
+ cmake --build build --config Release
```
- Using oneAPI docker image:
- Using `CMake`:
```bash
- mkdir build
- cd build
- cmake .. -DLLAMA_CUDA=ON
- cmake --build . --config Release
+ cmake -B build -DLLAMA_CUDA=ON
+ cmake --build build --config Release
```
The environment variable [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) can be used to specify which GPU(s) will be used. The following compilation options are also available to tweak performance:
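Picking up the `CUDA_VISIBLE_DEVICES` variable mentioned above: for example, to pin inference to the first GPU and offload all layers (the model path is a placeholder):

```sh
CUDA_VISIBLE_DEVICES=0 ./build/bin/main -m models/7B/ggml-model-q4_0.gguf -p "hello" -n 64 -ngl 99
```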
- Using `CMake` for Linux (assuming a gfx1030-compatible AMD GPU):
```bash
CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
- cmake -H. -Bbuild -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
- && cmake --build build -- -j 16
+ cmake -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
+ && cmake --build build --config Release -- -j 16
```
On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON`.
However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
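For instance, on an APU the configure line above can be reused with UMA enabled (a sketch; substitute your APU's target, e.g. `gfx1035`, for the discrete `gfx1030` shown earlier):

```sh
CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
    cmake -B build -DLLAMA_HIPBLAS=ON -DLLAMA_HIP_UMA=ON -DAMDGPU_TARGETS=gfx1035 -DCMAKE_BUILD_TYPE=Release \
    && cmake --build build --config Release -- -j 16
```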
```sh
git clone --recurse-submodules https://github.com/KhronosGroup/OpenCL-SDK.git
- mkdir OpenCL-SDK/build
- cd OpenCL-SDK/build
- cmake .. -DBUILD_DOCS=OFF \
+ cd OpenCL-SDK
+ cmake -B build -DBUILD_DOCS=OFF \
-DBUILD_EXAMPLES=OFF \
-DBUILD_TESTING=OFF \
-DOPENCL_SDK_BUILD_SAMPLES=OFF \
-DOPENCL_SDK_TEST_SAMPLES=OFF
- cmake --build . --config Release
- cmake --install . --prefix /some/path
+ cmake --build build
+ cmake --install build --prefix /some/path
```
</details>
```cmd
set OPENCL_SDK_ROOT="C:/OpenCL-SDK-v2023.04.17-Win-x64"
git clone https://github.com/CNugteren/CLBlast.git
- mkdir CLBlast\build
- cd CLBlast\build
- cmake .. -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
- cmake --build . --config Release
- cmake --install . --prefix C:/CLBlast
+ cd CLBlast
+ cmake -B build -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
+ cmake --build build --config Release
+ cmake --install build --prefix C:/CLBlast
```
+ (note: `--config Release` at build time is only relevant for multi-config generators such as Visual Studio or multi-config Ninja builds; single-config generators ignore the flag)
+
- <details>
<summary>Unix:</summary>
```sh
git clone https://github.com/CNugteren/CLBlast.git
- mkdir CLBlast/build
- cd CLBlast/build
- cmake .. -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF
- cmake --build . --config Release
- cmake --install . --prefix /some/path
+ cd CLBlast
+ cmake -B build -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF
+ cmake --build build --config Release
+ cmake --install build --prefix /some/path
```
Where `/some/path` is where the built library will be installed (default is `/usr/local`).
```
- CMake (Unix):
```sh
- mkdir build
- cd build
- cmake .. -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
- cmake --build . --config Release
+ cmake -B build -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
+ cmake --build build --config Release
```
- CMake (Windows):
```cmd
set CL_BLAST_CMAKE_PKG="C:/CLBlast/lib/cmake/CLBlast"
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
- mkdir build
- cd build
- cmake .. -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
- cmake --build . --config Release
- cmake --install . --prefix C:/LlamaCPP
+ cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
+ cmake --build build --config Release
+ cmake --install build --prefix C:/LlamaCPP
```
##### Running Llama with CLBlast
Then, build llama.cpp using the cmake command below:
```bash
- mkdir -p build
- cd build
- cmake .. -DLLAMA_VULKAN=1
- cmake --build . --config Release
+ cmake -B build -DLLAMA_VULKAN=1
+ cmake --build build --config Release
# Test the output binary (with "-ngl 33" to offload all layers to GPU)
-./bin/main -m "PATH_TO_MODEL" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4
+./build/bin/main -m "PATH_TO_MODEL" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4
```cmd
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
-mkdir build
-cd build
-cmake .. -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=C:/CLBlast/lib/cmake/CLBlast -G "Visual Studio 17 2022" -A x64
-cmake --build . --config Release
-cmake --install . --prefix C:/LlamaCPP
+cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=C:/CLBlast/lib/cmake/CLBlast -G "Visual Studio 17 2022" -A x64
+cmake --build build --config Release
+cmake --install build --prefix C:/LlamaCPP
```
### Build main-cmake-pkg
```cmd
cd ..\examples\main-cmake-pkg
-mkdir build
-cd build
-cmake .. -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="C:/CLBlast/lib/cmake/CLBlast;C:/LlamaCPP/lib/cmake/Llama" -G "Visual Studio 17 2022" -A x64
-cmake --build . --config Release
-cmake --install . --prefix C:/MyLlamaApp
+cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="C:/CLBlast/lib/cmake/CLBlast;C:/LlamaCPP/lib/cmake/Llama" -G "Visual Studio 17 2022" -A x64
+cmake --build build --config Release
+cmake --install build --prefix C:/MyLlamaApp
```
- Using `make`:
```bash
- make
+ make server
```
- Using `CMake`:
```bash
- cmake --build . --config Release
+ cmake -B build
+ cmake --build build --config Release -t server
```
+ Binary is at `./build/bin/server`
+
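For a quick smoke test, the freshly built binary can be launched directly (the model path is a placeholder):

```sh
./build/bin/server -m models/7B/ggml-model.gguf --port 8080
```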
## Build with SSL
`server` can also be built with SSL support using OpenSSL 3.
- Using `CMake`:
```bash
- mkdir build
- cd build
- cmake .. -DLLAMA_SERVER_SSL=ON
- make server
+ cmake -B build -DLLAMA_SERVER_SSL=ON
+ cmake --build build --config Release -t server
```
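Once built this way, the server can be started with TLS enabled (a sketch; the key and certificate paths are placeholders, and the `--ssl-key-file`/`--ssl-cert-file` flags assume the SSL-enabled build above):

```sh
./build/bin/server -m models/7B/ggml-model.gguf \
    --ssl-key-file server.key --ssl-cert-file server.crt
```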
## Quick Start