SYCL: Update to oneAPI 2025.2 (#16371)

author Neo Zhang Jianyu <redacted>

Thu, 2 Oct 2025 07:16:25 +0000 (15:16 +0800)

committer GitHub <redacted>

Thu, 2 Oct 2025 07:16:25 +0000 (10:16 +0300)
author Neo Zhang Jianyu <redacted>
Thu, 2 Oct 2025 07:16:25 +0000 (15:16 +0800)
committer GitHub <redacted>
Thu, 2 Oct 2025 07:16:25 +0000 (10:16 +0300)
diff --git a/.devops/intel.Dockerfile b/.devops/intel.Dockerfile

index 9ce80a71eb9501094a753363a1a521b6082e4489..cd2f9aa79bd1ec394dfe2d32be1c2f6c92770622 100644 (file)
--- a/.devops/intel.Dockerfile
+++ b/.devops/intel.Dockerfile
@@ -1,8 +1,8 @@
-ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04
+ARG ONEAPI_VERSION=2025.2.2-0-devel-ubuntu24.04
  
  ## Build Image
  
-FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
+FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
  
  ARG GGML_SYCL_F16=OFF
  RUN apt-get update && \
@@ -31,7 +31,7 @@ RUN mkdir -p /app/full \
      && cp requirements.txt /app/full \
      && cp .devops/tools.sh /app/full/tools.sh
  
-FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base
+FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
  
  RUN apt-get update \
      && apt-get install -y libgomp1 curl\
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml

index ffd81ae5e07818c37113ed74b6658fd489109e56..224c6df2a10a79ab63e266a5b1d5fe8cf5527d62 100644 (file)
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1059,7 +1059,7 @@ jobs:
          shell: bash
  
      env:
-      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
+      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe
        WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
        ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
      steps:
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml

index 91fd43a32eb16c9c7bfc2e885f6de0fba5a7e42d..e28958fcdb2efa34bcdb87661bd61f022c277f8f 100644 (file)
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -462,7 +462,7 @@ jobs:
          shell: bash
  
      env:
-      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
+      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe
        WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
        ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
  
@@ -505,6 +505,7 @@ jobs:
            cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
  
            cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero_v2.dll" ./build/bin
            cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
            cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
            cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
@@ -513,10 +514,15 @@ jobs:
            cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
            cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
            cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl-ls.exe" ./build/bin
  
            cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
            cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
  
+          cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/tcm.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/libhwloc-15.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/umf/latest/bin/umf.dll" ./build/bin
+
            echo "cp oneAPI running time dll files to ./build/bin done"
            7z a llama-bin-win-sycl-x64.zip ./build/bin/*
  
diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md

index 6e9b88935da97a0a6f361359a6d24ecd5893fb7c..92ab27066b4a5ffe804113acc66d95a7f9ea9625 100644 (file)
--- a/docs/backend/SYCL.md
+++ b/docs/backend/SYCL.md
@@ -145,12 +145,13 @@ The docker build option is currently limited to *Intel GPU* targets.
  ```sh
  # Using FP16
  docker build -t llama-cpp-sycl --build-arg="GGML_SYCL_F16=ON" --target light -f .devops/intel.Dockerfile .
+
+# Using FP32
+docker build -t llama-cpp-sycl --build-arg="GGML_SYCL_F16=OFF" --target light -f .devops/intel.Dockerfile .
  ```
  
  *Notes*:
  
-To build in default FP32 *(Slower than FP16 alternative)*, set `--build-arg="GGML_SYCL_F16=OFF"` in the previous command.
-
  You can also use the `.devops/llama-server-intel.Dockerfile`, which builds the *"server"* alternative.
  Check the [documentation for Docker](../docker.md) to see the available images.
  
@@ -160,7 +161,7 @@ Check the [documentation for Docker](../docker.md) to see the available images.
  # First, find all the DRI cards
  ls -la /dev/dri
  # Then, pick the card that you want to use (here for e.g. /dev/dri/card1).
-docker run -it --rm -v "$(pwd):/app:Z" --device /dev/dri/renderD128:/dev/dri/renderD128 --device /dev/dri/card1:/dev/dri/card1 llama-cpp-sycl -m "/app/models/YOUR_MODEL_FILE" -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33
+docker run -it --rm -v "/path/to/models:/models" --device /dev/dri/renderD128:/dev/dri/renderD128 --device /dev/dri/card0:/dev/dri/card0 llama-cpp-sycl -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -c 4096 -s 0
  ```
  
  *Notes:*
@@ -215,9 +216,19 @@ To target AMD GPUs with SYCL, the ROCm stack must be installed first.
  
  2. **Install Intel® oneAPI Base toolkit**
  
+SYCL backend depends on:
+  - Intel® oneAPI DPC++/C++ compiler and runtime.
+  - Intel® oneAPI DPC++/C++ library (oneDPL).
+  - Intel® oneAPI Deep Neural Network Library (oneDNN).
+  - Intel® oneAPI Math Kernel Library (oneMKL).
+
  - **For Intel GPU**
  
-The base toolkit can be obtained from the official [Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) page.
+All of the above are included in both the **Intel® oneAPI Base toolkit** and the **Intel® Deep Learning Essentials** packages.
+
+It's recommended to install **Intel® Deep Learning Essentials**, which provides only the necessary libraries and is therefore smaller in size.
+
+The **Intel® oneAPI Base toolkit** and **Intel® Deep Learning Essentials** can be obtained from the official [Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) page.
  
  Please follow the instructions for downloading and installing the Toolkit for Linux, and preferably keep the default installation values unchanged, notably the installation path *(`/opt/intel/oneapi` by default)*.
  
@@ -225,6 +236,12 @@ Following guidelines/code snippets assume the default installation values. Other
  
  Upon a successful installation, SYCL is enabled for the available intel devices, along with relevant libraries such as oneAPI oneDNN for Intel GPUs.
  
+|Verified release|
+|-|
+|2025.2.1|
+|2025.1|
+|2024.1|
+
  - **Adding support to Nvidia GPUs**
  
  **oneAPI Plugin**: In order to enable SYCL support on Nvidia GPUs, please install the [Codeplay oneAPI Plugin for Nvidia GPUs](https://developer.codeplay.com/products/oneapi/nvidia/download). User should also make sure the plugin version matches the installed base toolkit one *(previous step)* for a seamless "oneAPI on Nvidia GPU" setup.
@@ -255,10 +272,11 @@ sycl-ls
  When targeting an intel GPU, the user should expect one or more devices among the available SYCL devices. Please make sure that at least one GPU is present via `sycl-ls`, for instance `[level_zero:gpu]` in the sample output below:
  
  ```
-[opencl:acc][opencl:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2  [2023.16.10.0.17_160000]
-[opencl:cpu][opencl:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
-[opencl:gpu][opencl:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO  [23.30.26918.50]
-[level_zero:gpu][level_zero:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
+[level_zero:gpu][level_zero:0] Intel(R) oneAPI Unified Runtime over Level-Zero, Intel(R) Arc(TM) A770 Graphics 12.55.8 [1.3.29735+27]
+[level_zero:gpu][level_zero:1] Intel(R) oneAPI Unified Runtime over Level-Zero, Intel(R) UHD Graphics 730 12.2.0 [1.3.29735+27]
+[opencl:cpu][opencl:0] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i5-13400 OpenCL 3.0 (Build 0) [2025.20.8.0.06_160000]
+[opencl:gpu][opencl:1] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO  [24.39.31294]
+[opencl:gpu][opencl:2] Intel(R) OpenCL Graphics, Intel(R) UHD Graphics 730 OpenCL 3.0 NEO  [24.39.31294]
  ```
  
  - **Nvidia GPU**
@@ -353,7 +371,7 @@ cmake --build build --config Release -j -v
  
  #### Retrieve and prepare model
  
-You can refer to the general [*Prepare and Quantize*](README.md#prepare-and-quantize) guide for model preparation, or download an already quantized model like [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) or [Meta-Llama-3-8B-Instruct-Q4_0.gguf](https://huggingface.co/aptha/Meta-Llama-3-8B-Instruct-Q4_0-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_0.gguf).
+You can refer to the general [*Prepare and Quantize*](README.md#prepare-and-quantize) guide for model preparation, or download an already quantized model like [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf?download=true) or [Meta-Llama-3-8B-Instruct-Q4_0.gguf](https://huggingface.co/aptha/Meta-Llama-3-8B-Instruct-Q4_0-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_0.gguf).
  
  ##### Check device
  
@@ -466,7 +484,17 @@ If you already have a recent version of Microsoft Visual Studio, you can skip th
  
  3. Install Intel® oneAPI Base toolkit
  
-The base toolkit can be obtained from the official [Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) page.
+SYCL backend depends on:
+  - Intel® oneAPI DPC++/C++ compiler and runtime.
+  - Intel® oneAPI DPC++/C++ library (oneDPL).
+  - Intel® oneAPI Deep Neural Network Library (oneDNN).
+  - Intel® oneAPI Math Kernel Library (oneMKL).
+
+All of the above are included in both the **Intel® oneAPI Base toolkit** and the **Intel® Deep Learning Essentials** packages.
+
+It's recommended to install **Intel® Deep Learning Essentials**, which provides only the necessary libraries and is therefore smaller in size.
+
+The **Intel® oneAPI Base toolkit** and **Intel® Deep Learning Essentials** can be obtained from the official [Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) page.
  
  Please follow the instructions for downloading and installing the Toolkit for Windows, and preferably keep the default installation values unchanged, notably the installation path *(`C:\Program Files (x86)\Intel\oneAPI` by default)*.
author	Neo Zhang Jianyu <redacted>
	Thu, 2 Oct 2025 07:16:25 +0000 (15:16 +0800)
committer	GitHub <redacted>
	Thu, 2 Oct 2025 07:16:25 +0000 (10:16 +0300)
.devops/intel.Dockerfile		patch \| blob \| history
.github/workflows/build.yml		patch \| blob \| history
.github/workflows/release.yml		patch \| blob \| history
docs/backend/SYCL.md		patch \| blob \| history