sycl : Add option to set the SYCL architecture for all targets (#10266)

author Romain Biessy <redacted>

Tue, 19 Nov 2024 08:02:23 +0000 (09:02 +0100)

committer GitHub <redacted>

Tue, 19 Nov 2024 08:02:23 +0000 (08:02 +0000)
author Romain Biessy <redacted>
Tue, 19 Nov 2024 08:02:23 +0000 (09:02 +0100)
committer GitHub <redacted>
Tue, 19 Nov 2024 08:02:23 +0000 (08:02 +0000)
diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md

index 38185f73897ee9e435c23db2b6af0bc897f5baf3..e431f51f184ccd075abaa34d089206475917fa81 100644 (file)
--- a/docs/backend/SYCL.md
+++ b/docs/backend/SYCL.md
@@ -312,12 +312,14 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_
  export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
  
  # Build LLAMA with Nvidia BLAS acceleration through SYCL
+# Setting GGML_SYCL_DEVICE_ARCH is optional but can improve performance
+GGML_SYCL_DEVICE_ARCH=sm_80 # Example architecture
  
  # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
  
  # Option 2: Use FP16
-cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
+cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
  
  # build all binary
  cmake --build build --config Release -j -v
@@ -335,8 +337,9 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithrocBLAS/include:$CPLUS_INCLUDE
  
  ## AMD
  # Use FP32, FP16 is not supported
-# Find your GGML_SYCL_HIP_TARGET with rocminfo, under the key 'Name:'
-cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_HIP_TARGET=${GGML_SYCL_HIP_TARGET} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+# Find your GGML_SYCL_DEVICE_ARCH with rocminfo, under the key 'Name:'
+GGML_SYCL_DEVICE_ARCH=gfx90a # Example architecture
+cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
  
  # build all binary
  cmake --build build --config Release -j -v
@@ -646,6 +649,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
  |--------------------|---------------------------------------|---------------------------------------------|
  | GGML_SYCL          | ON (mandatory)                        | Enable build with SYCL code path.<br>FP32 path - recommended for better performance than FP16 on quantized models|
  | GGML_SYCL_TARGET   | INTEL *(default)* \| NVIDIA \| AMD    | Set the SYCL target device type.            |
+| GGML_SYCL_DEVICE_ARCH | Optional (except for AMD)          | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. |
  | GGML_SYCL_F16      | OFF *(default)* \|ON *(optional)*     | Enable FP16 build with SYCL code path.      |
  | CMAKE_C_COMPILER   | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path.      |
  | CMAKE_CXX_COMPILER | `icpx` *(Linux)*, `icx` *(Windows)*   | Set `icpx/icx` compiler for SYCL code path. |
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt

index a82818d607f5e816a585c2da87ff070ba81dc316..b16a0e9ada7ed1c10a43ae8c3f0f2194d0fad31d 100644 (file)
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -164,6 +164,8 @@ option(GGML_SYCL                            "ggml: use SYCL"
  option(GGML_SYCL_F16                        "ggml: use 16 bit floats for sycl calculations"   OFF)
  set   (GGML_SYCL_TARGET "INTEL" CACHE STRING
                                              "ggml: sycl target device")
+set   (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
+                                            "ggml: sycl device architecture")
  
  # extra artifacts
  option(GGML_BUILD_TESTS    "ggml: build tests"    ${GGML_STANDALONE})
diff --git a/ggml/src/ggml-sycl/CMakeLists.txt b/ggml/src/ggml-sycl/CMakeLists.txt

index 03bf3cb929b324deec82f3aeed3ffbb3e052e2fa..d1d0ff83d636c5636f71ebaca1a92ea09349bb30 100644 (file)
--- a/ggml/src/ggml-sycl/CMakeLists.txt
+++ b/ggml/src/ggml-sycl/CMakeLists.txt
@@ -72,10 +72,14 @@ else()
          set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
          target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl)
      elseif (GGML_SYCL_TARGET STREQUAL "AMD")
-        if (GGML_SYCL_HIP_TARGET STREQUAL "")
-            message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_HIP_TARGET has not been set.")
+        if (NOT GGML_SYCL_DEVICE_ARCH) # the AMD target requires an explicit device architecture
+            message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.") # FATAL_ERROR stops configuration; a bare ERROR is not a message() mode and would only be printed as text
          endif()
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${GGML_SYCL_HIP_TARGET}")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa")
          target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl)
      endif()
+
+    if (GGML_SYCL_DEVICE_ARCH) # only pass the backend option when an architecture was requested
+        string(APPEND CMAKE_CXX_FLAGS " -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}")
+    endif()
  endif()
author	Romain Biessy <redacted>
	Tue, 19 Nov 2024 08:02:23 +0000 (09:02 +0100)
committer	GitHub <redacted>
	Tue, 19 Nov 2024 08:02:23 +0000 (08:02 +0000)
docs/backend/SYCL.md		patch \| blob \| history
ggml/CMakeLists.txt		patch \| blob \| history
ggml/src/ggml-sycl/CMakeLists.txt		patch \| blob \| history