make
ctest -L gh --output-on-failure'
+ ubuntu-22-cmake-sycl:
+ runs-on: ubuntu-22.04
+
+ strategy:
+ fail-fast: false
+ matrix:
+ dwhisper_sycl: [ON]
+ dcmake_c_compiler: [icx]
+ dcmake_cxx_compiler: [icpx]
+ arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
+
+ continue-on-error: true
+
+ steps:
+ - name: Clone
+ uses: actions/checkout@v3
+
+ - name: add oneAPI to apt
+ shell: bash
+ run: |
+ cd /tmp
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+
+ - name: install oneAPI dpcpp compiler
+ shell: bash
+ run: |
+ sudo apt update
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
+
+ - name: install oneAPI MKL library
+ shell: bash
+ run: |
+ sudo apt install intel-oneapi-mkl-devel
+
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v3
+
+ - name: Build
+ id: cmake_build
+ run: |
+ source /opt/intel/oneapi/setvars.sh
+ mkdir build
+ cd build
+ cmake -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
+ cmake --build . --config Release -j $(nproc)
+
+ ubuntu-22-cmake-sycl-fp16:
+ runs-on: ubuntu-22.04
+
+ strategy:
+ fail-fast: false
+ matrix:
+ dwhisper_sycl: [ON]
+ dcmake_c_compiler: [icx]
+ dcmake_cxx_compiler: [icpx]
+ arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
+
+ continue-on-error: true
+
+ steps:
+ - name: Clone
+ uses: actions/checkout@v3
+
+ - name: add oneAPI to apt
+ shell: bash
+ run: |
+ cd /tmp
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+
+ - name: install oneAPI dpcpp compiler
+ shell: bash
+ run: |
+ sudo apt update
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
+
+ - name: install oneAPI MKL library
+ shell: bash
+ run: |
+ sudo apt install intel-oneapi-mkl-devel
+
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v3
+
+ - name: Build
+ id: cmake_build
+ run: |
+ source /opt/intel/oneapi/setvars.sh
+ mkdir build
+ cd build
+ cmake -DWHISPER_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
+ cmake --build . --config Release -j $(nproc)
+
windows:
runs-on: windows-latest
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
option(WHISPER_METAL_EMBED_LIBRARY "whisper: embed Metal library" OFF)
else()
- option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
- option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
- option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
- option(WHISPER_CUBLAS "whisper: support for cuBLAS" OFF)
- option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
- option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
+ option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
+ option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
+ option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
+ option(WHISPER_CUBLAS "whisper: support for cuBLAS" OFF)
+ option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
+ option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
+ option(WHISPER_SYCL "whisper: use SYCL" OFF)
+ option(WHISPER_SYCL_F16 "whisper: use 16 bit floats for sycl calculations" OFF)
endif()
option(WHISPER_PERF "whisper: enable perf timings" OFF)
find_package(Threads REQUIRED)
+#compile flag sycl
+if (WHISPER_SYCL)
+ set(CMAKE_CXX_STANDARD 17)
+else()
+ set(CMAKE_CXX_STANDARD 11)
+endif()
+
# on APPLE
if (APPLE)
# include Accelerate framework
find_package(OpenVINO REQUIRED COMPONENTS Runtime)
endif()
+if (WHISPER_SYCL)
+ if ( NOT DEFINED ENV{ONEAPI_ROOT})
+ message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
+ endif()
+ #todo: AOT
+
+ find_package(IntelSYCL REQUIRED)
+ if (WHISPER_SYCL_F16)
+ add_compile_definitions(GGML_SYCL_F16)
+ endif()
+ add_compile_definitions(GGML_USE_SYCL)
+
+ add_compile_options(-I./) #include DPCT
+ add_compile_options(-I/${SYCL_INCLUDE_DIR})
+
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
+
+ set(GGML_HEADERS_SYCL ggml-sycl.h)
+ set(GGML_SOURCES_SYCL ggml-sycl.cpp)
+
+ set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
+endif()
# compiler flags
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
${GGML_SOURCES_METAL}
${GGML_SOURCES_CUDA}
${GGML_SOURCES_OPENCL}
+ ${GGML_SOURCES_SYCL}
+ ${GGML_HEADERS_SYCL}
whisper.h
whisper.cpp
)
--- /dev/null
+# whisper.cpp for SYCL\r
+\r
+[Background](#background)\r
+\r
+[OS](#os)\r
+\r
+[Intel GPU](#intel-gpu)\r
+\r
+[Linux](#linux)\r
+\r
+[Environment Variable](#environment-variable)\r
+\r
+[Known Issue](#known-issue)\r
+\r
+[Todo](#todo)\r
+\r
+## Background\r
+\r
+SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators, such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.\r
+\r
+oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.\r
+\r
+Intel uses SYCL as the direct programming language to support CPUs, GPUs and FPGAs.\r
+\r
+To avoid re-inventing the wheel, this code refers to other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use an open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) to migrate to SYCL.\r
+\r
+The whisper.cpp for SYCL is used to support Intel GPUs.\r
+\r
+For Intel CPUs, we recommend using whisper.cpp for X86 (Intel MKL build).\r
+\r
+## OS\r
+\r
+|OS|Status|Verified|\r
+|-|-|-|\r
+|Linux|Support|Ubuntu 22.04|\r
+|Windows|Ongoing| |\r
+\r
+\r
+## Intel GPU\r
+\r
+|Intel GPU| Status | Verified Model|\r
+|-|-|-|\r
+|Intel Data Center Max Series| Support| Max 1550|\r
+|Intel Data Center Flex Series| Support| Flex 170|\r
+|Intel Arc Series| Support| Arc 770|\r
+|Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|\r
+|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|\r
+\r
+\r
+## Linux\r
+\r
+### Setup Environment\r
+\r
+1. Install Intel GPU driver.\r
+\r
+a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).\r
+\r
+Note: for iGPU, please install the client GPU driver.\r
+\r
+b. Add user to group: video, render.\r
+\r
+```\r
+sudo usermod -aG render username\r
+sudo usermod -aG video username\r
+```\r
+\r
+Note: re-login to enable it.\r
+\r
+c. Check\r
+\r
+```\r
+sudo apt install clinfo\r
+sudo clinfo -l\r
+```\r
+\r
+Output (example):\r
+\r
+```\r
+Platform #0: Intel(R) OpenCL Graphics\r
+ `-- Device #0: Intel(R) Arc(TM) A770 Graphics\r
+\r
+\r
+Platform #0: Intel(R) OpenCL HD Graphics\r
+ `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]\r
+```\r
+\r
+2. Install Intel® oneAPI Base toolkit.\r
+\r
+\r
+a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit ](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).\r
+\r
+We recommend installing to the default folder: **/opt/intel/oneapi**.\r
+\r
+The following guide uses the default folder as an example. If you use another folder, please modify the guide accordingly.\r
+\r
+b. Check\r
+\r
+```\r
+source /opt/intel/oneapi/setvars.sh\r
+\r
+sycl-ls\r
+```\r
+\r
+There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**.\r
+\r
+Output (example):\r
+```\r
+[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]\r
+[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]\r
+[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]\r
+[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]\r
+\r
+```\r
+\r
+3. Build locally:\r
+\r
+```\r
+mkdir -p build\r
+cd build\r
+source /opt/intel/oneapi/setvars.sh\r
+\r
+#for FP16\r
+#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON \r
+\r
+#for FP32\r
+cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx\r
+\r
+#build example/main only\r
+#cmake --build . --config Release --target main\r
+\r
+#build all binary\r
+cmake --build . --config Release -v\r
+\r
+```\r
+\r
+or\r
+\r
+```\r
+./examples/sycl/build.sh\r
+```\r
+\r
+Note:\r
+\r
+- By default, all binary files are built, which takes more time. To reduce the build time, we recommend building **example/main** only.\r
+\r
+### Run\r
+\r
+1. Put the model file in the **models** folder\r
+\r
+2. Enable oneAPI running environment\r
+\r
+```\r
+source /opt/intel/oneapi/setvars.sh\r
+```\r
+\r
+3. List device ID\r
+\r
+Run without parameter:\r
+\r
+```\r
+./build/bin/ls-sycl-device\r
+\r
+or\r
+\r
+./build/bin/main\r
+```\r
+\r
+Check the ID in startup log, like:\r
+\r
+```\r
+found 4 SYCL devices:\r
+ Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,\r
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136\r
+ Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,\r
+ max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280\r
+ Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,\r
+ max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280\r
+ Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,\r
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136\r
+\r
+```\r
+\r
+|Attribute|Note|\r
+|-|-|\r
+|compute capability 1.3|Level-Zero runtime, recommended|\r
+|compute capability 3.0|OpenCL runtime, slower than Level-Zero in most cases|\r
+\r
+4. Set device ID and execute whisper.cpp\r
+\r
+Set device ID = 0 by **GGML_SYCL_DEVICE=0**\r
+\r
+```\r
+GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav\r
+```\r
+or run by script:\r
+\r
+```\r
+./examples/sycl/run_whisper.sh\r
+```\r
+\r
+\r
+\r
+5. Check the device ID in output\r
+\r
+Like:\r
+```\r
+Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device\r
+```\r
+\r
+\r
+## Environment Variable\r
+\r
+#### Build\r
+\r
+|Name|Value|Function|\r
+|-|-|-|\r
+|WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|\r
+|WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path.For FP32, do not set it.|\r
+|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|\r
+|CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|\r
+\r
+#### Running\r
+\r
+\r
+|Name|Value|Function|\r
+|-|-|-|\r
+|GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|\r
+|GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|\r
+\r
+## Known Issue\r
+\r
+- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.\r
+\r
+ The oneAPI running environment is not enabled.\r
+\r
+ Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.\r
+\r
+\r
+- Hang during startup\r
+\r
+ whisper.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems, the memcpy may misbehave and block.\r
+\r
+ Solution: add **--no-mmap**.\r
+\r
+## Todo\r
+\r
+- Support building on Windows.\r
+\r
+- Support multiple cards.
\ No newline at end of file
add_subdirectory(talk)
add_subdirectory(talk-llama)
add_subdirectory(lsp)
+ if (WHISPER_SYCL)
+ add_subdirectory(sycl)
+ endif()
endif()
add_subdirectory(wchess)
--- /dev/null
+# MIT license\r
+# Copyright (C) 2024 Intel Corporation\r
+# SPDX-License-Identifier: MIT\r
+\r
+set(TARGET ls-sycl-device)\r
+add_executable(${TARGET} ls-sycl-device.cpp)\r
+install(TARGETS ${TARGET} RUNTIME)\r
+target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})\r
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
\ No newline at end of file
--- /dev/null
+# whisper.cpp/examples/sycl\r
+\r
+This example program provides the tools for whisper.cpp for SYCL on Intel GPUs.\r
+\r
+## Tool\r
+\r
+|Tool Name| Function|Status|\r
+|-|-|-|\r
+|ls-sycl-device| List all SYCL devices with ID, compute capability, max work group size, etc.|Support|\r
+\r
+### ls-sycl-device\r
+\r
+List all SYCL devices with ID, compute capability, max work group size, etc.\r
+\r
+1. Build whisper.cpp for SYCL for all targets.\r
+\r
+2. Enable oneAPI running environment\r
+\r
+```\r
+source /opt/intel/oneapi/setvars.sh\r
+```\r
+\r
+3. Execute\r
+\r
+```\r
+./build/bin/ls-sycl-device\r
+```\r
+\r
+Check the ID in startup log, like:\r
+\r
+```\r
+found 4 SYCL devices:\r
+ Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,\r
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136\r
+ Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,\r
+ max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280\r
+ Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,\r
+ max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280\r
+ Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,\r
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136\r
+\r
+```\r
+\r
+|Attribute|Note|\r
+|-|-|\r
+|compute capability 1.3|Level-zero running time, recommended |\r
+|compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
\ No newline at end of file
--- /dev/null
+# MIT license\r
+# Copyright (C) 2024 Intel Corporation\r
+# SPDX-License-Identifier: MIT\r
+\r
+mkdir -p build\r
+cd build\r
+source /opt/intel/oneapi/setvars.sh\r
+\r
+#for FP16\r
+#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON # faster for long-prompt inference\r
+\r
+#for FP32\r
+cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx\r
+\r
+#build example/main only\r
+#cmake --build . --config Release --target main\r
+\r
+#build all binary\r
+cmake --build . --config Release -v
\ No newline at end of file
--- /dev/null
+/*MIT license\r
+ Copyright (C) 2024 Intel Corporation\r
+ SPDX-License-Identifier: MIT\r
+*/\r
+\r
+#include "ggml-sycl.h"\r
+\r
+int main(int argc, char ** argv) {\r
+ ggml_backend_sycl_print_sycl_devices();\r
+ return 0;\r
+}
\ No newline at end of file
--- /dev/null
+#!/bin/bash\r
+\r
+# MIT license\r
+# Copyright (C) 2024 Intel Corporation\r
+# SPDX-License-Identifier: MIT\r
+\r
+INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"\r
+source /opt/intel/oneapi/setvars.sh\r
+\r
+if [ $# -gt 0 ]; then\r
+ export GGML_SYCL_DEVICE=$1\r
+else\r
+ export GGML_SYCL_DEVICE=0\r
+fi\r
+echo GGML_SYCL_DEVICE=$GGML_SYCL_DEVICE\r
+#export GGML_SYCL_DEBUG=1\r
+./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
\ No newline at end of file
#include "ggml-cuda.h"
#endif
+#ifdef GGML_USE_SYCL
+#include "ggml-sycl.h"
+#endif
+
#ifdef WHISPER_USE_OPENVINO
#include "openvino/whisper-openvino-encoder.h"
#endif
}
#endif
+#ifdef GGML_USE_SYCL
+ if (params.use_gpu) {
+ WHISPER_LOG_INFO("%s: using SYCL backend\n", __func__);
+ backend_gpu = ggml_backend_sycl_init(params.gpu_device);
+ if (!backend_gpu) {
+ WHISPER_LOG_ERROR("%s: ggml_backend_sycl_init() failed\n", __func__);
+ }
+ }
+#endif
+
if (backend_gpu) {
return backend_gpu;
}