From: Aaron Teo Date: Fri, 15 Aug 2025 13:11:22 +0000 (+0800) Subject: ggml: initial IBM zDNN backend (llama/14975) X-Git-Tag: upstream/0.0.2471~20 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=0eb6fed2155989f755b03a6fdfcf53c36f006f26;p=pkg%2Fggml%2Fsources%2Fggml ggml: initial IBM zDNN backend (llama/14975) * ggml-zdnn: initial backend impl Signed-off-by: Aaron Teo ggml-zdnn: temp change z17 to arch15 Signed-off-by: Aaron Teo ggml-zdnn: fix build bugs Signed-off-by: Aaron Teo * ggml-zdnn: tensor->extra logging check Signed-off-by: Aaron Teo ggml-zdnn: add layout name mapping, ztensor information Signed-off-by: Aaron Teo ggml-zdnn: separate logging into its own line Signed-off-by: Aaron Teo ggml-zdnn: add shape comparison Signed-off-by: Aaron Teo ggml-zdnn: add ggml_tensor shape log Signed-off-by: Aaron Teo ggml-zdnn: fix incorrect shape logging Signed-off-by: Aaron Teo * ggml-zdnn: add output buffer check Signed-off-by: Aaron Teo * ggml-zdnn: run compute and store into tensor->extra Signed-off-by: Aaron Teo * ggml-zdnn: add set_tensor Signed-off-by: Aaron Teo * ggml-zdnn: add more loggers Signed-off-by: Aaron Teo * ggml-zdnn: update set_tensor logging to check only for matmul Signed-off-by: Aaron Teo * ggml-zdnn: last working matmul version Signed-off-by: Aaron Teo * ggml-zdnn: add comments to prevent accidentally deleting lines Signed-off-by: Aaron Teo * ggml-zdnn: support op out_prod Signed-off-by: Aaron Teo * ggml-zdnn: update op out_prod to use tensor->extra Signed-off-by: Aaron Teo * ggml-zdnn: rewrite the backend implementation Signed-off-by: Aaron Teo * ggml-zdnn: bugfix new impl Signed-off-by: Aaron Teo * ggml-zdnn: fix compiler warnings and bugfixes Signed-off-by: Aaron Teo * ggml-zdnn: test ztensor finding in init_tensor Signed-off-by: Aaron Teo * ggml-zdnn: implement at least 1 op to test Signed-off-by: Aaron Teo * ggml-zdnn: assign tensor->extra to buffer Signed-off-by: Aaron Teo * ggml-zdnn: add check for view tensors to prevent init_tensor 
Signed-off-by: Aaron Teo * ggml-zdnn: rework init_tensor to create new buffers Signed-off-by: Aaron Teo * ggml-zdnn: switch to std vector instead of array Signed-off-by: Aaron Teo * ggml-zdnn: switch buffers back and set to arbitrary number Signed-off-by: Aaron Teo * ggml-zdnn: impl init_tensor Signed-off-by: Aaron Teo * ggml-zdnn: update supports_op matmul matrix Signed-off-by: Aaron Teo * ggml-zdnn: fix incorrect ztensor shape, reduce memory padding Signed-off-by: Aaron Teo * ggml-zdnn: code clean up Signed-off-by: Aaron Teo * ggml-zdnn: impl matmul Signed-off-by: Aaron Teo * ggml-zdnn: fix compiler error missing type Signed-off-by: Aaron Teo * ggml-zdnn: fix missing data transform call Signed-off-by: Aaron Teo * ggml-zdnn: add bias init_tensor Signed-off-by: Aaron Teo * ggml-zdnn: tighten memory usage, change string allocation Signed-off-by: Aaron Teo * ggml-zdnn: add bias ztensor and data free Signed-off-by: Aaron Teo * ggml-zdnn: add bias data transform Signed-off-by: Aaron Teo * ggml-zdnn: add more debug info for extra buffer transform Signed-off-by: Aaron Teo * ggml-zdnn: add logger to check if mat mul ops go through set_tensor Signed-off-by: Aaron Teo * ggml-zdnn: activate bias transform in matmul Signed-off-by: Aaron Teo * ggml-zdnn: move weights transform into mulmat Signed-off-by: Aaron Teo * ggml-zdnn: add more safeguards in matmul Signed-off-by: Aaron Teo * ggml-zdnn: fix sequencing of transforms Signed-off-by: Aaron Teo * ggml-zdnn: bugfix transform ztensor vs origtensor Signed-off-by: Aaron Teo * ggml-zdnn: figure out why sigtrap is happening Signed-off-by: Aaron Teo * ggml-zdnn: fix sigsegv Signed-off-by: Aaron Teo * ggml-zdnn: move everything back to local declaration Signed-off-by: Aaron Teo * ggml-zdnn: move bias data to local also Signed-off-by: Aaron Teo * ggml-zdnn: bring back working matmul Signed-off-by: Aaron Teo * ggml-zdnn: rewrite into mre Signed-off-by: Aaron Teo * ggml-zdnn: fix missing vector import Signed-off-by: Aaron Teo * 
ggml-zdnn: fix missing vector import in header Signed-off-by: Aaron Teo * ggml-zdnn: attempt to fix sigsegv Signed-off-by: Aaron Teo * ggml-zdnn: fix missing load tensor Signed-off-by: Aaron Teo * ggml-zdnn: fix invalid ztensor buffer release Signed-off-by: Aaron Teo * ggml-zdnn: add logging to debug free buffer Signed-off-by: Aaron Teo * ggml-zdnn: remove free_buffer debug info Signed-off-by: Aaron Teo * ggml-zdnn: add parmblkformat detections Signed-off-by: Aaron Teo * ggml-zdnn: add nnpa installed detection Signed-off-by: Aaron Teo * ggml-zdnn: add zdnn_init call for static libs Signed-off-by: Aaron Teo * ggml-zdnn: add init_tensor Signed-off-by: Aaron Teo * ggml-zdnn: attempt at fixing invalid buffer Signed-off-by: Aaron Teo * ggml-zdnn: switch to using deque to fix pointer deref problem Signed-off-by: Aaron Teo * ggml-zdnn: add weights logging to check Signed-off-by: Aaron Teo * ggml-zdnn: attempt to use unique ptr Signed-off-by: Aaron Teo * ggml-zdnn: add tensor to pre_tfm_desc logging Signed-off-by: Aaron Teo * ggml-zdnn: add inputs logging Signed-off-by: Aaron Teo * ggml-zdnn: disable op_none initialisation for testing Signed-off-by: Aaron Teo * ggml-zdnn: fix missing return from init_tensor Signed-off-by: Aaron Teo * ggml-zdnn: load ztensors in cgraph exec Signed-off-by: Aaron Teo * ggml-zdnn: work on moving output ztensor as well Signed-off-by: Aaron Teo * ggml-zdnn: disable logging and breakpoints for full test Signed-off-by: Aaron Teo * ggml-zdnn: attempt at manually changing the layout Signed-off-by: Aaron Teo * ggml-zdnn: attempt at using default nwhc format instead Signed-off-by: Aaron Teo * ggml-zdnn: disable global load ztensor for now Signed-off-by: Aaron Teo * ggml-zdnn: fix erroneous output load tensor Signed-off-by: Aaron Teo * ggml-zdnn: add guards to prevent loading ztensor if transformed Signed-off-by: Aaron Teo * ggml-zdnn: code cleanup Signed-off-by: Aaron Teo * ggml-zdnn: bring load ztensor back to init routine Signed-off-by: Aaron Teo * 
ggml-zdnn: code clean up Signed-off-by: Aaron Teo * ggml-zdnn: fix ztensor deallocation abort stabilise ggml <-> zdnn api Signed-off-by: Aaron Teo * ggml-zdnn: clean up matmul selection Signed-off-by: Aaron Teo * ggml-zdnn: clean up project structure Signed-off-by: Aaron Teo * ggml-zdnn: update documentation, prepare for upstream Signed-off-by: Aaron Teo * chore: add codeowners Signed-off-by: Aaron Teo * ggml-zdnn: disable batched matmul Signed-off-by: Aaron Teo * ggml-zdnn: attempt at fixing tensor views during matmul Signed-off-by: Aaron Teo * ggml-zdnn: deny all view tensors directly Signed-off-by: Aaron Teo * ggml-zdnn: fix pr comments Signed-off-by: Aaron Teo * docs: update ops docs for zdnn Signed-off-by: Aaron Teo * ggml-zdnn: redo test-backend-ops for ops.md Signed-off-by: Aaron Teo * ggml-zdnn: fix typo in build-s390x.md Signed-off-by: Aaron Teo * codeowners: remove taronaeo for now Signed-off-by: Aaron Teo * Revert "codeowners: remove taronaeo for now" This reverts commit 411ea4ed78d08778967bd0bd33a6538cfcbe082f. 
* ggml-zdnn: remove unused ggml_zdnn macro Signed-off-by: Aaron Teo --------- Signed-off-by: Aaron Teo --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 1fb7abea..90e274cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -188,6 +188,7 @@ option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF) option(GGML_WEBGPU "ggml: use WebGPU" OFF) option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF) +option(GGML_ZDNN "ggml: use zDNN" OFF) option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT}) option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF) option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF) diff --git a/include/ggml-zdnn.h b/include/ggml-zdnn.h new file mode 100644 index 00000000..c2c30c97 --- /dev/null +++ b/include/ggml-zdnn.h @@ -0,0 +1,16 @@ +#pragma once + +#include "ggml.h" +#include "ggml-backend.h" + +#ifdef __cplusplus +extern "C" { +#endif + +GGML_BACKEND_API ggml_backend_t ggml_backend_zdnn_init(void); + +GGML_BACKEND_API ggml_backend_reg_t ggml_backend_zdnn_reg(void); + +#ifdef __cplusplus +} +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 177fb282..2b5b8169 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -382,6 +382,7 @@ ggml_add_backend(RPC) ggml_add_backend(SYCL) ggml_add_backend(Vulkan) ggml_add_backend(WebGPU) +ggml_add_backend(zDNN) ggml_add_backend(OpenCL) foreach (target ggml-base ggml) diff --git a/src/ggml-backend-reg.cpp b/src/ggml-backend-reg.cpp index 6c315137..5f02a710 100644 --- a/src/ggml-backend-reg.cpp +++ b/src/ggml-backend-reg.cpp @@ -49,6 +49,10 @@ #include "ggml-webgpu.h" #endif +#ifdef GGML_USE_ZDNN +#include "ggml-zdnn.h" +#endif + #ifdef GGML_USE_OPENCL #include "ggml-opencl.h" #endif @@ -180,6 +184,9 @@ struct ggml_backend_registry { #ifdef GGML_USE_WEBGPU register_backend(ggml_backend_webgpu_reg()); #endif +#ifdef GGML_USE_ZDNN + register_backend(ggml_backend_zdnn_reg()); +#endif #ifdef 
GGML_USE_OPENCL register_backend(ggml_backend_opencl_reg()); #endif diff --git a/src/ggml-cpu/CMakeLists.txt b/src/ggml-cpu/CMakeLists.txt index f188d163..ce0a3e12 100644 --- a/src/ggml-cpu/CMakeLists.txt +++ b/src/ggml-cpu/CMakeLists.txt @@ -460,7 +460,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name) # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version. # binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15. message(STATUS "z17 target") - list(APPEND ARCH_FLAGS -march=z17) + list(APPEND ARCH_FLAGS -march=arch15) else() message(STATUS "Unknown target") message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")