From: Georgi Gerganov Date: Thu, 3 Jul 2025 07:53:35 +0000 (+0300) Subject: kv-cache : use ggml_set_rows (llama/14285) X-Git-Tag: upstream/0.0.2309~32 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=0171749adbc0568456986f77930241e47ddd7598;p=pkg%2Fggml%2Fsources%2Fggml kv-cache : use ggml_set_rows (llama/14285) * kv-cache : use ggml_set_rows ggml-ci * graph : separate k and v indices ggml-ci * cont : remove redundant ifs ggml-ci * kv-cache : improve find_slot impl * kv-cache : bounds-check when accessing slot_info indices * kv-cache : add comments ggml-ci * ggml : add TODOs for adding GGML_OP_SET_ROWS support in the backends ggml-ci --- diff --git a/src/ggml-cann/ggml-cann.cpp b/src/ggml-cann/ggml-cann.cpp index 8a3d2026..eae575cc 100755 --- a/src/ggml-cann/ggml-cann.cpp +++ b/src/ggml-cann/ggml-cann.cpp @@ -2086,6 +2086,12 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, return false; } } break; + case GGML_OP_SET_ROWS: + { + // TODO: add support + // ref: https://github.com/ggml-org/llama.cpp/pull/14274 + return false; + } break; case GGML_OP_CPY: { ggml_tensor *src = op->src[0]; if ((op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16) || diff --git a/src/ggml-opencl/ggml-opencl.cpp b/src/ggml-opencl/ggml-opencl.cpp index 71829c05..9436e6ea 100644 --- a/src/ggml-opencl/ggml-opencl.cpp +++ b/src/ggml-opencl/ggml-opencl.cpp @@ -2222,6 +2222,12 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te default: return false; } + case GGML_OP_SET_ROWS: + { + // TODO: add support + // ref: https://github.com/ggml-org/llama.cpp/pull/14274 + return false; + } break; case GGML_OP_CPY: case GGML_OP_DUP: case GGML_OP_CONT: diff --git a/src/ggml-sycl/ggml-sycl.cpp b/src/ggml-sycl/ggml-sycl.cpp index 1d41d7a4..ee4c8f7b 100644 --- a/src/ggml-sycl/ggml-sycl.cpp +++ b/src/ggml-sycl/ggml-sycl.cpp @@ -4285,6 +4285,12 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g return false; } } + case GGML_OP_SET_ROWS: + { + // TODO: add support + // ref: https://github.com/ggml-org/llama.cpp/pull/14274 + return false; + } break; case GGML_OP_CPY: { ggml_type src0_type = op->src[0]->type; diff --git a/src/ggml-vulkan/ggml-vulkan.cpp b/src/ggml-vulkan/ggml-vulkan.cpp index ccb0d47b..8114cb42 100644 --- a/src/ggml-vulkan/ggml-vulkan.cpp +++ b/src/ggml-vulkan/ggml-vulkan.cpp @@ -10339,6 +10339,12 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm return false; } } break; + case GGML_OP_SET_ROWS: + { + // TODO: add support + // ref: https://github.com/ggml-org/llama.cpp/pull/14274 + return false; + } break; case GGML_OP_CONT: case GGML_OP_CPY: case GGML_OP_DUP: