fix vulkan ggml_acc only works in 3d but not 4d (llama/19426)

author ymcki <redacted>

Fri, 13 Feb 2026 12:31:37 +0000 (20:31 +0800)

committer Georgi Gerganov <redacted>

Sat, 14 Feb 2026 22:20:18 +0000 (00:20 +0200)
author ymcki <redacted>
Fri, 13 Feb 2026 12:31:37 +0000 (20:31 +0800)
committer Georgi Gerganov <redacted>
Sat, 14 Feb 2026 22:20:18 +0000 (00:20 +0200)
diff --git a/src/ggml-cuda/ggml-cuda.cu b/src/ggml-cuda/ggml-cuda.cu

index 7dc688483ad19fd941f66ecbd48e01a82237e63a..85ce96958fa0c050e4a5349d7366bbf7bbf250f4 100644 (file)
--- a/src/ggml-cuda/ggml-cuda.cu
+++ b/src/ggml-cuda/ggml-cuda.cu
@@ -4822,8 +4822,11 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
          case GGML_OP_CONV_2D_DW:
          case GGML_OP_CONV_TRANSPOSE_2D:
          case GGML_OP_POOL_2D:
-        case GGML_OP_ACC:
              return true;
+        case GGML_OP_ACC:
+            // TODO: extend support like so:
+            //return ggml_is_contiguous_rows(op->src[0]) && ggml_is_contiguous_rows(op->src[1]);
+            return ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1]);
          case GGML_OP_SUM:
              return ggml_is_contiguous_rows(op->src[0]);
          case GGML_OP_TOP_K:
diff --git a/src/ggml-vulkan/ggml-vulkan.cpp b/src/ggml-vulkan/ggml-vulkan.cpp

index 72097ffd0ffc73ae6e09ff7783ed20904827e0ae..e5dcd3cbda276cb02bc729f524fb04c5642da59d 100644 (file)
--- a/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/src/ggml-vulkan/ggml-vulkan.cpp
@@ -9801,16 +9801,16 @@ static void ggml_vk_acc(ggml_backend_vk_context * ctx, vk_context& subctx, const
      const uint32_t src1_type_size = ggml_type_size(src1->type);
      const uint32_t dst_type_size = ggml_type_size(dst->type);
  
-    int nb1 = dst->op_params[0] / 4; // 4 bytes of float32
-    int nb2 = dst->op_params[1] / 4; // 4 bytes of float32
-    // int nb3 = dst->op_params[2] / 4; // 4 bytes of float32 - unused
-    int offset = dst->op_params[3] / 4; // offset in bytes
+    int nb1 = dst->op_params[0] / src0_type_size; // 4 bytes of float32
+    int nb2 = dst->op_params[1] / src0_type_size; // 4 bytes of float32
+    int nb3 = dst->op_params[2] / src0_type_size; // 4 bytes of float32
+    int offset = dst->op_params[3] / src0_type_size; // offset in bytes
  
      ggml_vk_op_f32<vk_op_binary_push_constants>(ctx, subctx, src0, src1, nullptr, nullptr, dst, GGML_OP_ACC, {
          (uint32_t)ggml_nelements(src0),
-        (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2],(uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)nb1, (uint32_t)nb2, (uint32_t)src0->nb[3] / src0_type_size,
+        (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2],(uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)nb1, (uint32_t)nb2, (uint32_t)nb3,
          (uint32_t)src1->ne[0], (uint32_t)src1->ne[1], (uint32_t)src1->ne[2],(uint32_t)src1->ne[3], (uint32_t)src1->nb[0] / src1_type_size, (uint32_t)src1->nb[1] / src1_type_size, (uint32_t)src1->nb[2] / src1_type_size, (uint32_t)src1->nb[3] / src1_type_size,
-        (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2],(uint32_t) dst->ne[3], (uint32_t) dst->nb[0] /  dst_type_size, (uint32_t)nb1, (uint32_t)nb2, (uint32_t) dst->nb[3] /  dst_type_size,
+        (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2],(uint32_t) dst->ne[3], (uint32_t) dst->nb[0] /  dst_type_size, (uint32_t)nb1, (uint32_t)nb2, (uint32_t)nb3,
          0,
          0.0f, 0.0f, offset,
      });
diff --git a/src/ggml-vulkan/vulkan-shaders/acc.comp b/src/ggml-vulkan/vulkan-shaders/acc.comp

index 5084a70ed49f76ca8137d6c0fd8b068ae5f8081a..3d61168b56f09f97a76042f62370279dfe73a204 100644 (file)
--- a/src/ggml-vulkan/vulkan-shaders/acc.comp
+++ b/src/ggml-vulkan/vulkan-shaders/acc.comp
@@ -13,17 +13,18 @@ void main() {
  
      const uint offset = p.param3;
      const uint src1_i = idx - offset;
-    const uint oz = src1_i / p.nb02;
-    const uint oy = (src1_i - (oz * p.nb02)) / p.nb01;
-    const uint ox = src1_i % p.nb01;
+    const uint i3 = src1_i / p.nb03;
+    const uint rem2 = src1_i - i3 * p.nb03;
+    const uint i2 = rem2 / p.nb02;
+    const uint rem1 = rem2 - i2 * p.nb02;
+    const uint i1 = rem1 / p.nb01;
+    const uint i0 = rem1 % p.nb01;
  
      uint i00, i01, i02, i03;
-    get_indices(idx, i00, i01, i02, i03);
  
-    if (ox < p.ne10 && oy < p.ne11 && oz < p.ne12) {
-        data_d[get_doffset() + dst_idx(i00, i01, i02, i03)] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + src0_idx(i00, i01, i02, i03)]) + FLOAT_TYPE(data_b[get_boffset() + ox + oy * p.ne10 + oz * p.ne10 * p.ne11]));
+    if (i0 < p.ne10 && i1 < p.ne11 && i2 < p.ne12 && i3 < p.ne13) {
+        data_d[get_doffset() + idx] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + idx]) + FLOAT_TYPE(data_b[get_boffset() + src1_idx(i0, i1, i2, i3)]));
      } else {
-        data_d[get_doffset() + dst_idx(i00, i01, i02, i03)] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + src0_idx(i00, i01, i02, i03)]));
+        data_d[get_doffset() + idx] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + idx]));
      }
  }
-
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp

index 8816f6963fd4fcad0f720aca01f9cef17284a8b9..a50c569b820ffef84a6a1e34796d8eb41c09d922 100644 (file)
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -5847,26 +5847,46 @@ struct test_acc : public test_case {
      const ggml_type type;
      const std::array<int64_t, 4> ne_a;
      const std::array<int64_t, 4> ne_b;
+    const int64_t stride_dim;
  
      std::string vars() override {
-        return VARS_TO_STR3(type, ne_a, ne_b);
+        return VARS_TO_STR4(type, ne_a, ne_b, stride_dim);
      }
  
      test_acc(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne_a = {256, 17, 1, 1},
-            std::array<int64_t, 4> ne_b = {256, 16, 1, 1})
-        : type(type), ne_a(ne_a), ne_b(ne_b) {}
+            std::array<int64_t, 4> ne_a = {256, 17, 2, 3},
+            std::array<int64_t, 4> ne_b = {256, 16, 2, 3},
+            uint64_t stride_dim = -1)
+        : type(type), ne_a(ne_a), ne_b(ne_b), stride_dim(stride_dim) {}
  
      ggml_tensor * build_graph(ggml_context * ctx) override {
          ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
          ggml_set_param(a);
          ggml_set_name(a, "a");
  
-        ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
-        ggml_set_param(b);
+        ggml_tensor * b;
+        if (stride_dim == 1 || stride_dim == 2 || stride_dim == 3) {
+            // Create a larger tensor and take a view at a non-zero offset.
+            // This tests that the backend correctly handles b's data offset
+            std::array<int64_t, 4> ne_b_pad = {ne_b[0], ne_b[1], ne_b[2], ne_b[3]};
+            ne_b_pad[stride_dim] += 1;
+            ggml_tensor * b_pad = ggml_new_tensor(ctx, type, 4, ne_b_pad.data());
+            ggml_set_param(b_pad);
+            ggml_set_name(b_pad, "b_pad");
+            // View that skips the first row, so b has a non-zero byte offset
+            b = ggml_view_4d(ctx, b_pad,
+                ne_b[0], ne_b[1], ne_b[2], ne_b[3],
+                b_pad->nb[1], b_pad->nb[2], b_pad->nb[3],
+                b_pad->nb[1]);
+        } else {
+            b = ggml_new_tensor(ctx, type, 4, ne_b.data());
+            ggml_set_param(b);
+        }
          ggml_set_name(b, "b");
  
-        ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], b->nb[1]);
+        // When ne_b[0] < ne_a[0], a->nb[1] != b->nb[1], so the stride
+        // parameters to ggml_acc don't match b's natural stride.
+        ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], 0);
          ggml_set_name(out, "out");
  
          return out;
@@ -8170,7 +8190,12 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
      test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {9, 9, 1280, 1}));
      test_cases.emplace_back(new test_group_norm_mul_add(GGML_TYPE_F32, {64, 64, 320, 1}));
      test_cases.emplace_back(new test_group_norm_mul_add(GGML_TYPE_F32, {9, 9, 1280, 1}));
-    test_cases.emplace_back(new test_acc());
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 1, 1}, {256, 16, 1, 1}, -1));
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, -1));
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, -1));
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, 1));
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, 2));
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {64, 16, 2, 3}, 3));
      test_cases.emplace_back(new test_pad());
      test_cases.emplace_back(new test_pad(GGML_TYPE_F32, {33, 17, 2, 1}, 4, 3, true)); // circular
      test_cases.emplace_back(new test_pad_ext());
@@ -8605,6 +8630,14 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
      test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 512, 1)); // prefill
      test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 1,   1)); // generate
  
+    // acc
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 1, 1}, {256, 16, 1, 1}, -1));
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, -1));
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, -1));
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, 1));
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, 2));
+    test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {64, 16, 2, 3}, 3));
+
      return test_cases;
  }
author	ymcki <redacted>
	Fri, 13 Feb 2026 12:31:37 +0000 (20:31 +0800)
committer	Georgi Gerganov <redacted>
	Sat, 14 Feb 2026 22:20:18 +0000 (00:20 +0200)
src/ggml-cuda/ggml-cuda.cu		patch \| blob \| history
src/ggml-vulkan/ggml-vulkan.cpp		patch \| blob \| history
src/ggml-vulkan/vulkan-shaders/acc.comp		patch \| blob \| history
tests/test-backend-ops.cpp		patch \| blob \| history