    return
        tensor->nb[0] == ggml_type_size(tensor->type) &&
        tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) &&
-       tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+       (tensor->ne[3] == 1 || tensor->nb[3] == tensor->nb[2]*tensor->ne[2]);
}
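
// A minimal standalone sketch, not the ggml API: a hypothetical toy_tensor struct with the
// same ne[] (extent) / nb[] (byte stride) convention, and with the ggml_blck_size() division
// for quantized types left out. It shows why nb[3] no longer needs to match when ne[3] == 1:
// an element's address is i0*nb[0] + i1*nb[1] + i2*nb[2] + i3*nb[3], and with ne[3] == 1 the
// only valid i3 is 0, so nb[3] never contributes.
#include <cstdint>
#include <cstdio>

struct toy_tensor {
    int64_t ne[4]; // elements per dimension
    size_t  nb[4]; // stride in bytes per dimension
};

static bool dims01_contiguous(const toy_tensor & t, size_t type_size) {
    return t.nb[0] == type_size &&
           t.nb[1] == t.nb[0] * t.ne[0] &&
           (t.ne[3] == 1 || t.nb[3] == t.nb[2] * t.ne[2]);
}

int main() {
    // 4x3x2x1 tensor with an arbitrary nb[3]: accepted, because i3 can only ever be 0.
    toy_tensor single  = { {4, 3, 2, 1}, {4, 16, 48, 9999} };
    // ne[3] == 2 with the same strides: now nb[3] must equal nb[2]*ne[2] (= 96), so this fails.
    toy_tensor batched = { {4, 3, 2, 2}, {4, 16, 48, 9999} };
    printf("%d %d\n", dims01_contiguous(single, sizeof(float)), dims01_contiguous(batched, sizeof(float)));
    return 0;
}
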
static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const ggml_tensor * src, const ggml_tensor * dst, ggml_type to) {
        // If there's not enough shared memory for row_ids and the result tile, fall back to CPU
        return false;
    }
-   // Check against size of shared memory variable
-   if (op->src[2]->ne[0] > 4096) {
-       return false;
-   }
}
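// Note: presumably the removed cap existed because op->src[2]->ne[0] (the row_ids count) had
// to fit a statically sized shared-memory array ("Check against size of shared memory
// variable"); with the shared-memory check above that falls back to CPU when row_ids and the
// result tile don't fit, a fixed 4096 limit is no longer needed.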
switch (src0_type) {
    case GGML_TYPE_F32:
void main() {
    [[unroll]] for (uint wgy = 0; wgy < 256; wgy++) {
        const uint i = gl_WorkGroupID.x * 256 + wgy;
-       if (i >= p.M * p.K / QUANT_K) {
+       if (i >= p.nel / QUANT_K) {
            return;
        }
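
// Host-side sketch of the guard above, with hypothetical names (nblocks, nwg) and an example
// QUANT_K of 256; it assumes p.nel is the tensor's total element count. Each workgroup walks
// 256 quant blocks and the workgroup count is rounded up, so the last workgroup can index
// past the real block count; comparing against nel / QUANT_K (rather than M * K / QUANT_K)
// keeps the bound correct for tensors with more than two dimensions.
#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t QUANT_K = 256;                   // elements per quant block (example value)
    const uint64_t ne[4]   = {512, 8, 4, 2};        // 4-D tensor: M*K would only cover 512*8
    const uint64_t nel     = ne[0]*ne[1]*ne[2]*ne[3];
    const uint64_t nblocks = nel / QUANT_K;         // quant blocks to dequantize
    const uint64_t nwg     = (nblocks + 255) / 256; // workgroups, 256 blocks handled by each

    uint64_t processed = 0;
    for (uint64_t wg = 0; wg < nwg; ++wg) {
        for (uint32_t wgy = 0; wgy < 256; ++wgy) {
            const uint64_t i = wg * 256 + wgy;
            if (i >= nblocks) {
                break; // mirrors the shader's early return
            }
            processed++;
        }
    }
    printf("blocks: %llu, processed: %llu\n", (unsigned long long) nblocks, (unsigned long long) processed);
    return 0;
}
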
void main() {
    [[unroll]] for (uint wgy = 0; wgy < 256; wgy++) {
        const uint i = uint(gl_WorkGroupID.x * 256 + wgy);
-       if (i >= p.M * p.K / QUANT_K) {
+       if (i >= p.nel / QUANT_K) {
            return;
        }
void main() {
    [[unroll]] for (uint wgy = 0; wgy < 256; wgy++) {
        const uint ib = gl_WorkGroupID.x * 256 + wgy;
-       if (ib >= p.M * p.K / QUANT_K) {
+       if (ib >= p.nel / QUANT_K) {
            return;
        }
void main() {
    [[unroll]] for (uint wgy = 0; wgy < 256; wgy++) {
        const uint ib = gl_WorkGroupID.x * 256 + wgy;
-       if (ib >= p.M * p.K / QUANT_K) {
+       if (ib >= p.nel / QUANT_K) {
            return;
        }
void main() {
    [[unroll]] for (uint wgy = 0; wgy < 256; wgy++) {
        const uint i = gl_WorkGroupID.x * 256 + wgy;
-       if (i >= p.M * p.K / QUANT_K) {
+       if (i >= p.nel / QUANT_K) {
            return;
        }

        const uint tid = gl_LocalInvocationID.x;