vulkan: faster q6_k matmul (#17813)

author Eve <redacted>

Sun, 14 Dec 2025 07:29:37 +0000 (07:29 +0000)

committer GitHub <redacted>

Sun, 14 Dec 2025 07:29:37 +0000 (08:29 +0100)
author Eve <redacted>
Sun, 14 Dec 2025 07:29:37 +0000 (07:29 +0000)
committer GitHub <redacted>
Sun, 14 Dec 2025 07:29:37 +0000 (08:29 +0100)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml

index 9fe1401df4791b500a6fbbf8e781f0ecf1ce3c58..af4c60be645871c7ba7641da944ff44af829eb65 100644 (file)
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -20,7 +20,8 @@ on:
        '**/*.swift',
        '**/*.m',
        '**/*.metal',
-      '**/*.comp'
+      '**/*.comp',
+      '**/*.glsl'
      ]
  
    pull_request:
@@ -40,7 +41,8 @@ on:
        '**/*.swift',
        '**/*.m',
        '**/*.metal',
-      '**/*.comp'
+      '**/*.comp',
+      '**/*.glsl'
      ]
  
  concurrency:
diff --git a/.gitignore b/.gitignore

index 428f08411005785ad0065a03ed7ddb93ef65e938..05eb578a82ff699ff3b67ec32aa43c9429aa11e8 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -54,6 +54,7 @@
  /out/
  /tmp/
  /autogen-*.md
+/common/build-info.cpp
  
  # Deprecated
  
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl

index ee5ded2e8d3eb17ab33df8bfd874dfe9f57fed66..58ede04400d86ae582e02e83e2ac8966655d0e9a 100644 (file)
--- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl
@@ -244,17 +244,20 @@ void load_a_to_shmem(const uint pos_a, const uint row, const uint col, const uin
              const uint iqs = idx % 128;                 // 0..127
  
              const uint n = iqs / 64;                    // 0,1
-            const uint b = (iqs % 64) / 32;             // 0,1
+            const uint b = ((iqs % 64) / 32) * 4;       // 0,4
              const uint is_b = (iqs % 16) / 8;           // 0,1
              const uint qhshift = ((iqs % 64) / 16) * 2; // 0,2,4,6
              const uint is = 8 * n + qhshift + is_b;     // 0..15
-            const uint qsi = n * 64 + (iqs % 32) * 2;   // 0,2,4..126
-            const uint qhi = n * 32 + (iqs % 16) * 2;   // 0,2,4..62
+            const uint qsi = n * 32 + (iqs % 32);       // 0..63
+            const uint qhi = n * 16 + (iqs % 16);       // 0..31
  
              const float dscale = float(data_a[ib].d) * float(data_a[ib].scales[is]);
  
-            buf_a[buf_idx] = FLOAT_TYPE_VEC2(dscale * float(int8_t(((data_a[ib].ql[qsi    ] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi    ] >> qhshift) & 3) << 4)) - 32),
-                                             dscale * float(int8_t(((data_a[ib].ql[qsi + 1] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi + 1] >> qhshift) & 3) << 4)) - 32));
+            const uint ql = (uint(data_a_packed16[ib].ql[qsi]) >> b) & 0x0F0F;
+            const uint qh = (uint(data_a_packed16[ib].qh[qhi]) >> qhshift) & 0x0303;
+            const vec2 q = (vec2(unpack8(ql | (qh << 4)).xy) - 32) * dscale;
+
+            buf_a[buf_idx] = FLOAT_TYPE_VEC2(q.x, q.y);
  #elif defined(DATA_A_IQ1_S)
              const uint idx = pos_a + col * p.stride_a / LOAD_VEC_A + row;
              const uint buf_idx = col * SHMEM_STRIDE + row * LOAD_VEC_A / 2;
author	Eve <redacted>
	Sun, 14 Dec 2025 07:29:37 +0000 (07:29 +0000)
committer	GitHub <redacted>
	Sun, 14 Dec 2025 07:29:37 +0000 (08:29 +0100)
.github/workflows/build.yml		patch | blob | history
.gitignore		patch | blob | history
ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl		patch | blob | history