vulkan: Use BK=32 for coopmat2 mul_mat_id (#18332)

author Jeff Bolz <redacted>

Fri, 26 Dec 2025 17:15:02 +0000 (11:15 -0600)

committer GitHub <redacted>

Fri, 26 Dec 2025 17:15:02 +0000 (18:15 +0100)
author Jeff Bolz <redacted>
Fri, 26 Dec 2025 17:15:02 +0000 (11:15 -0600)
committer GitHub <redacted>
Fri, 26 Dec 2025 17:15:02 +0000 (18:15 +0100)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 7ae2e38356e4754c3baddb7e1865027291b9e2f9..35a1f19fe934e9201338c8379ccc06317865deb6 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -1565,7 +1565,7 @@ class vk_perf_logger {
                  total_op_times += time;
              }
              std::cerr << t.first << ": " << t.second.size() << " x " << (total_op_times / t.second.size() / 1000.0)
-                      << " us";
+                      << " us = " << (total_op_times / 1000.0) << " us";
  
              // If we have as many flops entries as timing entries for the op, then compute and log the flops/S.
              auto it = flops.find(t.first);
@@ -2830,9 +2830,9 @@ static void ggml_vk_load_shaders(vk_device& device) {
          s_mmq_wg_denoms_k = { 32,  64,  1 };
  
          // spec constants and tile sizes for quant matmul_id
-        l_warptile_mmqid = { 256, 128, 128, 16, 1, device->subgroup_size };
-        m_warptile_mmqid = { 256, 128, 64, 16, 0, device->subgroup_size };
-        s_warptile_mmqid = { 256, 128, 64, 16, 0, device->subgroup_size };
+        l_warptile_mmqid = { 256, 128, 128, 32, 1, device->subgroup_size };
+        m_warptile_mmqid = { 256, 128, 64, 32, 0, device->subgroup_size };
+        s_warptile_mmqid = { 256, 128, 64, 32, 0, device->subgroup_size };
          l_mmqid_wg_denoms = { 128, 128, 1 };
          m_mmqid_wg_denoms = { 128, 64, 1 };
          s_mmqid_wg_denoms = { 128, 64, 1 };
author	Jeff Bolz <redacted>
	Fri, 26 Dec 2025 17:15:02 +0000 (11:15 -0600)
committer	GitHub <redacted>
	Fri, 26 Dec 2025 17:15:02 +0000 (18:15 +0100)