vulkan: Warptile tuning for Intel Xe2/Xe3 (llama/18178)

author virajwad <redacted>

Wed, 7 Jan 2026 10:59:47 +0000 (02:59 -0800)

committer Georgi Gerganov <redacted>

Wed, 14 Jan 2026 07:11:59 +0000 (09:11 +0200)
author virajwad <redacted>
Wed, 7 Jan 2026 10:59:47 +0000 (02:59 -0800)
committer Georgi Gerganov <redacted>
Wed, 14 Jan 2026 07:11:59 +0000 (09:11 +0200)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp

index 3c13777b8aa3e38ce5ab232795d4636e00337849..1f255b705e09c5f9ab778004ae25db941a2eb85d 100644 (file)
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -2996,6 +2996,10 @@ static void ggml_vk_load_shaders(vk_device& device) {
          if ((device->architecture == AMD_GCN) && (device->driver_id != vk::DriverId::eAmdProprietary)) {
              m_warptile_mmq = m_warptile_mmq_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 };
              m_warptile_mmqid = m_warptile_mmqid_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 };
+        } else if (device->vendor_id == VK_VENDOR_ID_INTEL && device->coopmat_support && device->architecture == INTEL_XE2) {
+            // Xe2/Xe3 with coopmat enabled - warptile performance tuning
+            l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
+            l_warptile_mmq = { 512, 128, 128, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
          }
  
          l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 };
@@ -3678,6 +3682,11 @@ static void ggml_vk_load_shaders(vk_device& device) {
          m_wg_denoms = { 64,  64, 1 };
          s_wg_denoms = { 32,  32, 1 };
  
+        if (device->vendor_id == VK_VENDOR_ID_INTEL && device->architecture == INTEL_XE2) {
+            // Xe2/Xe3 - bf16 warptile performance tuning
+            l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, 4, 4, 1, subgroup_size_8 };
+        }
+
          CREATE_MM(GGML_TYPE_BF16, pipeline_matmul_bf16, matmul_bf16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3, , 0);
          CREATE_MM(GGML_TYPE_BF16, pipeline_matmul_id_bf16, matmul_id_bf16, , wg_denoms, warptile, vk_mat_mat_id_push_constants, mul_mat_id_param_count, _id, 0);
      }
@@ -5061,11 +5070,23 @@ static vk_device ggml_vk_get_device(size_t idx) {
              switch (device->vendor_id) {
  #ifndef GGML_VULKAN_RUN_TESTS
              case VK_VENDOR_ID_AMD:
+                device->mul_mat_l[i]    = false;
+                device->mul_mat_m[i]    = true;
+                device->mul_mat_s[i]    = true;
+                device->mul_mat_id_l[i] = false;
+                device->mul_mat_id_m[i] = true;
+                device->mul_mat_id_s[i] = true;
+                break;
              case VK_VENDOR_ID_INTEL:
-                device->mul_mat_l[i] = false;
+                if (!device->coopmat_support || device->architecture != INTEL_XE2) {
+                    device->mul_mat_l[i] = false;
+                    device->mul_mat_id_l[i] = false;
+                } else {
+                    device->mul_mat_l[i] = true;  // if coopmat & XE2+, allow large matmul warptile config for Intel
+                    device->mul_mat_id_l[i] = true;
+                }
                  device->mul_mat_m[i] = true;
                  device->mul_mat_s[i] = true;
-                device->mul_mat_id_l[i] = false;
                  device->mul_mat_id_m[i] = true;
                  device->mul_mat_id_s[i] = true;
                  break;
author	virajwad <redacted>
	Wed, 7 Jan 2026 10:59:47 +0000 (02:59 -0800)
committer	Georgi Gerganov <redacted>
	Wed, 14 Jan 2026 07:11:59 +0000 (09:11 +0200)