vulkan: matmul gcn tuning (llama/13016)

author Eve <redacted>

Thu, 24 Apr 2025 07:18:33 +0000 (07:18 +0000)

committer Georgi Gerganov <redacted>

Thu, 24 Apr 2025 17:39:16 +0000 (20:39 +0300)
author Eve <redacted>
Thu, 24 Apr 2025 07:18:33 +0000 (07:18 +0000)
committer Georgi Gerganov <redacted>
Thu, 24 Apr 2025 17:39:16 +0000 (20:39 +0300)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp

index 39f3cd343ac450d39092d65752ceda103e664e39..c0bdb9e17a7b498ad5177fd395d9371f60de21a7 100644 (file)
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -246,6 +246,7 @@ struct vk_device_struct {
      bool pipeline_robustness;
      vk::Device device;
      uint32_t vendor_id;
+    vk::DriverId driver_id;
      vk_device_architecture architecture;
      vk_queue compute_queue;
      vk_queue transfer_queue;
@@ -1740,6 +1741,11 @@ static void ggml_vk_load_shaders(vk_device& device) {
          m_warptile_mmq_int = { 128,  64,  64, 32, subgroup_size_8,     32, 2, 2, 2, 1, subgroup_size_8 };
          s_warptile_mmq_int = { subgroup_size_32, 32, 32, 32, 32,       32, 2, 2, 1, 1, subgroup_size_8 };
  
+        // chip-specific tuning: on AMD GCN with any driver other than the AMD proprietary one, override the m_ mmq warptiles
+        if ((device->architecture == AMD_GCN) && (device->driver_id != vk::DriverId::eAmdProprietary)) {
+            m_warptile_mmq = m_warptile_mmq_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 };
+        }
+
          l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 };
          m_mmq_wg_denoms = m_wg_denoms = { 64,  64, 1 };
          s_mmq_wg_denoms = s_wg_denoms = { 32,  32, 1 };
@@ -2658,6 +2664,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
          device->physical_device.getProperties2(&props2);
          device->properties = props2.properties;
          device->vendor_id = device->properties.vendorID;
+        device->driver_id = driver_props.driverID;
  
          const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE");
author	Eve <redacted>
	Thu, 24 Apr 2025 07:18:33 +0000 (07:18 +0000)
committer	Georgi Gerganov <redacted>
	Thu, 24 Apr 2025 17:39:16 +0000 (20:39 +0300)