if ((device->architecture == AMD_GCN) && (device->driver_id != vk::DriverId::eAmdProprietary)) {
m_warptile_mmq = m_warptile_mmq_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 };
m_warptile_mmqid = m_warptile_mmqid_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 };
+ } else if (device->vendor_id == VK_VENDOR_ID_INTEL && device->coopmat_support && device->architecture == INTEL_XE2) {
+ // Xe2/Xe3 with coopmat enabled - warptile performance tuning
+ l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
+ l_warptile_mmq = { 512, 128, 128, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
}
l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 };
m_wg_denoms = { 64, 64, 1 };
s_wg_denoms = { 32, 32, 1 };
+ if (device->vendor_id == VK_VENDOR_ID_INTEL && device->architecture == INTEL_XE2) {
+ // Xe2/Xe3 - bf16 warptile performance tuning
+ l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, 4, 4, 1, subgroup_size_8 };
+ }
+
CREATE_MM(GGML_TYPE_BF16, pipeline_matmul_bf16, matmul_bf16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3, , 0);
CREATE_MM(GGML_TYPE_BF16, pipeline_matmul_id_bf16, matmul_id_bf16, , wg_denoms, warptile, vk_mat_mat_id_push_constants, mul_mat_id_param_count, _id, 0);
}
switch (device->vendor_id) {
#ifndef GGML_VULKAN_RUN_TESTS
case VK_VENDOR_ID_AMD:
+ device->mul_mat_l[i] = false;
+ device->mul_mat_m[i] = true;
+ device->mul_mat_s[i] = true;
+ device->mul_mat_id_l[i] = false;
+ device->mul_mat_id_m[i] = true;
+ device->mul_mat_id_s[i] = true;
+ break;
case VK_VENDOR_ID_INTEL:
- device->mul_mat_l[i] = false;
+ if (!device->coopmat_support || device->architecture != INTEL_XE2) {
+ device->mul_mat_l[i] = false;
+ device->mul_mat_id_l[i] = false;
+ } else {
+ device->mul_mat_l[i] = true; // if coopmat & XE2+, allow large matmul warptile config for Intel
+ device->mul_mat_id_l[i] = true;
+ }
device->mul_mat_m[i] = true;
device->mul_mat_s[i] = true;
- device->mul_mat_id_l[i] = false;
device->mul_mat_id_m[i] = true;
device->mul_mat_id_s[i] = true;
break;