bool pipeline_robustness;
vk::Device device;
uint32_t vendor_id;
+ vk::DriverId driver_id;
vk_device_architecture architecture;
vk_queue compute_queue;
vk_queue transfer_queue;
m_warptile_mmq_int = { 128, 64, 64, 32, subgroup_size_8, 32, 2, 2, 2, 1, subgroup_size_8 };
s_warptile_mmq_int = { subgroup_size_32, 32, 32, 32, 32, 32, 2, 2, 1, 1, subgroup_size_8 };
+ // chip specific tuning
+ if ((device->architecture == AMD_GCN) && (device->driver_id != vk::DriverId::eAmdProprietary)) {
+ m_warptile_mmq = m_warptile_mmq_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 };
+ }
+
l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 };
m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 };
s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 };
device->physical_device.getProperties2(&props2);
device->properties = props2.properties;
device->vendor_id = device->properties.vendorID;
+ device->driver_id = driver_props.driverID;
const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE");