From: Jeff Bolz
Date: Fri, 5 Dec 2025 20:21:57 +0000 (-0600)
Subject: vulkan: enable mmvq for q2_k on NVIDIA (llama/17675)
X-Git-Tag: upstream/0.9.4.395~60
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=04deeb820f3d1981b1c0cb5a6436ebc6479c104a;p=pkg%2Fggml%2Fsources%2Fggml

vulkan: enable mmvq for q2_k on NVIDIA (llama/17675)
---

diff --git a/src/ggml-vulkan/ggml-vulkan.cpp b/src/ggml-vulkan/ggml-vulkan.cpp
index 97b0fd74..c8f0449a 100644
--- a/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/src/ggml-vulkan/ggml-vulkan.cpp
@@ -6948,6 +6948,10 @@ static bool ggml_vk_should_use_mmvq(const vk_device& device, uint32_t m, uint32_
     // Quantization overhead is not worth it for small k
     switch (device->vendor_id) {
     case VK_VENDOR_ID_NVIDIA:
+        if (src0_type == GGML_TYPE_Q2_K) {
+            return true;
+        }
+
         if (k <= 4096) {
             return false;
         }