git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
Fix q_xxs using mul_mat_q (llama/7459)
author AidanBeltonS <redacted>
Mon, 27 May 2024 16:34:51 +0000 (17:34 +0100)
committer Georgi Gerganov <redacted>
Wed, 29 May 2024 10:16:38 +0000 (13:16 +0300)
src/ggml-sycl.cpp

index f329bc27265feab2cd2a1b0b806bb35e050286a9..8839f775d5b880c507ef5d8efa96ac20497d2eb7 100644 (file)
@@ -15263,6 +15263,7 @@ static void ggml_sycl_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
             }
         } else {
             bool use_mul_mat_q = min_compute_capability >= VER_4VEC && ggml_is_quantized(src0->type);
+            use_mul_mat_q = use_mul_mat_q && (src0->type != GGML_TYPE_IQ2_XXS);
 
             if (use_xmx && min_compute_capability >= VER_GEN9 && src1->ne[1] > XMX_MAX_BATCH_SIZE) {
                 use_mul_mat_q = false;