git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
opencl: refine condition for kqv mm (llama/17392)
author lhez <redacted>
Fri, 21 Nov 2025 22:34:48 +0000 (14:34 -0800)
committer Georgi Gerganov <redacted>
Fri, 12 Dec 2025 15:53:05 +0000 (17:53 +0200)
ggml/src/ggml-opencl/ggml-opencl.cpp

index 4cb6afe927191632081a393d5a021c5751ac9c31..2319f7a9e25a52ff8766250700f9862f54cedca5 100644 (file)
@@ -6895,9 +6895,23 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co
     cl_context context = backend_ctx->context;
 
     if(src0t == GGML_TYPE_F16 && src1t == GGML_TYPE_F32){
-        if (ne01 >= 64 && ne1 >= 32 && ne00 >= 16 && (ne12 % ne02) == 0){
-            ggml_cl_mul_mat_kq_kqv_adreno(backend, src0, src1, dst);
-            return;
+        if (ne01 >= 64 && ne1 >= 32 && ne00 >= 16 && (ne12 % ne02) == 0) {
+            // For KQ
+            if (ggml_is_permuted(src0) && ggml_is_permuted(src1) &&
+                nb00 <= nb02 &&
+                nb02 <= nb01 &&
+                nb01 <= nb03 &&
+                nb10 <= nb12 &&
+                nb12 <= nb11 &&
+                nb11 <= nb13) {
+                ggml_cl_mul_mat_kq_kqv_adreno(backend, src0, src1, dst);
+                return;
+            }
+            // For KQV
+            if (!ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
+                ggml_cl_mul_mat_kq_kqv_adreno(backend, src0, src1, dst);
+                return;
+            }
         }
     }