git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
llamafile : disable sgemm for batch-size 1 (#9330)
author Eve <redacted>
Sat, 7 Sep 2024 19:02:26 +0000 (19:02 +0000)
committer GitHub <redacted>
Sat, 7 Sep 2024 19:02:26 +0000 (22:02 +0300)
ggml/src/llamafile/sgemm.cpp

index f0988ba7cd24c5907b8d131fe2dcccee8555a048..d0c2bb284509bc2048b57d12c320cad0ecef2380 100644 (file)
@@ -1006,6 +1006,10 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
     assert(nth > 0);
     assert(ith < nth);
 
+    // only enable sgemm for prompt processing
+    if (n < 2)
+        return false;
+
     if (Ctype != GGML_TYPE_F32)
         return false;