From: Eve
Date: Sat, 7 Sep 2024 19:02:26 +0000 (+0000)
Subject: llamafile : disable sgemm for batch-size 1 (#9330)
X-Git-Tag: upstream/0.0.4488~803
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=e536426ded3fb4a8cd13626e53508cd92928d6c2;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llamafile : disable sgemm for batch-size 1 (#9330)
---

diff --git a/ggml/src/llamafile/sgemm.cpp b/ggml/src/llamafile/sgemm.cpp
index f0988ba7..d0c2bb28 100644
--- a/ggml/src/llamafile/sgemm.cpp
+++ b/ggml/src/llamafile/sgemm.cpp
@@ -1006,6 +1006,10 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
     assert(nth > 0);
     assert(ith < nth);
 
+    // only enable sgemm for prompt processing
+    if (n < 2)
+        return false;
+
     if (Ctype != GGML_TYPE_F32)
         return false;
 