git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
metal : optimize multi-sequence FA vec kernel (#13493)
author Georgi Gerganov <redacted>
Tue, 13 May 2025 15:04:00 +0000 (18:04 +0300)
committer GitHub <redacted>
Tue, 13 May 2025 15:04:00 +0000 (18:04 +0300)
* batched-bench : fix pp batch contents

* metal : optimize multi-sequence FA vec kernel

ggml-ci

ggml/src/ggml-metal/ggml-metal.metal

index 9cfddf4503abe704a977b9350b6937ac99ccba67..122ae59737196bf35cc7afcdcfaeb1fdae1a0f3e 100644 (file)
@@ -3887,6 +3887,11 @@ kernel void kernel_flash_attn_ext_vec(
                 sm[tiisg] = pm[ic + tiisg];
             }
 
+            // skip -INF blocks
+            if (simd_max(sm[tiisg]) == -INFINITY) {
+                continue;
+            }
+
             // Q*K^T
             {
                 // each simdgroup processes 1 query and NE (NW/NL) head elements