vulkan: use scalar FA rather than coopmat2 when N==1 (#13554)

author Jeff Bolz <redacted>

Sat, 17 May 2025 06:35:47 +0000 (15:35 +0900)

committer GitHub <redacted>

Sat, 17 May 2025 06:35:47 +0000 (08:35 +0200)
author Jeff Bolz <redacted>
Sat, 17 May 2025 06:35:47 +0000 (15:35 +0900)
committer GitHub <redacted>
Sat, 17 May 2025 06:35:47 +0000 (08:35 +0200)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp

index 0856a1122832db4d014a2a623b2e601ac242e46b..fe3669b462c38acb3d01134215a86ed06017b6eb 100644 (file)
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -5872,10 +5872,17 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
      vk_pipeline *pipelines;
      bool small_rows = N <= get_fa_num_small_rows(path);
  
+    // coopmat1 does not actually support "small rows" (it needs 16 rows).
+    // So use scalar instead.
      if (small_rows && path == FA_COOPMAT1) {
          path = FA_SCALAR;
      }
  
+    // scalar is faster than coopmat2 when N==1
+    if (N == 1 && path == FA_COOPMAT2) {
+        path = FA_SCALAR;
+    }
+
      bool f32acc = path == FA_SCALAR || dst->op_params[3] == GGML_PREC_F32;
  
      switch (path) {
author	Jeff Bolz <redacted>
	Sat, 17 May 2025 06:35:47 +0000 (15:35 +0900)
committer	GitHub <redacted>
	Sat, 17 May 2025 06:35:47 +0000 (08:35 +0200)