From 6d61a09bc434a5e3d3203f9aac74afd7b45d9ff5 Mon Sep 17 00:00:00 2001
From: Jeff Bolz
Date: Sat, 17 May 2025 15:35:47 +0900
Subject: [PATCH] vulkan: use scalar FA rather than coopmat2 when N==1 (llama/13554)

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 0856a112..fe3669b4 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -5872,10 +5872,17 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
     vk_pipeline *pipelines;
     bool small_rows = N <= get_fa_num_small_rows(path);
 
+    // coopmat1 does not actually support "small rows" (it needs 16 rows).
+    // So use scalar instead.
     if (small_rows && path == FA_COOPMAT1) {
         path = FA_SCALAR;
     }
 
+    // scalar is faster than coopmat2 when N==1
+    if (N == 1 && path == FA_COOPMAT2) {
+        path = FA_SCALAR;
+    }
+
     bool f32acc = path == FA_SCALAR || dst->op_params[3] == GGML_PREC_F32;
 
     switch (path) {