From e11fc21e6cb8ff4a38cffa534be85bf867f1a232 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 13 May 2025 18:04:00 +0300
Subject: [PATCH] metal : optimize multi-sequence FA vec kernel (llama/13493)

* batched-bench : fix pp batch contents

* metal : optimize multi-sequence FA vec kernel

ggml-ci
---
 ggml/src/ggml-metal/ggml-metal.metal | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
index 9cfddf45..122ae597 100644
--- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal
@@ -3887,6 +3887,11 @@ kernel void kernel_flash_attn_ext_vec(
                 sm[tiisg] = pm[ic + tiisg];
             }
 
+            // skip -INF blocks
+            if (simd_max(sm[tiisg]) == -INFINITY) {
+                continue;
+            }
+
             // Q*K^T
             {
                 // each simdgroup processes 1 query and NE (NW/NL) head elements