mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-05-30 22:47:22 +02:00
metal : optimize multi-sequence FA vec kernel (llama/13493)
* batched-bench : fix pp batch contents
* metal : optimize multi-sequence FA vec kernel
ggml-ci
This commit is contained in:
parent
a77a924b20
commit
e11fc21e6c
@@ -3887,6 +3887,11 @@ kernel void kernel_flash_attn_ext_vec(
|
||||
sm[tiisg] = pm[ic + tiisg];
|
||||
}
|
||||
|
||||
// skip -INF blocks
|
||||
if (simd_max(sm[tiisg]) == -INFINITY) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Q*K^T
|
||||
{
|
||||
// each simdgroup processes 1 query and NE (NW/NL) head elements
|
||||
|
Loading…
x
Reference in New Issue
Block a user