mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-01 07:25:49 +02:00
metal : optimize multi-sequence FA vec kernel (llama/13493)
* batched-bench : fix pp batch contents
* metal : optimize multi-sequence FA vec kernel

ggml-ci
This commit is contained in:
parent
a77a924b20
commit
e11fc21e6c
@ -3887,6 +3887,11 @@ kernel void kernel_flash_attn_ext_vec(
|
|||||||
sm[tiisg] = pm[ic + tiisg];
|
sm[tiisg] = pm[ic + tiisg];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// skip -INF blocks
|
||||||
|
if (simd_max(sm[tiisg]) == -INFINITY) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Q*K^T
|
// Q*K^T
|
||||||
{
|
{
|
||||||
// each simdgroup processes 1 query and NE (NW/NL) head elements
|
// each simdgroup processes 1 query and NE (NW/NL) head elements
|
||||||
|
Loading…
x
Reference in New Issue
Block a user