mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-01 07:25:49 +02:00
metal : use FA-vec kernel up to batch size 20 (llama/13496)
* batched-bench : fix pp batch contents
* metal : optimize multi-sequence FA vec kernel
  ggml-ci
* metal : use FA-vec kernel up to batch size 20
  ggml-ci
This commit is contained in:
parent
e11fc21e6c
commit
08436716ae
@@ -4358,7 +4358,7 @@ static bool ggml_metal_encode_node(
 // TODO: add vec kernels for (ne00%64 == 0) and maybe also for (ne00%32 == 0)
 // for now avoiding mainly to keep the number of templates/kernels a bit lower
 // these are now trivial to add after: https://github.com/ggml-org/llama.cpp/pull/12612
-if (ne01 >= 4 || (ne00%128 != 0 && ne00 != 96 && ne00 != 192 && ne00 != 576)) {
+if (ne01 >= 20 || (ne00%128 != 0 && ne00 != 96 && ne00 != 192 && ne00 != 576)) {
 switch (src1->type) {
 case GGML_TYPE_F16:
 {
Loading…
x
Reference in New Issue
Block a user