mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-02-05 04:50:18 +01:00
ggml : update softmax n_task calculation (llama/5126)
Updated the n_tasks calculation for GGML_OP_SOFT_MAX to use the maximum number of threads possible, removing the previous cap of 4 tasks. This improved prompt eval performance by around 5% for DOT kernels and by around 10% for MMLA kernels on AWS Graviton3.
This commit is contained in:
parent
c3977cb2ce
commit
3c8d14e9c5
2
ggml.c
2
ggml.c
@@ -16602,7 +16602,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
||||
} break;
|
||||
case GGML_OP_SOFT_MAX:
|
||||
{
|
||||
n_tasks = MIN(MIN(4, n_threads), ggml_nrows(node->src[0]));
|
||||
n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
|
||||
} break;
|
||||
case GGML_OP_CONV_TRANSPOSE_1D:
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user