mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-08-18 07:10:09 +02:00
metal : optimize MoE for large batches (llama/13388)
This commit is contained in:
@@ -2732,11 +2732,11 @@ void ggml_mul_mat_set_prec(
|
||||
c = ggml_mul_mat_id(ctx, as, b, ids);
|
||||
|
||||
as -> [cols, rows, n_expert]
|
||||
ids -> [n_experts_used, n_tokens] (i32)
|
||||
b -> [cols, n_expert_used, n_tokens]
|
||||
ids -> [n_expert_used, n_tokens] (i32)
|
||||
c -> [rows, n_expert_used, n_tokens]
|
||||
|
||||
in b, n_experts_used can be broadcasted to match the n_expert_used of ids
|
||||
in b, n_expert_used can be broadcasted to match the n_expert_used of ids
|
||||
|
||||
c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
|
||||
*/
|
||||
|
Reference in New Issue
Block a user