diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index f5502afb..bc91ac3a 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -5312,7 +5312,7 @@ void ggml_mul_mat_set_prec(
  as -> [cols, rows, n_expert]
  ids -> [n_experts_used, n_tokens] (i32)
  b -> [cols, n_expert_used, n_tokens]
- c -> [cols, n_expert_used, n_tokens]
+ c -> [rows, n_expert_used, n_tokens]
 
  in b, n_experts_used can be broadcasted to match the n_expert_used of ids
 