metal : unify mul_mv_id kernels (llama/6556)

This commit is contained in:
slaren 2024-04-12 18:13:20 +02:00 committed by Georgi Gerganov
parent 60f3713026
commit 00a0947c65
3 changed files with 207 additions and 1122 deletions

View File

@@ -1941,7 +1941,12 @@ static enum ggml_status ggml_metal_graph_compute(
{
nth0 = 4;
nth1 = 16;
#if QK_K == 64
pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ4_NL_F32].pipeline;
#else
pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ4_XS_F32].pipeline;
#endif
} break;
default:
{

File diff suppressed because it is too large Load Diff

1
ggml.c
View File

@@ -11074,7 +11074,6 @@ static void ggml_compute_forward_mul_mat_id(
}
// initialize matrix_row_counts
GGML_ASSERT(wdata == wdata_src1_end);
memset(matrix_row_counts, 0, n_as*sizeof(int64_t));
// group rows by src0 matrix