From 670bf026622874ca1318ab4405816ad64f9ce32e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Mon, 28 Apr 2025 09:29:26 +0200 Subject: [PATCH] CUDA: fix q_nope_absorbed prec for DS 2 Lite f16 (llama/13137) --- ggml/include/ggml.h | 4 ++-- ggml/src/ggml-cuda/ggml-cuda.cu | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 51aa5b3a..1b8603e7 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -393,8 +393,8 @@ extern "C" { // precision enum ggml_prec { - GGML_PREC_DEFAULT, - GGML_PREC_F32, + GGML_PREC_DEFAULT = 0, // stored as ggml_tensor.op_params, 0 by default + GGML_PREC_F32 = 10, }; // model file types diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index e0e0d213..19b9ce72 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -1935,8 +1935,8 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor ggml_cuda_mul_mat_vec(ctx, src0, src1, nullptr, dst); } else if (!split && use_mul_mat_vec_q) { ggml_cuda_mul_mat_vec_q(ctx, src0, src1, nullptr, dst); - } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || !any_gpus_with_slow_fp16) - && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { + } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || !any_gpus_with_slow_fp16) && + dst->op_params[0] == GGML_PREC_DEFAULT && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { // general KQ + KQV multi-batch without FlashAttention ggml_cuda_mul_mat_batched_cublas(ctx, src0, src1, dst); } else if (use_mul_mat_vec) {