CUDA: refactor mmq, dmmv, mmvq (llama/7716)

* CUDA: refactor mmq, dmmv, mmvq

* fix out-of-bounds write

* struct for qk, qr, qi

* fix cmake build

* mmq_type_traits
This commit is contained in:
Johannes Gäßler
2024-06-05 16:53:00 +02:00
committed by Georgi Gerganov
parent abab4500fa
commit e08c62149b
110 changed files with 1778 additions and 1767 deletions

View File

@@ -123,12 +123,18 @@ typedef sycl::half2 ggml_half2;
// Per-quantization-type QI*/QR* constant pairs.
// Each QI macro has the form <block size> / (4 * QR) and refers to the QR macro
// defined on the line after it. That ordering is fine: a macro body is only
// expanded where the macro is used, by which point both names are defined.
// QK_K and QK4_NL are defined outside this excerpt.
// NOTE(review): presumably QI is the number of 32-bit integers of quantized data
// per block and QR the ratio of dequantized to quantized values - confirm
// against the kernels that consume these constants.
#define QI1_S (QK_K / (4*QR1_S))
#define QR1_S 8
#define QI1_M (QK_K / (4*QR1_M))
#define QR1_M 8
// Unlike the other entries in this group, the 4_NL pair is based on QK4_NL rather than QK_K.
#define QI4_NL (QK4_NL / (4*QR4_NL))
#define QR4_NL 2
#define QI4_XS (QK_K / (4*QR4_XS))
#define QR4_XS 8
#define QI3_S (QK_K / (4*QR3_S))
#define QR3_S 8
#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
// NOTE(review): presumably the number of values per Q4_0 block - confirm against
// the block definition that uses it. Defined after the #endif above, so it is not
// part of that conditional section.
#define QK4_0 32