talk-llama : sync llama.cpp

This commit is contained in:
Georgi Gerganov
2025-01-14 09:53:50 +02:00
parent 19d95f9f9a
commit 99b011a9f5
26 changed files with 5788 additions and 5093 deletions

View File

@@ -52,7 +52,7 @@ uint32_t llama_hparams::n_embd_v_gqa(uint32_t il) const {
uint32_t llama_hparams::n_embd_k_s() const {
if (wkv_head_size != 0) {
// for RWKV models
-return 2 * n_embd;
+return token_shift_count * n_embd;
}
// TODO: maybe support other convolution strides than 1