talk-llama : sync llama.cpp

ggml-ci
2025-08-16 11:58:38 +02:00 · 2025-06-20 21:18:44 +03:00
parent d65a579a0a
commit e6c10cf3d5
28 changed files with 2521 additions and 1738 deletions
--- a/examples/talk-llama/llama-hparams.h
+++ b/examples/talk-llama/llama-hparams.h
@@ -115,6 +115,9 @@ struct llama_hparams {
    uint32_t ssm_d_state = 0;
    uint32_t ssm_dt_rank = 0;

+    // for hybrid state space models
+    std::array<bool, LLAMA_MAX_LAYERS> recurrent_layer_arr;
+
    bool ssm_dt_b_c_rms = false;

    float f_clamp_kqv      = 0.0f;
@@ -181,10 +184,15 @@ struct llama_hparams {

    // dimension of the rolling state embeddings
    // corresponds to Mamba's conv_states size or RWKV's token_shift states size
-    uint32_t n_embd_k_s() const;
+    uint32_t n_embd_r() const;

    // dimension of the recurrent state embeddings
-    uint32_t n_embd_v_s() const;
+    uint32_t n_embd_s() const;
+
+    // whether or not the given layer is recurrent (for hybrid models)
+    bool is_recurrent(uint32_t il) const;
+
+    uint32_t n_pos_per_embd() const;

    bool is_swa(uint32_t il) const;
 };