talk-llama : sync llama.cpp

Georgi Gerganov
2025-07-28 10:09:47 +03:00
parent 5b4646df1a
commit d0a9d8c7f8
27 changed files with 3566 additions and 995 deletions


@@ -11,8 +11,8 @@ struct llama_cparams {
     uint32_t n_batch;
     uint32_t n_ubatch;
     uint32_t n_seq_max;
-    int      n_threads;       // number of threads to use for generation
-    int      n_threads_batch; // number of threads to use for batch processing
+    int32_t  n_threads;       // number of threads to use for generation
+    int32_t  n_threads_batch; // number of threads to use for batch processing

     float rope_freq_base;
     float rope_freq_scale;
@@ -33,6 +33,7 @@ struct llama_cparams {
     bool no_perf;
     bool warmup;
     bool op_offload;
+    bool kv_unified;

     enum llama_pooling_type pooling_type;
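
For context, the internal llama_cparams fields above are typically filled from the public llama_context_params when a context is created. The following is a minimal sketch, assuming the public llama.cpp C API exposes n_threads and n_threads_batch (now int32_t, matching the diff); it is illustrative, not part of this commit.

// Sketch: setting the thread counts that feed the internal llama_cparams.
// Assumes llama_context_params carries n_threads / n_threads_batch as int32_t.
#include "llama.h"

int main(void) {
    llama_context_params cparams = llama_context_default_params();

    cparams.n_threads       = 8;  // threads used for single-token generation
    cparams.n_threads_batch = 8;  // threads used for prompt/batch processing

    // ... load a model, then create the context with these parameters,
    // e.g. via the context-creation call of the API version in use ...

    return 0;
}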