talk-llama : sync llama.cpp

ggml-ci
2025-08-09 14:25:46 +02:00 · 2025-06-18 10:22:47 +03:00
parent 69061e356f
commit 2f60ebc3c2
26 changed files with 1454 additions and 504 deletions
--- a/examples/talk-llama/llama-memory.h
+++ b/examples/talk-llama/llama-memory.h
@ -73,8 +73,7 @@ struct llama_memory_i {
    virtual llama_memory_state_ptr init_batch(
            const llama_batch & batch,
            uint32_t n_ubatch,
-            bool embd_pooled,
-            bool logits_all) = 0;
+            bool embd_all) = 0;

    // simulate full cache, used for allocating worst-case compute buffers
    virtual llama_memory_state_ptr init_full() = 0;