talk-llama : sync llama.cpp

ggml-ci
Georgi Gerganov
2025-06-20 21:18:44 +03:00
parent d65a579a0a
commit e6c10cf3d5
28 changed files with 2521 additions and 1738 deletions

@@ -7,6 +7,8 @@
 struct llama_ubatch;
 
+class llama_batch_allocr;
+
 class llama_io_write_i;
 class llama_io_read_i;
@@ -50,9 +52,6 @@ struct llama_memory_state_i {
     // return false on failure
     virtual bool apply() = 0;
 
-    // TODO: this might get reworked in the future when refactoring llama_batch
-    virtual std::vector<int64_t> & out_ids() = 0;
-
     // get the current ubatch
     virtual const llama_ubatch & get_ubatch() const = 0;
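With out_ids() removed, output-index bookkeeping no longer lives on the memory state, and an implementation is left with the iteration and application hooks. A minimal stub sketch, assuming the next() and get_status() members of llama_memory_state_i and the LLAMA_MEMORY_STATUS_SUCCESS value that sit outside this hunk:

// Illustrative stub only; next(), get_status(), llama_memory_status and
// LLAMA_MEMORY_STATUS_SUCCESS are assumed from the wider header, and
// llama-batch.h is assumed to provide the full llama_ubatch definition.
#include "llama-batch.h"
#include "llama-memory.h"

class llama_memory_state_stub : public llama_memory_state_i {
public:
    bool next()  override { return false; } // single ubatch, nothing further
    bool apply() override { return true;  } // no cache state to apply

    llama_memory_status get_status() const override {
        return LLAMA_MEMORY_STATUS_SUCCESS;
    }

    const llama_ubatch & get_ubatch() const override { return ubatch; }

private:
    llama_ubatch ubatch{};
};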
@@ -71,7 +70,7 @@ struct llama_memory_i {
     // return a state object containing the ubatches and KV cache state required to process them
     // check the llama_memory_state_i::get_status() for the result
     virtual llama_memory_state_ptr init_batch(
-            const llama_batch & batch,
+            llama_batch_allocr & balloc,
             uint32_t n_ubatch,
             bool embd_all) = 0;
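Taken together, a sketch of how a caller might drive the reworked entry point: initialize a state from the batch allocator, check its status, then walk the prepared ubatches. The process_batch() helper and its loop are illustrative assumptions, not the actual llama.cpp decode path:

// Illustrative driver only; process_batch() is hypothetical, and
// LLAMA_MEMORY_STATUS_SUCCESS / next() are assumed from the wider header.
#include "llama-memory.h"

static bool process_batch(llama_memory_i & mem, llama_batch_allocr & balloc, uint32_t n_ubatch) {
    // init_batch() now consumes the batch allocator instead of a raw llama_batch
    llama_memory_state_ptr state = mem.init_batch(balloc, n_ubatch, /*embd_all=*/false);
    if (!state || state->get_status() != LLAMA_MEMORY_STATUS_SUCCESS) {
        return false;
    }

    do {
        // apply the KV cache state for the current ubatch; returns false on failure
        if (!state->apply()) {
            return false;
        }

        const llama_ubatch & ubatch = state->get_ubatch();
        (void) ubatch; // ... build and run the compute graph for this ubatch ...
    } while (state->next()); // advance to the next prepared ubatch

    return true;
}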