talk-llama : sync llama.cpp

2025-08-09 19:08:32 +02:00 · 2025-07-01 12:21:09 +03:00
parent c4ea72be9a
commit 1f816de7da
24 changed files with 1456 additions and 430 deletions
--- a/examples/talk-llama/llama-memory-recurrent.h
+++ b/examples/talk-llama/llama-memory-recurrent.h
@@ -11,8 +11,8 @@
 // llama_memory_recurrent
 //

-// TODO: extract the cache state used for graph computation into llama_memory_recurrent_state_i
-//       see the implementation of llama_kv_cache_unified_state_i for an example how to do it
+// TODO: extract the cache state used for graph computation into llama_memory_recurrent_context_i
+//       see the implementation of llama_kv_cache_unified_context_i for an example how to do it
 class llama_memory_recurrent : public llama_memory_i {
 public:

@@ -34,14 +34,14 @@ public:
    // llama_memory_i
    //

-    llama_memory_state_ptr init_batch(
+    llama_memory_context_ptr init_batch(
            llama_batch_allocr & balloc,
            uint32_t n_ubatch,
            bool embd_all) override;

-    llama_memory_state_ptr init_full() override;
+    llama_memory_context_ptr init_full() override;

-    llama_memory_state_ptr init_update(llama_context * lctx, bool optimize) override;
+    llama_memory_context_ptr init_update(llama_context * lctx, bool optimize) override;

    void clear(bool data) override;

@@ -125,24 +125,24 @@ private:
    bool state_read_data(llama_io_read_i & io, uint32_t cell_count);
 };

-class llama_memory_recurrent_state : public llama_memory_state_i {
+class llama_memory_recurrent_context : public llama_memory_context_i {
 public:
    // used for errors
-    llama_memory_recurrent_state(llama_memory_status status);
+    llama_memory_recurrent_context(llama_memory_status status);

-    // used to create a full-cache state
-    llama_memory_recurrent_state(
+    // used to create a full-cache or update context
+    llama_memory_recurrent_context(
            llama_memory_recurrent * mem);

-    // used to create a state from a batch
-    llama_memory_recurrent_state(
+    // used to create a batch processing context from a batch
+    llama_memory_recurrent_context(
            llama_memory_recurrent * mem,
            std::vector<llama_ubatch> ubatches);

-    virtual ~llama_memory_recurrent_state();
+    virtual ~llama_memory_recurrent_context();

    //
-    // llama_memory_state_i
+    // llama_memory_context_i
    //

    bool next()  override;
@@ -152,7 +152,7 @@ public:
    const llama_ubatch & get_ubatch() const override;

    //
-    // llama_memory_recurrent_state specific API
+    // llama_memory_recurrent_context specific API
    //

    uint32_t get_n_rs() const;