talk-llama : sync llama.cpp

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-07-12 16:26:16 +03:00
parent 6d64e4abf3
commit 6ddff4d96a
24 changed files with 2831 additions and 690 deletions

View File

@ -92,6 +92,8 @@ private:
class llama_memory_hybrid_context : public llama_memory_context_i {
public:
using slot_info_vec_t = llama_kv_cache_unified::slot_info_vec_t;
// init failure
explicit llama_memory_hybrid_context(llama_memory_status status);
@ -107,7 +109,7 @@ public:
// init success
llama_memory_hybrid_context(
llama_memory_hybrid * mem,
std::vector<uint32_t> heads_attn,
slot_info_vec_t sinfos_attn,
std::vector<llama_ubatch> ubatches);
~llama_memory_hybrid_context() = default;