mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-08-09 19:08:32 +02:00
talk-llama : sync llama.cpp
This commit is contained in:
@ -11,8 +11,8 @@
|
||||
// llama_memory_recurrent
|
||||
//
|
||||
|
||||
// TODO: extract the cache state used for graph computation into llama_memory_recurrent_state_i
|
||||
// see the implementation of llama_kv_cache_unified_state_i for an example how to do it
|
||||
// TODO: extract the cache state used for graph computation into llama_memory_recurrent_context_i
|
||||
// see the implementation of llama_kv_cache_unified_context_i for an example how to do it
|
||||
class llama_memory_recurrent : public llama_memory_i {
|
||||
public:
|
||||
|
||||
@ -34,14 +34,14 @@ public:
|
||||
// llama_memory_i
|
||||
//
|
||||
|
||||
llama_memory_state_ptr init_batch(
|
||||
llama_memory_context_ptr init_batch(
|
||||
llama_batch_allocr & balloc,
|
||||
uint32_t n_ubatch,
|
||||
bool embd_all) override;
|
||||
|
||||
llama_memory_state_ptr init_full() override;
|
||||
llama_memory_context_ptr init_full() override;
|
||||
|
||||
llama_memory_state_ptr init_update(llama_context * lctx, bool optimize) override;
|
||||
llama_memory_context_ptr init_update(llama_context * lctx, bool optimize) override;
|
||||
|
||||
void clear(bool data) override;
|
||||
|
||||
@ -125,24 +125,24 @@ private:
|
||||
bool state_read_data(llama_io_read_i & io, uint32_t cell_count);
|
||||
};
|
||||
|
||||
class llama_memory_recurrent_state : public llama_memory_state_i {
|
||||
class llama_memory_recurrent_context : public llama_memory_context_i {
|
||||
public:
|
||||
// used for errors
|
||||
llama_memory_recurrent_state(llama_memory_status status);
|
||||
llama_memory_recurrent_context(llama_memory_status status);
|
||||
|
||||
// used to create a full-cache state
|
||||
llama_memory_recurrent_state(
|
||||
// used to create a full-cache or update context
|
||||
llama_memory_recurrent_context(
|
||||
llama_memory_recurrent * mem);
|
||||
|
||||
// used to create a state from a batch
|
||||
llama_memory_recurrent_state(
|
||||
// used to create a batch processing context from a batch
|
||||
llama_memory_recurrent_context(
|
||||
llama_memory_recurrent * mem,
|
||||
std::vector<llama_ubatch> ubatches);
|
||||
|
||||
virtual ~llama_memory_recurrent_state();
|
||||
virtual ~llama_memory_recurrent_context();
|
||||
|
||||
//
|
||||
// llama_memory_state_i
|
||||
// llama_memory_context_i
|
||||
//
|
||||
|
||||
bool next() override;
|
||||
@ -152,7 +152,7 @@ public:
|
||||
const llama_ubatch & get_ubatch() const override;
|
||||
|
||||
//
|
||||
// llama_memory_recurrent_state specific API
|
||||
// llama_memory_recurrent_context specific API
|
||||
//
|
||||
|
||||
uint32_t get_n_rs() const;
|
||||
|
Reference in New Issue
Block a user