talk-llama : sync llama.cpp
@@ -390,6 +390,7 @@ extern "C" {
         void * imatrix;      // pointer to importance matrix data
         void * kv_overrides; // pointer to vector containing overrides
         void * tensor_types; // pointer to vector containing tensor types
+        void * prune_layers; // pointer to vector containing layer indices to prune
     } llama_model_quantize_params;

     typedef struct llama_logit_bias {
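
The new prune_layers field follows the same convention as imatrix, kv_overrides and tensor_types above: an untyped pointer that the caller aims at a vector it owns. A minimal caller-side sketch, assuming the element type is int32_t and using hypothetical file names (the exact vector type expected is defined by the quantization code, not by this header):

#include "llama.h"

#include <cstdio>
#include <vector>

int main() {
    // start from the library defaults, then opt in to pruning
    llama_model_quantize_params params = llama_model_quantize_default_params();

    // layer indices to drop from the output model (assumed element type: int32_t)
    std::vector<int32_t> prune = { 20, 21, 22 };
    params.prune_layers = &prune;

    // hypothetical input/output paths
    const uint32_t rc = llama_model_quantize("model-f16.gguf", "model-q4_0.gguf", &params);
    if (rc != 0) {
        fprintf(stderr, "quantization failed: %u\n", rc);
        return 1;
    }
    return 0;
}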
@@ -943,12 +944,14 @@ extern "C" {
     // Requires the context to have a memory.
     // For encoder-decoder contexts, processes the batch using the decoder.
     // Positive return values do not mean a fatal error, but rather a warning.
-    // Upon non-zero return values, the memory state is restored to the state before this call
+    // Upon fatal error or abort, the ubatches that managed to be processed will remain in the memory state of the context
     //   To handle this correctly, query the memory state using llama_memory_seq_pos_min() and llama_memory_seq_pos_max()
+    // Upon other return values, the memory state is restored to the state before this call
     //   0 - success
     //   1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
-    //   2 - aborted
+    //   2 - aborted     (processed ubatches will remain in the context's memory)
     //  -1 - invalid input batch
-    // < -1 - error
+    // < -1 - fatal error (processed ubatches will remain in the context's memory)
     LLAMA_API int32_t llama_decode(
             struct llama_context * ctx,
             struct llama_batch batch);
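
This hunk changes the contract for non-zero returns: memory is no longer always restored, and on abort (2) or fatal error (< -1) the ubatches that were already processed stay in the context's memory. A caller-side sketch of how one might honor the new contract follows; decode_checked() and n_past are hypothetical caller-side names, the single sequence id 0 is an assumption, and llama_get_memory() and llama_memory_seq_rm() are taken from recent llama.cpp headers rather than from this commit (only the pos_min/pos_max queries are named here):

#include "llama.h"

#include <cstdio>

// hypothetical helper: decode a batch and trim partially processed ubatches on failure
static bool decode_checked(llama_context * ctx, llama_batch batch, llama_pos n_past) {
    const int32_t ret = llama_decode(ctx, batch);

    if (ret == 0) {
        return true; // success, memory now holds the whole batch
    }
    if (ret == 1) {
        // warning, not fatal: no KV slot - retry with a smaller batch or a larger context
        fprintf(stderr, "decode: no KV slot for batch of size %d\n", batch.n_tokens);
        return false;
    }

    // aborted (2) or fatal error (< -1): processed ubatches remain in memory,
    // so trim everything past the last position the caller knows is good
    llama_memory_t mem = llama_get_memory(ctx);
    if (llama_memory_seq_pos_max(mem, 0) >= n_past) {
        llama_memory_seq_rm(mem, 0, n_past, -1); // remove [n_past, end)
    }
    fprintf(stderr, "decode failed (ret = %d), trimmed memory back to pos %d\n", ret, (int) n_past);
    return false;
}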