mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-08-16 15:21:56 +02:00
talk-llama : sync llama.cpp
This commit is contained in:
@ -185,7 +185,8 @@ extern "C" {
|
||||
LLAMA_ROPE_SCALING_TYPE_NONE = 0,
|
||||
LLAMA_ROPE_SCALING_TYPE_LINEAR = 1,
|
||||
LLAMA_ROPE_SCALING_TYPE_YARN = 2,
|
||||
LLAMA_ROPE_SCALING_TYPE_MAX_VALUE = LLAMA_ROPE_SCALING_TYPE_YARN,
|
||||
LLAMA_ROPE_SCALING_TYPE_LONGROPE = 3,
|
||||
LLAMA_ROPE_SCALING_TYPE_MAX_VALUE = LLAMA_ROPE_SCALING_TYPE_LONGROPE,
|
||||
};
|
||||
|
||||
enum llama_pooling_type {
|
||||
@ -272,6 +273,9 @@ extern "C" {
|
||||
};
|
||||
|
||||
struct llama_model_params {
|
||||
// NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
|
||||
ggml_backend_dev_t * devices;
|
||||
|
||||
int32_t n_gpu_layers; // number of layers to store in VRAM
|
||||
enum llama_split_mode split_mode; // how to split the model across multiple GPUs
|
||||
|
||||
@ -987,6 +991,9 @@ extern "C" {
|
||||
char * buf,
|
||||
int32_t length);
|
||||
|
||||
// Get list of built-in chat templates
|
||||
LLAMA_API int32_t llama_chat_builtin_templates(const char ** output, size_t len);
|
||||
|
||||
//
|
||||
// Sampling API
|
||||
//
|
||||
|
Reference in New Issue
Block a user