mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-08-17 23:40:28 +02:00
@@ -19,10 +19,16 @@
|
||||
# define LLAMA_API
|
||||
#endif
|
||||
|
||||
#define LLAMA_FILE_VERSION 2
|
||||
#define LLAMA_FILE_MAGIC 'ggjt'
|
||||
#define LLAMA_FILE_MAGIC_UNVERSIONED 'ggml'
|
||||
#define LLAMA_SESSION_MAGIC 'ggsn'
|
||||
#define LLAMA_FILE_MAGIC_GGJT 0x67676a74u // 'ggjt'
|
||||
#define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
|
||||
#define LLAMA_FILE_MAGIC_GGMF 0x67676d66u // 'ggmf'
|
||||
#define LLAMA_FILE_MAGIC_GGML 0x67676d6cu // 'ggml'
|
||||
#define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn'
|
||||
|
||||
#define LLAMA_FILE_VERSION 3
|
||||
#define LLAMA_FILE_MAGIC LLAMA_FILE_MAGIC_GGJT
|
||||
#define LLAMA_FILE_MAGIC_UNVERSIONED LLAMA_FILE_MAGIC_GGML
|
||||
#define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
|
||||
#define LLAMA_SESSION_VERSION 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -40,9 +46,9 @@ extern "C" {
|
||||
typedef int llama_token;
|
||||
|
||||
typedef struct llama_token_data {
|
||||
llama_token id; // token id
|
||||
float logit; // log-odds of the token
|
||||
float p; // probability of the token
|
||||
llama_token id; // token id
|
||||
float logit; // log-odds of the token
|
||||
float p; // probability of the token
|
||||
} llama_token_data;
|
||||
|
||||
typedef struct llama_token_data_array {
|
||||
@@ -55,7 +61,6 @@ extern "C" {
|
||||
|
||||
struct llama_context_params {
|
||||
int n_ctx; // text context
|
||||
int n_parts; // -1 for default
|
||||
int n_gpu_layers; // number of layers to store in VRAM
|
||||
int seed; // RNG seed, -1 for random
|
||||
|
||||
@@ -74,16 +79,16 @@ extern "C" {
|
||||
|
||||
// model file types
|
||||
enum llama_ftype {
|
||||
LLAMA_FTYPE_ALL_F32 = 0,
|
||||
LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
|
||||
LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
|
||||
LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
|
||||
LLAMA_FTYPE_ALL_F32 = 0,
|
||||
LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
|
||||
LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
|
||||
LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
|
||||
LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
|
||||
// LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
|
||||
// LLAMA_FTYPE_MOSTLY_Q4_3 (6) support has been removed
|
||||
LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
|
||||
LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
|
||||
LLAMA_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
|
||||
// LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
|
||||
// LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // support has been removed
|
||||
LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
|
||||
LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
|
||||
LLAMA_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
|
||||
};
|
||||
|
||||
LLAMA_API struct llama_context_params llama_context_default_params();
|
||||
@@ -91,6 +96,13 @@ extern "C" {
|
||||
LLAMA_API bool llama_mmap_supported();
|
||||
LLAMA_API bool llama_mlock_supported();
|
||||
|
||||
// TODO: not great API - very likely to change
|
||||
// Initialize the llama + ggml backend
|
||||
// Call once at the start of the program
|
||||
LLAMA_API void llama_init_backend();
|
||||
|
||||
LLAMA_API int64_t llama_time_us();
|
||||
|
||||
// Various functions for loading a ggml llama model.
|
||||
// Allocate (almost) all memory needed for the model.
|
||||
// Return NULL on failure
|
||||
@@ -139,7 +151,7 @@ extern "C" {
|
||||
|
||||
// Set the state reading from the specified address
|
||||
// Returns the number of bytes read
|
||||
LLAMA_API size_t llama_set_state_data(struct llama_context * ctx, const uint8_t * src);
|
||||
LLAMA_API size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src);
|
||||
|
||||
// Save/load session file
|
||||
LLAMA_API bool llama_load_session_file(struct llama_context * ctx, const char * path_session, llama_token * tokens_out, size_t n_token_capacity, size_t * n_token_count_out);
|
||||
|
Reference in New Issue
Block a user