ggml : sync latest llama.cpp (view_src + alloc improvements) (#1247)

* ggml : sync latest llama.cpp (view_src + alloc improvements)

* ggml : fix build
This commit is contained in:
Georgi Gerganov
2023-09-05 20:57:27 +03:00
committed by GitHub
parent ba3c333611
commit c3f319d7c2
6 changed files with 865 additions and 529 deletions

34
ggml.h
View File

@@ -479,6 +479,9 @@ extern "C" {
int64_t perf_cycles;
int64_t perf_time_us;
struct ggml_tensor * view_src;
size_t view_offs;
void * data;
char name[GGML_MAX_NAME];
@@ -661,7 +664,7 @@ extern "C" {
GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
@@ -952,11 +955,11 @@ extern "C" {
// a - x
// b - dy
// TODO: update with configurable eps
GGML_API struct ggml_tensor * ggml_rms_norm_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b);
struct ggml_tensor * b,
float eps);
// A: n columns, m rows
// B: n columns, p rows (i.e. we transpose it internally)
@@ -1612,7 +1615,8 @@ extern "C" {
struct ggml_tensor * tensor);
GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
@@ -1677,6 +1681,8 @@ extern "C" {
GGML_LINESEARCH_INVALID_PARAMETERS,
};
typedef void (*ggml_opt_callback)(void * data, float * sched);
// optimization parameters
//
// see ggml.c (ggml_opt_default_params) for default values
@@ -1712,12 +1718,14 @@ extern "C" {
float sched; // schedule multiplier (fixed, decay or warmup)
float decay; // weight decay for AdamW, use 0.0f to disable
int decay_min_ndim; // minimum number of tensor dimension to apply weight decay
float alpha; // learning rate
float beta1;
float beta2;
float eps; // epsilon for numerical stability
float eps_f; // epsilon for convergence test
float eps_g; // epsilon for convergence test
float gclip; // gradient clipping
} adam;
// LBFGS parameters
@@ -1745,14 +1753,12 @@ extern "C" {
bool just_initialized;
float loss_before;
float loss_after;
struct {
struct ggml_tensor * x; // view of the parameters
struct ggml_tensor * g1; // gradient
struct ggml_tensor * g2; // gradient squared
struct ggml_tensor * m; // first moment
struct ggml_tensor * v; // second moment
struct ggml_tensor * mh; // first moment hat
struct ggml_tensor * vh; // second moment hat
struct ggml_tensor * pf; // past function values
float fx_best;
float fx_prev;
@@ -1789,10 +1795,10 @@ extern "C" {
// initialize optimizer context
GGML_API void ggml_opt_init(
struct ggml_context * ctx,
struct ggml_context * ctx,
struct ggml_opt_context * opt,
struct ggml_opt_params params,
int64_t nx);
struct ggml_opt_params params,
int64_t nx);
// continue optimizing the function defined by the tensor f
GGML_API enum ggml_opt_result ggml_opt_resume(
@@ -1806,7 +1812,9 @@ extern "C" {
struct ggml_opt_context * opt,
struct ggml_tensor * f,
struct ggml_cgraph * gf,
struct ggml_cgraph * gb);
struct ggml_cgraph * gb,
ggml_opt_callback callback,
void * callback_data);
//
// quantization