ggml : sync latest llama.cpp (view_src + alloc improvements) (#1247)

* ggml : sync latest llama.cpp (view_src + alloc improvements)
* ggml : fix build
2025-08-26 12:27:45 +02:00 · 2023-09-05 20:57:27 +03:00
parent ba3c333611
commit c3f319d7c2
6 changed files with 865 additions and 529 deletions
--- a/ggml.h
+++ b/ggml.h
@@ -479,6 +479,9 @@ extern "C" {
        int64_t perf_cycles;
        int64_t perf_time_us;

+        struct ggml_tensor * view_src;
+        size_t               view_offs;
+
        void * data;

        char name[GGML_MAX_NAME];
@@ -661,7 +664,7 @@ extern "C" {
    GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);

    GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
-    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
+    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);

    GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);

@@ -952,11 +955,11 @@ extern "C" {

    // a - x
    // b - dy
-    // TODO: update with configurable eps
    GGML_API struct ggml_tensor * ggml_rms_norm_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * b,
+            float                 eps);

    // A: n columns, m rows
    // B: n columns, p rows  (i.e. we transpose it internally)
@@ -1612,7 +1615,8 @@ extern "C" {
            struct ggml_tensor  * tensor);


-    GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);

    GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
    GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
@@ -1677,6 +1681,8 @@ extern "C" {
        GGML_LINESEARCH_INVALID_PARAMETERS,
    };

+    typedef void (*ggml_opt_callback)(void * data, float * sched);
+
    // optimization parameters
    //
    //   see ggml.c (ggml_opt_default_params) for default values
@@ -1712,12 +1718,14 @@ extern "C" {

            float sched; // schedule multiplier (fixed, decay or warmup)
            float decay; // weight decay for AdamW, use 0.0f to disable
+            int   decay_min_ndim; // minimum number of tensor dimensions required to apply weight decay
            float alpha; // learning rate
            float beta1;
            float beta2;
            float eps;   // epsilon for numerical stability
            float eps_f; // epsilon for convergence test
            float eps_g; // epsilon for convergence test
+            float gclip; // gradient clipping
        } adam;

        // LBFGS parameters
@@ -1745,14 +1753,12 @@ extern "C" {

        bool just_initialized;

+        float loss_before;
+        float loss_after;
+
        struct {
-            struct ggml_tensor * x;  // view of the parameters
-            struct ggml_tensor * g1; // gradient
-            struct ggml_tensor * g2; // gradient squared
            struct ggml_tensor * m;  // first moment
            struct ggml_tensor * v;  // second moment
-            struct ggml_tensor * mh; // first moment hat
-            struct ggml_tensor * vh; // second moment hat
            struct ggml_tensor * pf; // past function values
            float fx_best;
            float fx_prev;
@@ -1789,10 +1795,10 @@ extern "C" {

    // initialize optimizer context
    GGML_API void ggml_opt_init(
-            struct ggml_context * ctx,
+            struct ggml_context     * ctx,
            struct ggml_opt_context * opt,
-            struct ggml_opt_params params,
-            int64_t nx);
+            struct ggml_opt_params    params,
+            int64_t                   nx);

    // continue optimizing the function defined by the tensor f
    GGML_API enum ggml_opt_result ggml_opt_resume(
@@ -1806,7 +1812,9 @@ extern "C" {
            struct ggml_opt_context * opt,
            struct ggml_tensor * f,
            struct ggml_cgraph * gf,
-            struct ggml_cgraph * gb);
+            struct ggml_cgraph * gb,
+            ggml_opt_callback callback,
+            void * callback_data);

    //
    // quantization