examples: add MNIST training + missing ops

Johannes Gäßler 2024-07-30 15:56:35 +02:00 committed by Georgi Gerganov
parent d2986f8b07
commit 1fbd828a5d
2 changed files with 443 additions and 56 deletions

View File

@@ -220,7 +220,7 @@
#include <stdio.h>
#define GGML_FILE_MAGIC 0x67676d6c // "ggml"
-#define GGML_FILE_VERSION 1
+#define GGML_FILE_VERSION 2
#define GGML_QNT_VERSION 2 // bump this on quantization format changes
#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
@@ -490,9 +490,11 @@ extern "C" {
GGML_OP_CLAMP,
GGML_OP_CONV_TRANSPOSE_1D,
GGML_OP_IM2COL,
GGML_OP_IM2COL_BACK,
GGML_OP_CONV_TRANSPOSE_2D,
GGML_OP_POOL_1D,
GGML_OP_POOL_2D,
GGML_OP_POOL_2D_BACK,
GGML_OP_UPSCALE, // nearest interpolate
GGML_OP_PAD,
GGML_OP_ARANGE,
@@ -1582,34 +1584,49 @@ extern "C" {
float min,
float max);
// im2col
// converts data into a format that effectively results in a convolution when combined with matrix multiplication
GGML_API struct ggml_tensor * ggml_im2col(
struct ggml_context * ctx,
-struct ggml_tensor * a,
-struct ggml_tensor * b,
-int s0,
-int s1,
-int p0,
-int p1,
-int d0,
-int d1,
-bool is_2D,
-enum ggml_type dst_type);
+struct ggml_tensor * a, // convolution kernel
+struct ggml_tensor * b, // data
+int s0, // stride dimension 0
+int s1, // stride dimension 1
+int p0, // padding dimension 0
+int p1, // padding dimension 1
+int d0, // dilation dimension 0
+int d1, // dilation dimension 1
+bool is_2D,
+enum ggml_type dst_type);
GGML_API struct ggml_tensor * ggml_im2col_back(
struct ggml_context * ctx,
struct ggml_tensor * a, // convolution kernel
struct ggml_tensor * b, // gradient of im2col output
int64_t * ne, // shape of im2col input
int s0, // stride dimension 0
int s1, // stride dimension 1
int p0, // padding dimension 0
int p1, // padding dimension 1
int d0, // dilation dimension 0
int d1, // dilation dimension 1
bool is_2D);
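For orientation: im2col unfolds every receptive field of the input into one row, after which the whole convolution collapses into a single matrix multiplication. The sketch below mirrors what ggml_conv_2d does internally after this commit (see the implementation hunk further down); conv2d_via_im2col is a hypothetical name, and ctx/kernel/image are assumed to be set up by the caller.

// Sketch: 2D convolution as im2col + one matmul.
static struct ggml_tensor * conv2d_via_im2col(
        struct ggml_context * ctx,
        struct ggml_tensor  * kernel, // [KW, KH, IC, OC]
        struct ggml_tensor  * image,  // [IW, IH, IC, N]
        int s0, int s1, int p0, int p1, int d0, int d1) {
    // unfold all receptive fields: [N, OH, OW, IC*KH*KW]
    struct ggml_tensor * col = ggml_im2col(ctx, kernel, image,
            s0, s1, p0, p1, d0, d1, /*is_2D=*/true, kernel->type);

    // one matmul covers every output pixel and every output channel
    struct ggml_tensor * out = ggml_mul_mat(ctx,
            ggml_reshape_2d(ctx, col, col->ne[0], col->ne[3]*col->ne[2]*col->ne[1]),                // [N*OH*OW, IC*KH*KW]
            ggml_reshape_2d(ctx, kernel, kernel->ne[0]*kernel->ne[1]*kernel->ne[2], kernel->ne[3])); // [OC, IC*KH*KW]

    out = ggml_reshape_4d(ctx, out, col->ne[1], col->ne[2], col->ne[3], kernel->ne[3]); // [OC, N, OH, OW]
    return ggml_cont(ctx, ggml_permute(ctx, out, 0, 1, 3, 2));                          // [N, OC, OH, OW]
}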
GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
struct ggml_context * ctx,
-struct ggml_tensor * a,
-struct ggml_tensor * b,
-int s0,
-int s1,
-int p0,
-int p1,
-int d0,
-int d1);
+struct ggml_tensor * a, // convolution kernel
+struct ggml_tensor * b, // data
+int s0, // stride dimension 0
+int s1, // stride dimension 1
+int p0, // padding dimension 0
+int p1, // padding dimension 1
+int d0, // dilation dimension 0
+int d1); // dilation dimension 1
GGML_API struct ggml_tensor * ggml_conv_1d(
struct ggml_context * ctx,
-struct ggml_tensor * a,
-struct ggml_tensor * b,
+struct ggml_tensor * a, // convolution kernel
+struct ggml_tensor * b, // data
int s0, // stride
int p0, // padding
int d0); // dilation
@@ -1618,29 +1635,29 @@ extern "C" {
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
struct ggml_context * ctx,
-struct ggml_tensor * a,
-struct ggml_tensor * b,
-int s,
-int d);
+struct ggml_tensor * a, // convolution kernel
+struct ggml_tensor * b, // data
+int s, // stride
+int d); // dilation
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
struct ggml_context * ctx,
-struct ggml_tensor * a,
-struct ggml_tensor * b,
-int s0,
-int p0,
-int d0);
+struct ggml_tensor * a, // convolution kernel
+struct ggml_tensor * b, // data
+int s0, // stride
+int p0, // padding
+int d0); // dilation
GGML_API struct ggml_tensor * ggml_conv_2d(
struct ggml_context * ctx,
-struct ggml_tensor * a,
-struct ggml_tensor * b,
-int s0,
-int s1,
-int p0,
-int p1,
-int d0,
-int d1);
+struct ggml_tensor * a, // convolution kernel
+struct ggml_tensor * b, // data
+int s0, // stride dimension 0
+int s1, // stride dimension 1
+int p0, // padding dimension 0
+int p1, // padding dimension 1
+int d0, // dilation dimension 0
+int d1); // dilation dimension 1
// kernel size is a->ne[0] x a->ne[1]
@@ -1702,6 +1719,18 @@ extern "C" {
float p0,
float p1);
GGML_API struct ggml_tensor * ggml_pool_2d_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * af, // "a"/input used in forward pass
enum ggml_op_pool op,
int k0,
int k1,
int s0,
int s1,
float p0,
float p1);
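Note on the af argument: for GGML_OP_POOL_MAX the backward pass must route each output gradient to the one input element that produced the maximum, which requires re-scanning the forward input; for GGML_OP_POOL_AVG the gradient is simply spread uniformly over the k0*k1 window. Both cases can be seen in ggml_compute_forward_pool_2d_back further down.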
// nearest interpolate
// multiplies ne0 and ne1 by scale factor
// used in stable-diffusion

View File

@@ -2801,9 +2801,11 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"CLAMP",
"CONV_TRANSPOSE_1D",
"IM2COL",
"IM2COL_BACK",
"CONV_TRANSPOSE_2D",
"POOL_1D",
"POOL_2D",
"POOL_2D_BACK",
"UPSCALE",
"PAD",
"ARANGE",
@@ -2837,7 +2839,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"CROSS_ENTROPY_LOSS_BACK",
};
-static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
+static_assert(GGML_OP_COUNT == 78, "GGML_OP_COUNT != 78");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
@@ -2891,9 +2893,11 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"clamp(x)",
"conv_transpose_1d(x)",
"im2col(x)",
"im2col_back(x)",
"conv_transpose_2d(x)",
"pool_1d(x)",
"pool_2d(x)",
"pool_2d_back(x)",
"upscale(x)",
"pad(x)",
"arange(start, stop, step)",
@@ -2927,7 +2931,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"cross_entropy_loss_back(x,y)",
};
-static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
+static_assert(GGML_OP_COUNT == 78, "GGML_OP_COUNT != 78");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
@@ -3741,6 +3745,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
size_t data_size = ggml_row_size(type, ne[0]);
for (int i = 1; i < n_dims; i++) {
assert(ne[i] > 0);
data_size *= ne[i];
}
@@ -3773,6 +3778,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
}
struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
GGML_ASSERT(obj_new);
// TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
@@ -4492,8 +4498,6 @@ static struct ggml_tensor * ggml_add_impl(
bool is_node = false;
if (!inplace && (a->grad || b->grad)) {
-// TODO: support backward pass for broadcasting
-GGML_ASSERT(ggml_are_same_shape(a, b));
is_node = true;
}
@@ -6801,17 +6805,20 @@ struct ggml_tensor * ggml_im2col(
GGML_ASSERT(a->ne[2] == b->ne[2]);
} else {
GGML_ASSERT(a->ne[1] == b->ne[1]);
GGML_ASSERT(b->ne[3] == 1);
}
bool is_node = false;
-if (a->grad || b->grad) {
-GGML_ABORT("fatal error"); // TODO: implement backward
+if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
is_node = true;
}
const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
GGML_ASSERT((OW > 0) && "b too small compared to a");
const int64_t ne[4] = {
is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
OW,
@@ -6831,6 +6838,37 @@ struct ggml_tensor * ggml_im2col(
return result;
}
struct ggml_tensor * ggml_im2col_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
int64_t * ne,
int s0,
int s1,
int p0,
int p1,
int d0,
int d1,
bool is_2D) {
bool is_node = false;
if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
is_node = true;
}
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_IM2COL_BACK;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
return result;
}
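The int64_t * ne argument carries the shape of the original im2col input: the gradient built here must come out in exactly that shape, and it is not recoverable from the remaining arguments. The GGML_OP_IM2COL backward case further down passes src1->ne for it.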
// a: [OC, IC, KH, KW]
// b: [N, IC, IH, IW]
// result: [N, OC, OH, OW]
@@ -6844,7 +6882,7 @@ struct ggml_tensor * ggml_conv_2d(
int p1,
int d0,
int d1) {
-struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N, OH, OW, IC * KH * KW]
+struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
struct ggml_tensor * result =
ggml_mul_mat(ctx,
@@ -6970,17 +7008,17 @@ struct ggml_tensor * ggml_pool_2d(
bool is_node = false;
if (a->grad) {
-GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
struct ggml_tensor * result;
-const int64_t ne[3] = {
+const int64_t ne[4] = {
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
a->ne[2],
a->ne[3],
};
-result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
+result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
ggml_set_op_params(result, params, sizeof(params));
@@ -6991,6 +7029,37 @@ struct ggml_tensor * ggml_pool_2d(
return result;
}
struct ggml_tensor * ggml_pool_2d_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * af,
enum ggml_op_pool op,
int k0,
int k1,
int s0,
int s1,
float p0,
float p1) {
bool is_node = false;
if (a->grad) {
is_node = true;
}
struct ggml_tensor * result;
result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_POOL_2D_BACK;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = af;
return result;
}
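The gradient produced here has to match the pooling input, so the result takes af's shape directly. Relatedly, ggml_pool_2d above now returns a full 4-D tensor (previously 3-D), keeping the batch dimension explicit now that gradients flow through pooling.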
// ggml_upscale
static struct ggml_tensor * ggml_upscale_impl(
@@ -14714,6 +14783,7 @@ static void ggml_compute_forward_conv_transpose_1d(
}
}
// ggml_compute_forward_im2col_f32
// src0: kernel [OC, IC, KH, KW]
// src1: image [N, IC, IH, IW]
// dst: result [N, OH, OW, IC*KH*KW]
@@ -14724,7 +14794,6 @@ static void ggml_compute_forward_im2col_f32(
const struct ggml_tensor * src0 = dst->src[0];
const struct ggml_tensor * src1 = dst->src[1];
-GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -14755,7 +14824,6 @@ static void ggml_compute_forward_im2col_f32(
int ofs0 = is_2D ? nb13 : nb12;
int ofs1 = is_2D ? nb12 : nb11;
-GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb10 == sizeof(float));
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
@@ -14791,6 +14859,7 @@ static void ggml_compute_forward_im2col_f32(
}
// ggml_compute_forward_im2col_f16
// src0: kernel [OC, IC, KH, KW]
// src1: image [N, IC, IH, IW]
// dst: result [N, OH, OW, IC*KH*KW]
@@ -14886,6 +14955,99 @@ static void ggml_compute_forward_im2col(
}
}
// ggml_compute_forward_im2col_back_f32
static void ggml_compute_forward_im2col_back_f32(
const struct ggml_compute_params * params,
struct ggml_tensor * dst) {
const struct ggml_tensor * src0 = dst->src[0];
const struct ggml_tensor * src1 = dst->src[1];
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
GGML_TENSOR_BINARY_OP_LOCALS;
const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
const int ith = params->ith;
const int nth = params->nth;
const int64_t N = is_2D ? ne3 : ne2;
const int64_t IC = is_2D ? ne2 : ne1;
const int64_t IH = is_2D ? ne1 : 1;
const int64_t IW = ne0;
const int64_t KH = is_2D ? ne01 : 1;
const int64_t KW = ne00;
const int64_t OH = is_2D ? ne12 : 1;
const int64_t OW = ne11;
int ofs0 = is_2D ? nb3 : nb2;
int ofs1 = is_2D ? nb2 : nb1;
GGML_ASSERT(nb0 == sizeof(float));
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
{
float * const wdata = (float *) dst->data;
for (int64_t in = 0; in < N; in++) {
for (int64_t iic = ith; iic < IC; iic += nth) {
for (int64_t iih = 0; iih < IH; iih++) {
for (int64_t iiw = 0; iiw < IW; iiw++) {
// micro kernel
float grad = 0.0f;
for (int64_t ikh = 0; ikh < KH; ikh++) {
for (int64_t ikw = 0; ikw < KW; ikw++) {
// For s0 > 1 some values were skipped over in the forward pass.
// These values have tmpw % s0 != 0 and need to be skipped in the backwards pass as well.
const int64_t tmpw = (iiw + p0 - ikw*d0);
if (tmpw % s0 != 0) {
continue;
}
const int64_t iow = tmpw / s0;
// Equivalent logic as above except for s1.
int64_t ioh;
if (is_2D) {
const int64_t tmph = iih + p1 - ikh*d1;
if (tmph % s1 != 0) {
continue;
}
ioh = tmph / s1;
} else {
ioh = 0;
}
if (iow < 0 || iow >= OW || ioh < 0 || ioh >= OH) {
continue;
}
const float * const src_data = (const float *) src1->data
+ (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
grad += src_data[iic*(KH*KW) + ikh*KW + ikw];
}
}
float * dst_data = (float *)((char *) wdata + (in*ofs0 + iic*ofs1)); // [IH, IW]
dst_data[iih*IW + iiw] = grad;
}
}
}
}
}
}
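As a concrete check of the stride handling: with s0 = 2, p0 = 0, d0 = 1 and KW = 3, input column iiw = 4 collects gradient from (ikw = 0, iow = 2) and (ikw = 2, iow = 1), while ikw = 1 gives tmpw = 3, which is not divisible by s0: that tap never fired in the forward pass, so it is skipped (assuming OW > 2).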
// ggml_compute_forward_conv_transpose_2d
@@ -15128,6 +15290,128 @@ static void ggml_compute_forward_pool_2d(
}
}
// ggml_compute_forward_pool_2d_back
static void ggml_compute_forward_pool_2d_back(
const struct ggml_compute_params * params,
struct ggml_tensor * dst) {
const struct ggml_tensor * src = dst->src[0];
const struct ggml_tensor * dstf = dst->src[1]; // forward tensor of dst
assert(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
if (params->ith != 0) {
return;
}
const int32_t * opts = (const int32_t *)dst->op_params;
enum ggml_op_pool op = opts[0];
const int k0 = opts[1];
const int k1 = opts[2];
const int s0 = opts[3];
const int s1 = opts[4];
const int p0 = opts[5];
const int p1 = opts[6];
char * cdata = (char *) dst->data;
const char * cdataf = (const char *) dstf->data;
const char * const data_end = cdata + ggml_nbytes(dst);
GGML_ASSERT(params->ith == 0);
memset(cdata, 0, ggml_nbytes(dst));
const int64_t px = src->ne[0];
const int64_t py = src->ne[1];
const int64_t pa = px * py;
const float * splane = (const float *) src->data;
const int ka = k0 * k1;
const int offset0 = -p0;
const int offset1 = -p1;
while (cdata < data_end) {
for (int oy = 0; oy < py; ++oy) {
const float * const srow = splane + oy * px;
for (int ox = 0; ox < px; ++ox) {
const float grad0 = srow[ox];
const int ix = offset0 + ox * s0;
const int iy = offset1 + oy * s1;
if (op == GGML_OP_POOL_MAX) {
float maxval = -FLT_MAX;
int kxmax = -1;
int kymax = -1;
for (int ky = 0; ky < k1; ++ky) {
if (iy + ky < 0 || iy + ky >= dst->ne[1]) {
continue;
}
const void * drowf = (const void *)(cdataf + dst->nb[1] * (iy + ky));
for (int kx = 0; kx < k0; ++kx) {
int j = ix + kx;
if (j < 0 || j >= dst->ne[0]) {
continue;
}
const float val = dst->type == GGML_TYPE_F32 ?
((const float *) drowf)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t *) drowf)[j]);
if (val <= maxval) {
continue;
}
maxval = val;
kxmax = kx;
kymax = ky;
}
}
if (kxmax == -1 || kymax == -1) {
continue;
}
void * drow = (void *)(cdata + dst->nb[1] * (iy + kymax));
const int j = ix + kxmax;
if (dst->type == GGML_TYPE_F32) {
((float *) drow)[j] += grad0;
} else {
((ggml_fp16_t *) drow)[j] = GGML_FP32_TO_FP16(grad0 + GGML_FP16_TO_FP32(((const ggml_fp16_t *) drow)[j]));
}
} else if (op == GGML_OP_POOL_AVG) {
const float grad = grad0 / ka;
for (int ky = 0; ky < k1; ++ky) {
if (iy + ky < 0 || iy + ky >= dst->ne[1]) {
continue;
}
void * drow = (void *)(cdata + dst->nb[1] * (iy + ky));
for (int kx = 0; kx < k0; ++kx) {
int j = ix + kx;
if (j < 0 || j >= dst->ne[0]) {
continue;
}
if (dst->type == GGML_TYPE_F32) {
((float *) drow)[j] += grad;
} else {
((ggml_fp16_t *) drow)[j] = GGML_FP32_TO_FP16(grad + GGML_FP16_TO_FP32(((const ggml_fp16_t *) drow)[j]));
}
}
}
} else {
GGML_ASSERT(false);
}
}
}
cdata += dst->nb[2];
cdataf += dst->nb[2];
splane += pa;
}
}
// ggml_compute_forward_upscale
static void ggml_compute_forward_upscale_f32(
@@ -17097,6 +17381,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
{
ggml_compute_forward_im2col(params, tensor);
} break;
case GGML_OP_IM2COL_BACK:
{
ggml_compute_forward_im2col_back_f32(params, tensor);
} break;
case GGML_OP_CONV_TRANSPOSE_2D:
{
ggml_compute_forward_conv_transpose_2d(params, tensor);
@@ -17109,6 +17397,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
{
ggml_compute_forward_pool_2d(params, tensor);
} break;
case GGML_OP_POOL_2D_BACK:
{
ggml_compute_forward_pool_2d_back(params, tensor);
} break;
case GGML_OP_UPSCALE:
{
ggml_compute_forward_upscale(params, tensor);
@@ -17477,7 +17769,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table);
}
if (src1->grad) {
-src1->grad = ggml_add_or_set(ctx, src1->grad, tensor->grad, zero_table);
+if (ggml_are_same_shape(src0, src1)) {
+src1->grad = ggml_add_or_set(ctx, src1->grad, tensor->grad, zero_table);
+} else {
+src1->grad = ggml_add_or_set(ctx, src1->grad, ggml_repeat_back(ctx, tensor->grad, src1), zero_table);
+}
}
} break;
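This is the counterpart to the same-shape assert removed from ggml_add_impl above: when src1 was broadcast in the forward pass, ggml_repeat_back sums the incoming gradient over the broadcast dimensions so that src1->grad ends up with src1's own shape.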
case GGML_OP_ADD1:
@@ -18074,6 +18370,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
GGML_ABORT("fatal error"); // TODO: not implemented
}
case GGML_OP_IM2COL:
{
if (src1->grad) {
const int32_t s0 = ggml_get_op_params_i32(tensor, 0);
const int32_t s1 = ggml_get_op_params_i32(tensor, 1);
const int32_t p0 = ggml_get_op_params_i32(tensor, 2);
const int32_t p1 = ggml_get_op_params_i32(tensor, 3);
const int32_t d0 = ggml_get_op_params_i32(tensor, 4);
const int32_t d1 = ggml_get_op_params_i32(tensor, 5);
const bool is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
src1->grad = ggml_add_or_set(ctx,
src1->grad,
ggml_im2col_back(ctx, src0, tensor->grad, src1->ne, s0, s1, p0, p1, d0, d1, is_2D),
zero_table);
}
} break;
case GGML_OP_IM2COL_BACK:
{
GGML_ABORT("fatal error"); // TODO: not implemented
}
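Note that only src1 (the data) receives a gradient here: as the comment in ggml_im2col says, the kernel a contributes only its shape, so in ggml_conv_2d the kernel gradient flows through the mul_mat backward instead. Second-order gradients (the GGML_OP_IM2COL_BACK case) remain unimplemented.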
@@ -18086,6 +18399,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
GGML_ABORT("fatal error"); // TODO: not implemented
}
case GGML_OP_POOL_2D:
{
if (src0->grad) {
const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
const int32_t k0 = ggml_get_op_params_i32(tensor, 1);
const int32_t k1 = ggml_get_op_params_i32(tensor, 2);
const int32_t s0 = ggml_get_op_params_i32(tensor, 3);
const int32_t s1 = ggml_get_op_params_i32(tensor, 4);
const int32_t p0 = ggml_get_op_params_i32(tensor, 5);
const int32_t p1 = ggml_get_op_params_i32(tensor, 6);
src0->grad = ggml_add_or_set(ctx,
src0->grad,
ggml_pool_2d_back(ctx, tensor->grad, src0, op, k0, k1, s0, s1, p0, p1),
zero_table);
}
} break;
case GGML_OP_POOL_2D_BACK:
{
GGML_ABORT("fatal error"); // TODO: not implemented
}
@@ -18375,6 +18705,7 @@ void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor *
void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep) {
GGML_ASSERT(gf->n_nodes > 0);
GGML_ASSERT(gf->grads);
// if we are keeping the gradient graph, we have to detach the gradient nodes from the original graph
if (keep) {
@@ -18802,6 +19133,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
} break;
case GGML_OP_IM2COL:
case GGML_OP_IM2COL_BACK:
case GGML_OP_CONV_TRANSPOSE_1D:
case GGML_OP_CONV_TRANSPOSE_2D:
{
@@ -18809,6 +19141,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
} break;
case GGML_OP_POOL_1D:
case GGML_OP_POOL_2D:
case GGML_OP_POOL_2D_BACK:
{
n_tasks = 1;
} break;
@@ -19322,9 +19655,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
const uint32_t type = tensor->type;
const uint32_t op = tensor->op;
const int32_t flags = tensor->flags;
fwrite(&type, sizeof(uint32_t), 1, fout);
fwrite(&op, sizeof(uint32_t), 1, fout);
fwrite(&flags, sizeof(int32_t), 1, fout);
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
const uint64_t ne = tensor->ne[j];
@@ -19354,9 +19689,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
const uint32_t type = tensor->type;
const uint32_t op = tensor->op;
const int32_t flags = tensor->flags;
fwrite(&type, sizeof(uint32_t), 1, fout);
fwrite(&op, sizeof(uint32_t), 1, fout);
fwrite(&flags, sizeof(int32_t), 1, fout);
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
const uint64_t ne = tensor->ne[j];
@@ -19415,6 +19752,14 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
}
}
}
// dump the data
// TODO: pad this to 32 byte boundary
if ((flags & GGML_TENSOR_FLAG_PARAM)) {
const size_t size = ggml_nbytes(tensor);
fwrite(tensor->data, sizeof(char), size, fout);
}
}
}
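These export changes, together with the matching import changes below, are what the GGML_FILE_VERSION bump from 1 to 2 at the top of this commit accounts for: graph files now store each tensor's flags, and tensors marked GGML_TENSOR_FLAG_PARAM also carry their data, so a set of trained parameters round-trips through ggml_graph_export/ggml_graph_import.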
@@ -19528,10 +19873,12 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
{
uint32_t type;
uint32_t op;
int32_t flags;
for (uint32_t i = 0; i < n_leafs; ++i) {
type = *(const uint32_t *) ptr; ptr += sizeof(type);
op = *(const uint32_t *) ptr; ptr += sizeof(op);
flags = *(const int32_t *) ptr; ptr += sizeof(flags);
int64_t ne[GGML_MAX_DIMS];
size_t nb[GGML_MAX_DIMS];
@@ -19549,20 +19896,19 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, GGML_MAX_DIMS, ne);
-tensor->op = (enum ggml_op) op;
+tensor->op    = (enum ggml_op) op;
+tensor->flags = flags;
memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS;
-tensor->data = (void *) ptr;
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
tensor->nb[j] = nb[j];
}
-result->leafs[i] = tensor;
+tensor->data = (void *) ptr; ptr += ggml_nbytes(tensor);
-ptr += ggml_nbytes(tensor);
+result->leafs[i] = tensor;
fprintf(stderr, "%s: loaded leaf %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
}
@@ -19574,10 +19920,12 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
{
uint32_t type;
uint32_t op;
int32_t flags;
for (uint32_t i = 0; i < n_nodes; ++i) {
type = *(const uint32_t *) ptr; ptr += sizeof(type);
op = *(const uint32_t *) ptr; ptr += sizeof(op);
flags = *(const int32_t *) ptr; ptr += sizeof(flags);
enum ggml_op eop = (enum ggml_op) op;
@@ -19667,6 +20015,11 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
result->nodes[i] = tensor;
// TODO tensor data is duplicated due to ggml_new_tensor call above
if (flags & GGML_TENSOR_FLAG_PARAM) {
tensor->data = (void *) ptr; ptr += ggml_nbytes(tensor);
}
fprintf(stderr, "%s: loaded node %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
}
}
@@ -20701,6 +21054,8 @@ enum ggml_opt_result ggml_opt(
struct ggml_context * ctx,
struct ggml_opt_params params,
struct ggml_tensor * f) {
GGML_ASSERT(f->grad && "ggml_set_param must be called for at least one ancestor");
bool free_ctx = false;
if (ctx == NULL) {
struct ggml_init_params params_ctx = {
@@ -20755,6 +21110,8 @@ enum ggml_opt_result ggml_opt_resume_g(
ggml_opt_callback callback,
void * callback_data) {
GGML_ASSERT(f->grad && "ggml_set_param must be called for at least one ancestor");
// build forward + backward compute graphs
enum ggml_opt_result result = GGML_OPT_RESULT_OK;
@@ -21842,6 +22199,7 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
void gguf_add_tensor(
struct gguf_context * ctx,
const struct ggml_tensor * tensor) {
GGML_ASSERT(tensor);
if (gguf_find_tensor(ctx, tensor->name) != -1) {
GGML_ABORT("duplicated tensor name");
}
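Taken together, these ops are what make the MNIST training example from the commit title possible (the example itself lives in separate files not shown on this page). Below is a condensed, hypothetical sketch of such a setup, using only API that appears in this diff or in the public header; train_step_sketch is an invented name, and weight initialization, batching and evaluation are omitted. Note that ggml_conv_2d can now take an F32 kernel thanks to the a->type change above.

#include "ggml.h"
#include <string.h>

// Hypothetical sketch: one optimization run of a tiny conv net on MNIST-sized data.
static void train_step_sketch(const float * images /* N*28*28 */,
                              const float * labels /* N*10, one-hot */,
                              int64_t N) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 1024ull*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * x = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 28, 28, 1, N); // [IW, IH, IC, N]
    struct ggml_tensor * y = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, N);
    memcpy(x->data, images, ggml_nbytes(x));
    memcpy(y->data, labels, ggml_nbytes(y));

    struct ggml_tensor * conv_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 3, 3, 1, 16); // [KW, KH, IC, OC]
    struct ggml_tensor * fc_w   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 14*14*16, 10);
    // ... fill conv_w/fc_w with small random values here (omitted) ...
    ggml_set_param(ctx, conv_w); // mark as trainable so gradients are kept
    ggml_set_param(ctx, fc_w);

    struct ggml_tensor * t = ggml_conv_2d(ctx, conv_w, x, 1, 1, 1, 1, 1, 1); // [28, 28, 16, N]
    t = ggml_relu(ctx, t);
    t = ggml_pool_2d(ctx, t, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0);            // [14, 14, 16, N]
    t = ggml_reshape_2d(ctx, t, 14*14*16, N);
    struct ggml_tensor * logits = ggml_mul_mat(ctx, fc_w, t);                // [10, N]
    struct ggml_tensor * loss   = ggml_cross_entropy_loss(ctx, logits, y);

    // ggml_opt builds the forward/backward graphs and runs Adam on the params
    enum ggml_opt_result res = ggml_opt(ctx, ggml_opt_default_params(GGML_OPT_TYPE_ADAM), loss);
    GGML_ASSERT(res == GGML_OPT_RESULT_OK);

    ggml_free(ctx);
}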