mirror of https://github.com/ggerganov/whisper.cpp.git
synced 2025-01-30 17:58:40 +01:00
examples: add MNIST training + missing ops
This commit is contained in:
parent d2986f8b07
commit 1fbd828a5d
ggml/include/ggml.h

@@ -220,7 +220,7 @@
 #include <stdio.h>
 
 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
-#define GGML_FILE_VERSION 1
+#define GGML_FILE_VERSION 2
 
 #define GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
@@ -490,9 +490,11 @@ extern "C" {
         GGML_OP_CLAMP,
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
+        GGML_OP_IM2COL_BACK,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
+        GGML_OP_POOL_2D_BACK,
         GGML_OP_UPSCALE, // nearest interpolate
         GGML_OP_PAD,
         GGML_OP_ARANGE,
@@ -1582,34 +1584,49 @@ extern "C" {
             float                 min,
             float                 max);
 
     // im2col
    // converts data into a format that effectively results in a convolution when combined with matrix multiplication
     GGML_API struct ggml_tensor * ggml_im2col(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1,
-            bool                  is_2D,
-            enum ggml_type        dst_type);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1, // dilation dimension 1
+            bool                  is_2D,
+            enum ggml_type        dst_type);
+
+    GGML_API struct ggml_tensor * ggml_im2col_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // gradient of im2col output
+            int64_t             * ne, // shape of im2col input
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1, // dilation dimension 1
+            bool                  is_2D);
 
     GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1); // dilation dimension 1
 
     GGML_API struct ggml_tensor * ggml_conv_1d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
             int                   s0,  // stride
             int                   p0,  // padding
             int                   d0); // dilation
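The comment above is the key idea: im2col lays each convolution window out as one row of a matrix, so the convolution itself collapses into a single matrix multiplication with the flattened kernel. A minimal standalone sketch of that layout for a single channel with stride 1 and no padding or dilation (illustrative code, not part of ggml):

```c
#include <stdio.h>

// Toy im2col: an IH x IW image becomes an (OH*OW) x (KH*KW) patch matrix.
static void im2col_2d(const float * x, int IH, int IW, int KH, int KW, float * cols) {
    const int OH = IH - KH + 1;
    const int OW = IW - KW + 1;
    for (int oh = 0; oh < OH; oh++) {
        for (int ow = 0; ow < OW; ow++) {
            float * row = cols + (oh*OW + ow)*(KH*KW);
            for (int kh = 0; kh < KH; kh++) {
                for (int kw = 0; kw < KW; kw++) {
                    row[kh*KW + kw] = x[(oh + kh)*IW + (ow + kw)];
                }
            }
        }
    }
}

int main(void) {
    float x[4*4], cols[(3*3)*(2*2)];
    for (int i = 0; i < 16; i++) {
        x[i] = (float) i;
    }
    im2col_2d(x, 4, 4, 2, 2, cols);
    // Dotting each row with the flattened 2x2 kernel gives one convolution output;
    // all outputs at once are just cols (9x4) times the flattened kernel (4x1).
    for (int r = 0; r < 9; r++) {
        for (int c = 0; c < 4; c++) {
            printf("%5.0f", cols[r*4 + c]);
        }
        printf("\n");
    }
    return 0;
}
```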
@@ -1618,29 +1635,29 @@ extern "C" {
     // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
     GGML_API struct ggml_tensor* ggml_conv_1d_ph(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s,
-            int                   d);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s,  // stride
+            int                   d); // dilation
 
     GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   p0,
-            int                   d0);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride
+            int                   p0, // padding
+            int                   d0); // dilation
 
     GGML_API struct ggml_tensor * ggml_conv_2d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1); // dilation dimension 1
 
 
     // kernel size is a->ne[0] x a->ne[1]

@@ -1702,6 +1719,18 @@ extern "C" {
             float                 p0,
             float                 p1);
 
+    GGML_API struct ggml_tensor * ggml_pool_2d_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * af, // "a"/input used in forward pass
+            enum ggml_op_pool     op,
+            int                   k0,
+            int                   k1,
+            int                   s0,
+            int                   s1,
+            float                 p0,
+            float                 p1);
+
     // nearest interpolate
     // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
ggml/src/ggml.c
@@ -2801,9 +2801,11 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CLAMP",
     "CONV_TRANSPOSE_1D",
     "IM2COL",
+    "IM2COL_BACK",
     "CONV_TRANSPOSE_2D",
     "POOL_1D",
     "POOL_2D",
+    "POOL_2D_BACK",
     "UPSCALE",
     "PAD",
     "ARANGE",
@@ -2837,7 +2839,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
+static_assert(GGML_OP_COUNT == 78, "GGML_OP_COUNT != 78");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -2891,9 +2893,11 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "clamp(x)",
     "conv_transpose_1d(x)",
     "im2col(x)",
+    "im2col_back(x)",
     "conv_transpose_2d(x)",
     "pool_1d(x)",
     "pool_2d(x)",
+    "pool_2d_back(x)",
     "upscale(x)",
     "pad(x)",
     "arange(start, stop, step)",
@@ -2927,7 +2931,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
+static_assert(GGML_OP_COUNT == 78, "GGML_OP_COUNT != 78");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
@@ -3741,6 +3745,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
 
     size_t data_size = ggml_row_size(type, ne[0]);
     for (int i = 1; i < n_dims; i++) {
+        assert(ne[i] > 0);
         data_size *= ne[i];
     }
@@ -3773,6 +3778,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     }
 
     struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
+    GGML_ASSERT(obj_new);
 
     // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
@@ -4492,8 +4498,6 @@ static struct ggml_tensor * ggml_add_impl(
     bool is_node = false;
 
     if (!inplace && (a->grad || b->grad)) {
-        // TODO: support backward pass for broadcasting
-        GGML_ASSERT(ggml_are_same_shape(a, b));
         is_node = true;
     }
@@ -6801,17 +6805,20 @@ struct ggml_tensor * ggml_im2col(
         GGML_ASSERT(a->ne[2] == b->ne[2]);
     } else {
         GGML_ASSERT(a->ne[1] == b->ne[1]);
+        GGML_ASSERT(b->ne[3] == 1);
     }
     bool is_node = false;
 
-    if (a->grad || b->grad) {
-        GGML_ABORT("fatal error"); // TODO: implement backward
+    if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
         is_node = true;
     }
 
     const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
     const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
 
+    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
+    GGML_ASSERT((OW > 0)           && "b too small compared to a");
+
     const int64_t ne[4] = {
         is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
         OW,
@@ -6831,6 +6838,37 @@ struct ggml_tensor * ggml_im2col(
     return result;
 }
 
+struct ggml_tensor * ggml_im2col_back(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int64_t             * ne,
+        int                   s0,
+        int                   s1,
+        int                   p0,
+        int                   p1,
+        int                   d0,
+        int                   d1,
+        bool                  is_2D) {
+
+    bool is_node = false;
+
+    if (/*a->grad ||*/ b->grad) { // a is only used for its shape, not its data
+        is_node = true;
+    }
+
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op   = GGML_OP_IM2COL_BACK;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
 // a: [OC,IC, KH, KW]
 // b: [N, IC, IH, IW]
 // result: [N, OC, OH, OW]
@@ -6844,7 +6882,7 @@ struct ggml_tensor * ggml_conv_2d(
         int                   p1,
         int                   d0,
         int                   d1) {
-    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N, OH, OW, IC * KH * KW]
+    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
 
     struct ggml_tensor * result =
         ggml_mul_mat(ctx,
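Replacing the hard-coded GGML_TYPE_F16 destination with a->type lets an F32 kernel stay F32 through im2col and the matmul, which the new backward path relies on (gradients are computed in F32). A hedged sketch of how the pieces combine for training-style use; the sizes, the sum-as-loss, and the graph helper calls are illustrative assumptions about this revision's API, not part of the diff:

```c
#include "ggml.h"

// Sketch: conv_2d forward + backward over a trainable F32 kernel.
static void conv2d_backward_sketch(void) {
    struct ggml_init_params ip = { /*mem_size =*/ 256*1024*1024, /*mem_buffer =*/ NULL, /*no_alloc =*/ false };
    struct ggml_context * ctx = ggml_init(ip);

    // ne order is [W, H, C, N]: a 3x3 kernel with 1 input and 8 output channels,
    // applied to one 28x28 single-channel image (MNIST-like).
    struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 3, 3, 1, 8);
    struct ggml_tensor * image  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 28, 28, 1, 1);
    ggml_set_param(ctx, kernel); // trainable -> kernel->grad is allocated

    struct ggml_tensor * out  = ggml_conv_2d(ctx, kernel, image, 1, 1, 1, 1, 1, 1);
    struct ggml_tensor * loss = ggml_sum(ctx, out); // placeholder scalar loss

    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
    ggml_build_forward_expand(gf, loss);

    // The backward graph routes through GGML_OP_IM2COL_BACK for the data gradient;
    // the kernel gradient falls out of the mul_mat backward.
    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
    ggml_build_backward_expand(ctx, gf, gb, /*keep =*/ false);
    ggml_graph_compute_with_ctx(ctx, gb, /*n_threads =*/ 4);

    ggml_free(ctx);
}
```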
@@ -6970,17 +7008,17 @@ struct ggml_tensor * ggml_pool_2d(
     bool is_node = false;
 
     if (a->grad) {
-        GGML_ABORT("fatal error"); // TODO: implement backward
         is_node = true;
     }
 
     struct ggml_tensor * result;
-    const int64_t ne[3] = {
+    const int64_t ne[4] = {
         ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
         ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
         a->ne[2],
+        a->ne[3],
     };
-    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
+    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
     int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
     ggml_set_op_params(result, params, sizeof(params));
@@ -6991,6 +7029,37 @@ struct ggml_tensor * ggml_pool_2d(
     return result;
 }
 
+struct ggml_tensor * ggml_pool_2d_back(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * af,
+        enum ggml_op_pool     op,
+        int                   k0,
+        int                   k1,
+        int                   s0,
+        int                   s1,
+        float                 p0,
+        float                 p1) {
+
+    bool is_node = false;
+
+    if (a->grad) {
+        is_node = true;
+    }
+
+    struct ggml_tensor * result;
+    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
+
+    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op   = GGML_OP_POOL_2D_BACK;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = af;
+
+    return result;
+}
+
 // ggml_upscale
 
 static struct ggml_tensor * ggml_upscale_impl(
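One asymmetry worth noting: ggml_pool_2d_back takes af, the input of the forward pool, as a second source, because max pooling has to rediscover which element won each window; and the result is allocated from af->ne because the gradient with respect to the pool input has that input's shape. Illustrative shapes under an assumed 2x2 pool (not from the diff):

```c
// ne order [W, H, C, N]; k0 = k1 = s0 = s1 = 2, p0 = p1 = 0 (assumed sizes):
//   forward:  out = ggml_pool_2d(ctx, af, op, 2, 2, 2, 2, 0, 0);
//             af [28, 28, 8, 1]               -> out [14, 14, 8, 1]
//   backward: din = ggml_pool_2d_back(ctx, dout, af, op, 2, 2, 2, 2, 0, 0);
//             dout = out->grad [14, 14, 8, 1] -> din [28, 28, 8, 1]  (= af->ne)
```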
@@ -14714,6 +14783,7 @@ static void ggml_compute_forward_conv_transpose_1d(
     }
 }
 
+// ggml_compute_forward_im2col_f32
 // src0: kernel [OC, IC, KH, KW]
 // src1: image  [N, IC, IH, IW]
 // dst:  result [N, OH, OW, IC*KH*KW]
@@ -14724,7 +14794,6 @@ static void ggml_compute_forward_im2col_f32(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -14755,7 +14824,6 @@ static void ggml_compute_forward_im2col_f32(
     int ofs0 = is_2D ? nb13 : nb12;
     int ofs1 = is_2D ? nb12 : nb11;
 
-    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
     GGML_ASSERT(nb10 == sizeof(float));
 
     // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
@@ -14791,6 +14859,7 @@ static void ggml_compute_forward_im2col_f32(
     }
 }
 
+
 // ggml_compute_forward_im2col_f16
 // src0: kernel [OC, IC, KH, KW]
 // src1: image  [N, IC, IH, IW]
 // dst:  result [N, OH, OW, IC*KH*KW]
@@ -14886,6 +14955,99 @@ static void ggml_compute_forward_im2col(
     }
 }
 
+// ggml_compute_forward_im2col_back_f32
+
+static void ggml_compute_forward_im2col_back_f32(
+        const struct ggml_compute_params * params,
+              struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    GGML_ASSERT(src1->type == GGML_TYPE_F32);
+    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+
+    GGML_TENSOR_BINARY_OP_LOCALS;
+
+    const int32_t s0    = ((const int32_t *)(dst->op_params))[0];
+    const int32_t s1    = ((const int32_t *)(dst->op_params))[1];
+    const int32_t p0    = ((const int32_t *)(dst->op_params))[2];
+    const int32_t p1    = ((const int32_t *)(dst->op_params))[3];
+    const int32_t d0    = ((const int32_t *)(dst->op_params))[4];
+    const int32_t d1    = ((const int32_t *)(dst->op_params))[5];
+    const bool is_2D    = ((const int32_t *)(dst->op_params))[6] == 1;
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int64_t N  = is_2D ? ne3 : ne2;
+    const int64_t IC = is_2D ? ne2 : ne1;
+    const int64_t IH = is_2D ? ne1 : 1;
+    const int64_t IW = ne0;
+
+    const int64_t KH = is_2D ? ne01 : 1;
+    const int64_t KW = ne00;
+
+    const int64_t OH = is_2D ? ne12 : 1;
+    const int64_t OW = ne11;
+
+    int ofs0 = is_2D ? nb3 : nb2;
+    int ofs1 = is_2D ? nb2 : nb1;
+
+    GGML_ASSERT(nb0 == sizeof(float));
+
+    // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
+    {
+        float * const wdata = (float *) dst->data;
+
+        for (int64_t in = 0; in < N; in++) {
+            for (int64_t iic = ith; iic < IC; iic += nth) {
+                for (int64_t iih = 0; iih < IH; iih++) {
+                    for (int64_t iiw = 0; iiw < IW; iiw++) {
+
+                        // micro kernel
+                        float grad = 0.0f;
+                        for (int64_t ikh = 0; ikh < KH; ikh++) {
+                            for (int64_t ikw = 0; ikw < KW; ikw++) {
+                                // For s0 > 1 some values were skipped over in the forward pass.
+                                // These values have tmpw % s0 != 0 and need to be skipped in the backwards pass as well.
+                                const int64_t tmpw = (iiw + p0 - ikw*d0);
+                                if (tmpw % s0 != 0) {
+                                    continue;
+                                }
+                                const int64_t iow = tmpw / s0;
+
+                                // Equivalent logic as above except for s1.
+                                int64_t ioh;
+                                if (is_2D) {
+                                    const int64_t tmph = iih + p1 - ikh*d1;
+
+                                    if (tmph % s1 != 0) {
+                                        continue;
+                                    }
+
+                                    ioh = tmph / s1;
+                                } else {
+                                    ioh = 0;
+                                }
+
+                                if (iow < 0 || iow >= OW || ioh < 0 || ioh >= OH) {
+                                    continue;
+                                }
+
+                                const float * const src_data = (const float *) src1->data
+                                    + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
+                                grad += src_data[iic*(KH*KW) + ikh*KW + ikw];
+                            }
+                        }
+                        float * dst_data = (float *)((char *) wdata + (in*ofs0 + iic*ofs1)); // [IH, IW]
+                        dst_data[iih*IW + iiw] = grad;
+                    }
+                }
+            }
+        }
+    }
+}
+
 // ggml_compute_forward_conv_transpose_2d
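The micro kernel above inverts the forward index map. Forward, output column (iow, ikw) reads input position iiw = iow*s0 + ikw*d0 - p0; backward solves iow = (iiw + p0 - ikw*d0) / s0 and discards candidates where the division leaves a remainder (positions the stride skipped) or where iow falls outside [0, OW). A tiny standalone check of that inverse, with hypothetical sizes:

```c
#include <stdio.h>

// Enumerate which (iow, ikw) pairs contribute gradient to each input position iiw,
// mirroring the skip conditions in ggml_compute_forward_im2col_back_f32.
int main(void) {
    const int s0 = 2, p0 = 1, d0 = 1, KW = 3, OW = 4, IW = 8;
    for (int iiw = 0; iiw < IW; iiw++) {
        printf("iiw=%d <-", iiw);
        for (int ikw = 0; ikw < KW; ikw++) {
            const int tmpw = iiw + p0 - ikw*d0;
            if (tmpw % s0 != 0) {
                continue; // this combination was skipped by the stride
            }
            const int iow = tmpw / s0;
            if (iow < 0 || iow >= OW) {
                continue; // outside the forward output
            }
            printf(" (iow=%d, ikw=%d)", iow, ikw);
        }
        printf("\n");
    }
    return 0;
}
```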
@@ -15128,6 +15290,128 @@ static void ggml_compute_forward_pool_2d(
     }
 }
 
+// ggml_compute_forward_pool_2d_back
+
+static void ggml_compute_forward_pool_2d_back(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src  = dst->src[0];
+    const struct ggml_tensor * dstf = dst->src[1]; // forward tensor of dst
+
+    assert(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    const int32_t * opts = (const int32_t *)dst->op_params;
+    enum ggml_op_pool op = opts[0];
+    const int k0 = opts[1];
+    const int k1 = opts[2];
+    const int s0 = opts[3];
+    const int s1 = opts[4];
+    const int p0 = opts[5];
+    const int p1 = opts[6];
+
+    char       * cdata  = (char *) dst->data;
+    const char * cdataf = (const char *) dstf->data;
+    const char * const data_end = cdata + ggml_nbytes(dst);
+
+    GGML_ASSERT(params->ith == 0);
+    memset(cdata, 0, ggml_nbytes(dst));
+
+    const int64_t px = src->ne[0];
+    const int64_t py = src->ne[1];
+    const int64_t pa = px * py;
+
+    const float * splane = (const float *) src->data;
+
+    const int ka = k0 * k1;
+    const int offset0 = -p0;
+    const int offset1 = -p1;
+
+    while (cdata < data_end) {
+        for (int oy = 0; oy < py; ++oy) {
+            const float * const srow = splane + oy * px;
+            for (int ox = 0; ox < px; ++ox) {
+                const float grad0 = srow[ox];
+
+                const int ix = offset0 + ox * s0;
+                const int iy = offset1 + oy * s1;
+
+                if (op == GGML_OP_POOL_MAX) {
+                    float maxval = -FLT_MAX;
+                    int kxmax = -1;
+                    int kymax = -1;
+
+                    for (int ky = 0; ky < k1; ++ky) {
+                        if (iy + ky < 0 || iy + ky >= dst->ne[1]) {
+                            continue;
+                        }
+                        const void * drowf = (const void *)(cdataf + dst->nb[1] * (iy + ky));
+                        for (int kx = 0; kx < k0; ++kx) {
+                            int j = ix + kx;
+                            if (j < 0 || j >= dst->ne[0]) {
+                                continue;
+                            }
+
+                            const float val = dst->type == GGML_TYPE_F32 ?
+                                ((const float *) drowf)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t *) drowf)[j]);
+                            if (val <= maxval) {
+                                continue;
+                            }
+
+                            maxval = val;
+                            kxmax = kx;
+                            kymax = ky;
+                        }
+                    }
+
+                    if (kxmax == -1 || kymax == -1) {
+                        continue;
+                    }
+
+                    void * drow = (void *)(cdata + dst->nb[1] * (iy + kymax));
+                    const int j = ix + kxmax;
+                    if (dst->type == GGML_TYPE_F32) {
+                        ((float *) drow)[j] += grad0;
+                    } else {
+                        ((ggml_fp16_t *) drow)[j] = GGML_FP32_TO_FP16(grad0 + GGML_FP16_TO_FP32(((const ggml_fp16_t *) drow)[j]));
+                    }
+                } else if (op == GGML_OP_POOL_AVG) {
+                    const float grad = grad0 / ka;
+
+                    for (int ky = 0; ky < k1; ++ky) {
+                        if (iy + ky < 0 || iy + ky >= dst->ne[1]) {
+                            continue;
+                        }
+                        void * drow = (void *)(cdata + dst->nb[1] * (iy + ky));
+                        for (int kx = 0; kx < k0; ++kx) {
+                            int j = ix + kx;
+                            if (j < 0 || j >= dst->ne[0]) {
+                                continue;
+                            }
+
+                            if (dst->type == GGML_TYPE_F32) {
+                                ((float *) drow)[j] += grad;
+                            } else {
+                                ((ggml_fp16_t *) drow)[j] += GGML_FP32_TO_FP16(grad);
+                            }
+                        }
+                    }
+                } else {
+                    GGML_ASSERT(false);
+                }
+            }
+        }
+
+        cdata  += dst->nb[2];
+        cdataf += dst->nb[2];
+        splane += pa;
+    }
+}
+
 // ggml_compute_forward_upscale
 
 static void ggml_compute_forward_upscale_f32(
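Two backward rules live in this one kernel: GGML_OP_POOL_AVG spreads the incoming gradient grad0 uniformly over the k0*k1 window (hence grad0 / ka), while GGML_OP_POOL_MAX recomputes the argmax from dstf, the saved forward input, and routes the entire gradient to that single winning cell. A minimal standalone 1D version of both rules, assuming stride equals kernel size and no padding:

```c
#include <stdio.h>

// 1D pooling backward, window k, stride k, no padding (illustration only).
// grad_out has n/k entries; grad_in accumulates n entries.
static void pool1d_back(const float * x, const float * grad_out,
                        float * grad_in, int n, int k, int use_max) {
    for (int o = 0; o < n / k; o++) {
        if (use_max) {
            int best = o*k; // recompute the forward argmax from x
            for (int j = 1; j < k; j++) {
                if (x[o*k + j] > x[best]) best = o*k + j;
            }
            grad_in[best] += grad_out[o];            // all gradient to the winner
        } else {
            for (int j = 0; j < k; j++) {
                grad_in[o*k + j] += grad_out[o] / k; // spread uniformly
            }
        }
    }
}

int main(void) {
    const float x[6] = { 1, 5, 2, 4, 3, 0 };
    const float g[3] = { 1, 1, 1 };
    float gmax[6] = { 0 }, gavg[6] = { 0 };
    pool1d_back(x, g, gmax, 6, 2, 1);
    pool1d_back(x, g, gavg, 6, 2, 0);
    for (int i = 0; i < 6; i++) {
        printf("max: %.1f  avg: %.1f\n", gmax[i], gavg[i]);
    }
    return 0;
}
```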
@@ -17097,6 +17381,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_im2col(params, tensor);
             } break;
+        case GGML_OP_IM2COL_BACK:
+            {
+                ggml_compute_forward_im2col_back_f32(params, tensor);
+            } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
                 ggml_compute_forward_conv_transpose_2d(params, tensor);
@@ -17109,6 +17397,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_pool_2d(params, tensor);
             } break;
+        case GGML_OP_POOL_2D_BACK:
+            {
+                ggml_compute_forward_pool_2d_back(params, tensor);
+            } break;
         case GGML_OP_UPSCALE:
             {
                 ggml_compute_forward_upscale(params, tensor);
@@ -17477,7 +17769,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table);
             }
             if (src1->grad) {
-                src1->grad = ggml_add_or_set(ctx, src1->grad, tensor->grad, zero_table);
+                if (ggml_are_same_shape(src0, src1)) {
+                    src1->grad = ggml_add_or_set(ctx, src1->grad,                       tensor->grad,        zero_table);
+                } else {
+                    src1->grad = ggml_add_or_set(ctx, src1->grad, ggml_repeat_back(ctx, tensor->grad, src1), zero_table);
+                }
             }
         } break;
     case GGML_OP_ADD1:
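This pairs with the removal of the same-shape assert in ggml_add_impl above: once the forward add may broadcast b over a, the gradient flowing back to the smaller operand has to be summed over the broadcast dimensions, which is what ggml_repeat_back provides. A tiny standalone model of that reduction, for a row vector added to every row of a matrix:

```c
#include <stdio.h>

// If forward computed C[r][c] = A[r][c] + b[c] (b broadcast over rows),
// then dL/db[c] = sum_r dL/dC[r][c] -- the "repeat_back" reduction.
int main(void) {
    enum { R = 3, C = 4 };
    float dC[R][C], db[C] = { 0 };
    for (int r = 0; r < R; r++) {
        for (int c = 0; c < C; c++) {
            dC[r][c] = 1.0f; // pretend upstream gradient
        }
    }
    for (int r = 0; r < R; r++) {
        for (int c = 0; c < C; c++) {
            db[c] += dC[r][c]; // sum over the broadcast dimension
        }
    }
    for (int c = 0; c < C; c++) {
        printf("db[%d] = %.1f\n", c, db[c]); // 3.0 each
    }
    return 0;
}
```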
@@ -18074,6 +18370,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 GGML_ABORT("fatal error"); // TODO: not implemented
             }
         case GGML_OP_IM2COL:
             {
-                GGML_ABORT("fatal error"); // TODO: not implemented
-            }
+                if (src1->grad) {
+                    const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
+                    const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
+                    const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
+                    const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
+                    const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
+                    const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
+                    const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
+
+                    src1->grad = ggml_add_or_set(ctx,
+                            src1->grad,
+                            ggml_im2col_back(ctx, src0, tensor->grad, src1->ne, s0, s1, p0, p1, d0, d1, is_2D),
+                            zero_table);
+                }
+            } break;
+        case GGML_OP_IM2COL_BACK:
+            {
+                GGML_ABORT("fatal error"); // TODO: not implemented
+            }
@@ -18086,6 +18399,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 GGML_ABORT("fatal error"); // TODO: not implemented
             }
         case GGML_OP_POOL_2D:
             {
-                GGML_ABORT("fatal error"); // TODO: not implemented
-            }
+                if (src0->grad) {
+                    const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
+                    const      int32_t      k0 = ggml_get_op_params_i32(tensor, 1);
+                    const      int32_t      k1 = ggml_get_op_params_i32(tensor, 2);
+                    const      int32_t      s0 = ggml_get_op_params_i32(tensor, 3);
+                    const      int32_t      s1 = ggml_get_op_params_i32(tensor, 4);
+                    const      int32_t      p0 = ggml_get_op_params_i32(tensor, 5);
+                    const      int32_t      p1 = ggml_get_op_params_i32(tensor, 6);
+
+                    src0->grad = ggml_add_or_set(ctx,
+                            src0->grad,
+                            ggml_pool_2d_back(ctx, tensor->grad, src0, op, k0, k1, s0, s1, p0, p1),
+                            zero_table);
+                }
+            } break;
+        case GGML_OP_POOL_2D_BACK:
+            {
+                GGML_ABORT("fatal error"); // TODO: not implemented
+            }
@@ -18375,6 +18705,7 @@ void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor *
 
 void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep) {
     GGML_ASSERT(gf->n_nodes > 0);
+    GGML_ASSERT(gf->grads);
 
     // if we are keeping the gradient graph, we have to detach the gradient nodes from the original graph
     if (keep) {
@@ -18802,6 +19133,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                 n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
             } break;
         case GGML_OP_IM2COL:
+        case GGML_OP_IM2COL_BACK:
         case GGML_OP_CONV_TRANSPOSE_1D:
        case GGML_OP_CONV_TRANSPOSE_2D:
            {
@@ -18809,6 +19141,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_POOL_1D:
         case GGML_OP_POOL_2D:
+        case GGML_OP_POOL_2D_BACK:
             {
                 n_tasks = 1;
             } break;
@@ -19322,9 +19655,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
 
             const uint32_t type   = tensor->type;
             const uint32_t op     = tensor->op;
+            const int32_t  flags  = tensor->flags;
 
             fwrite(&type,  sizeof(uint32_t), 1, fout);
             fwrite(&op,    sizeof(uint32_t), 1, fout);
+            fwrite(&flags, sizeof(int32_t),  1, fout);
 
             for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                 const uint64_t ne = tensor->ne[j];
@@ -19354,9 +19689,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
 
             const uint32_t type   = tensor->type;
             const uint32_t op     = tensor->op;
+            const int32_t  flags  = tensor->flags;
 
             fwrite(&type,  sizeof(uint32_t), 1, fout);
             fwrite(&op,    sizeof(uint32_t), 1, fout);
+            fwrite(&flags, sizeof(int32_t),  1, fout);
 
             for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                 const uint64_t ne = tensor->ne[j];
@@ -19415,6 +19752,14 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
                     }
                 }
             }
+
+            // dump the data
+            // TODO: pad this to 32 byte boundary
+            if ((flags & GGML_TENSOR_FLAG_PARAM)) {
+                const size_t size = ggml_nbytes(tensor);
+
+                fwrite(tensor->data, sizeof(char), size, fout);
+            }
         }
     }
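Together with the import changes below, this is why GGML_FILE_VERSION jumped from 1 to 2 at the top of the commit: every tensor record now carries tensor->flags, and tensors flagged GGML_TENSOR_FLAG_PARAM additionally serialize their raw data, so a trainable graph survives an export/import round trip. A sketch of the per-tensor record as the fwrite/memcpy calls in this diff suggest it (field order inferred, not normative):

```c
// Per-tensor record, ggml graph file version 2 (inferred sketch):
//   uint32_t type;                         // ggml_type
//   uint32_t op;                           // ggml_op
//   int32_t  flags;                        // new in version 2
//   uint64_t ne[GGML_MAX_DIMS];            // shape
//   uint64_t nb[GGML_MAX_DIMS];            // strides
//   char     name[GGML_MAX_NAME];
//   char     op_params[GGML_MAX_OP_PARAMS];
//   ...                                    // node-only bookkeeping (src indices etc.)
//   char     data[ggml_nbytes(tensor)];    // new: only if (flags & GGML_TENSOR_FLAG_PARAM)
```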
@@ -19528,10 +19873,12 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
         {
             uint32_t type;
             uint32_t op;
+            int32_t  flags;
 
             for (uint32_t i = 0; i < n_leafs; ++i) {
                 type   = *(const uint32_t *) ptr; ptr += sizeof(type);
                 op     = *(const uint32_t *) ptr; ptr += sizeof(op);
+                flags  = *(const int32_t  *) ptr; ptr += sizeof(flags);
 
                 int64_t ne[GGML_MAX_DIMS];
                 size_t  nb[GGML_MAX_DIMS];
@@ -19549,20 +19896,19 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
 
                 struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, GGML_MAX_DIMS, ne);
 
-                tensor->op = (enum ggml_op) op;
+                tensor->op    = (enum ggml_op) op;
+                tensor->flags = flags;
 
                 memcpy(tensor->name,      ptr, GGML_MAX_NAME);      ptr += GGML_MAX_NAME;
                 memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS;
 
-                tensor->data = (void *) ptr;
-
                 for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                     tensor->nb[j] = nb[j];
                 }
 
-                result->leafs[i] = tensor;
+                tensor->data = (void *) ptr; ptr += ggml_nbytes(tensor);
 
-                ptr += ggml_nbytes(tensor);
+                result->leafs[i] = tensor;
 
                 fprintf(stderr, "%s: loaded leaf %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
             }
@@ -19574,10 +19920,12 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
         {
             uint32_t type;
             uint32_t op;
+            int32_t  flags;
 
             for (uint32_t i = 0; i < n_nodes; ++i) {
                 type   = *(const uint32_t *) ptr; ptr += sizeof(type);
                 op     = *(const uint32_t *) ptr; ptr += sizeof(op);
+                flags  = *(const int32_t  *) ptr; ptr += sizeof(flags);
 
                 enum ggml_op eop = (enum ggml_op) op;
@@ -19667,6 +20015,11 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
 
                 result->nodes[i] = tensor;
 
+                // TODO tensor data is be duplicated due to ggml_new_tensor call above
+                if (flags & GGML_TENSOR_FLAG_PARAM) {
+                    tensor->data = (void *) ptr; ptr += ggml_nbytes(tensor);
+                }
+
                 fprintf(stderr, "%s: loaded node %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
             }
         }
@@ -20701,6 +21054,8 @@ enum ggml_opt_result ggml_opt(
         struct ggml_context * ctx,
         struct ggml_opt_params params,
         struct ggml_tensor * f) {
+    GGML_ASSERT(f->grad && "ggml_set_param called for at least one parent tensor.");
+
     bool free_ctx = false;
     if (ctx == NULL) {
         struct ggml_init_params params_ctx = {
@@ -20755,6 +21110,8 @@ enum ggml_opt_result ggml_opt_resume_g(
         ggml_opt_callback callback,
         void * callback_data) {
 
+    GGML_ASSERT(f->grad && "ggml_set_param must be called for at least one ancestor");
+
     // build forward + backward compute graphs
     enum ggml_opt_result result = GGML_OPT_RESULT_OK;
@@ -21842,6 +22199,7 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
 void gguf_add_tensor(
              struct gguf_context * ctx,
         const struct ggml_tensor * tensor) {
+    GGML_ASSERT(tensor);
     if (gguf_find_tensor(ctx, tensor->name) != -1) {
         GGML_ABORT("duplicated tensor name");
     }