mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-04-21 17:58:30 +02:00
ggml : fix quantized cpy op (llama/12310)
* ggml : fix quantized cpy op
  ggml-ci
* tests : add cpy tests for all types
  ggml-ci
* tests : add BF16 copy tests
  ggml-ci
* tests : fix loop for same-type copy
  ggml-ci
* tests : add option to permute the dst tensor
  ggml-ci
This commit is contained in:
parent
d487a28ae1
commit
388ed98220
@ -3110,17 +3110,17 @@ static void ggml_compute_forward_dup_same_cont(
|
|||||||
const int ith = params->ith; // thread index
|
const int ith = params->ith; // thread index
|
||||||
const int nth = params->nth; // number of threads
|
const int nth = params->nth; // number of threads
|
||||||
|
|
||||||
// parallelize by elements
|
// parallelize by blocks
|
||||||
const int ne = ggml_nelements(dst);
|
const int nk = ggml_nelements(src0)/ggml_blck_size(src0->type);
|
||||||
const int dr = (ne + nth - 1) / nth;
|
const int dr = (nk + nth - 1) / nth;
|
||||||
const int ie0 = dr * ith;
|
const int k0 = dr * ith;
|
||||||
const int ie1 = MIN(ie0 + dr, ne);
|
const int k1 = MIN(k0 + dr, nk);
|
||||||
|
|
||||||
if (ie0 < ie1) {
|
if (k0 < k1) {
|
||||||
memcpy(
|
memcpy(
|
||||||
((char *) dst->data + ie0*nb0),
|
((char *) dst->data + k0*nb0),
|
||||||
((char *) src0->data + ie0*nb0),
|
((char *) src0->data + k0*nb0),
|
||||||
(ie1 - ie0) * nb0);
|
(k1 - k0) * nb0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4055,7 +4055,6 @@ static void ggml_compute_forward_dup_f32(
|
|||||||
static void ggml_compute_forward_dup_bytes(
|
static void ggml_compute_forward_dup_bytes(
|
||||||
const struct ggml_compute_params * params,
|
const struct ggml_compute_params * params,
|
||||||
struct ggml_tensor * dst) {
|
struct ggml_tensor * dst) {
|
||||||
|
|
||||||
const struct ggml_tensor * src0 = dst->src[0];
|
const struct ggml_tensor * src0 = dst->src[0];
|
||||||
|
|
||||||
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
||||||
@ -4069,10 +4068,10 @@ static void ggml_compute_forward_dup_bytes(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const size_t type_size = ggml_type_size(src0->type);
|
const size_t type_size = ggml_type_size(src0->type);
|
||||||
|
|
||||||
const int ith = params->ith; // thread index
|
const int ith = params->ith; // thread index
|
||||||
const int nth = params->nth; // number of threads
|
const int nth = params->nth; // number of threads
|
||||||
|
|
||||||
|
|
||||||
// parallelize by rows
|
// parallelize by rows
|
||||||
const int nr = ne01;
|
const int nr = ne01;
|
||||||
// number of rows per thread
|
// number of rows per thread
|
||||||
@ -4082,10 +4081,10 @@ static void ggml_compute_forward_dup_bytes(
|
|||||||
const int ir1 = MIN(ir0 + dr, nr);
|
const int ir1 = MIN(ir0 + dr, nr);
|
||||||
|
|
||||||
if (src0->type == dst->type &&
|
if (src0->type == dst->type &&
|
||||||
ne00 == ne0 &&
|
ggml_are_same_shape(src0, dst) &&
|
||||||
nb00 == type_size && nb0 == type_size) {
|
nb00 == type_size && nb0 == type_size) {
|
||||||
// copy by rows
|
// copy by rows
|
||||||
const size_t rs = ne00 * type_size;
|
const size_t rs = ggml_row_size(src0->type, ne00);
|
||||||
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
||||||
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
||||||
for (int64_t i01 = ir0; i01 < ir1; i01++) {
|
for (int64_t i01 = ir0; i01 < ir1; i01++) {
|
||||||
@ -4140,17 +4139,20 @@ static void ggml_compute_forward_dup_bytes(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// dst counters
|
// dst counters
|
||||||
|
int64_t k10 = 0;
|
||||||
int64_t i10 = 0;
|
|
||||||
int64_t i11 = 0;
|
int64_t i11 = 0;
|
||||||
int64_t i12 = 0;
|
int64_t i12 = 0;
|
||||||
int64_t i13 = 0;
|
int64_t i13 = 0;
|
||||||
|
|
||||||
|
// number of blocks in a row
|
||||||
|
const int64_t nk00 = ne00 / ggml_blck_size(src0->type);
|
||||||
|
const int64_t nk0 = ne0 / ggml_blck_size(dst->type);
|
||||||
|
|
||||||
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
||||||
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
||||||
i10 += ne00 * ir0;
|
k10 += nk00 * ir0;
|
||||||
while (i10 >= ne0) {
|
while (k10 >= nk0) {
|
||||||
i10 -= ne0;
|
k10 -= nk0;
|
||||||
if (++i11 == ne1) {
|
if (++i11 == ne1) {
|
||||||
i11 = 0;
|
i11 = 0;
|
||||||
if (++i12 == ne2) {
|
if (++i12 == ne2) {
|
||||||
@ -4162,14 +4164,14 @@ static void ggml_compute_forward_dup_bytes(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int64_t i01 = ir0; i01 < ir1; i01++) {
|
for (int64_t i01 = ir0; i01 < ir1; i01++) {
|
||||||
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
for (int64_t k00 = 0; k00 < nk00; k00++) {
|
||||||
const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
|
const char * src0_ptr = ((char *) src0->data + k00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
|
||||||
char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
|
char * dst_ptr = ((char *) dst->data + k10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
|
||||||
|
|
||||||
memcpy(dst_ptr, src0_ptr, type_size);
|
memcpy(dst_ptr, src0_ptr, type_size);
|
||||||
|
|
||||||
if (++i10 == ne0) {
|
if (++k10 == nk0) {
|
||||||
i10 = 0;
|
k10 = 0;
|
||||||
if (++i11 == ne1) {
|
if (++i11 == ne1) {
|
||||||
i11 = 0;
|
i11 = 0;
|
||||||
if (++i12 == ne2) {
|
if (++i12 == ne2) {
|
||||||
@ -4182,9 +4184,9 @@ static void ggml_compute_forward_dup_bytes(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
i10 += ne00 * (ne01 - ir1);
|
k10 += nk00 * (ne01 - ir1);
|
||||||
while (i10 >= ne0) {
|
while (k10 >= nk0) {
|
||||||
i10 -= ne0;
|
k10 -= nk0;
|
||||||
if (++i11 == ne1) {
|
if (++i11 == ne1) {
|
||||||
i11 = 0;
|
i11 = 0;
|
||||||
if (++i12 == ne2) {
|
if (++i12 == ne2) {
|
||||||
@ -14308,7 +14310,9 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|||||||
}
|
}
|
||||||
|
|
||||||
// extra_buffer op?
|
// extra_buffer op?
|
||||||
if (ggml_cpu_extra_compute_forward(params, tensor)) return;
|
if (ggml_cpu_extra_compute_forward(params, tensor)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
switch (tensor->op) {
|
switch (tensor->op) {
|
||||||
case GGML_OP_DUP:
|
case GGML_OP_DUP:
|
||||||
|
Loading…
Reference in New Issue
Block a user