diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c index f4a06b36..9016314f 100644 --- a/ggml/src/ggml-quants.c +++ b/ggml/src/ggml-quants.c @@ -4187,30 +4187,25 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r acc_3 = __lsx_vfadd_s(p3_d, acc_3); } - *s = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3); - -#else - // scalar - float sumf = 0.0; - - for (int i = 0; i < nb; i++) { + sumf = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3); +#endif + for (; ib < nb; ++ib) { int sumi0 = 0; int sumi1 = 0; for (int j = 0; j < qk/2; ++j) { - const int v0 = (x[i].qs[j] & 0x0F) - 8; - const int v1 = (x[i].qs[j] >> 4) - 8; + const int v0 = (x[ib].qs[j] & 0x0F) - 8; + const int v1 = (x[ib].qs[j] >> 4) - 8; - sumi0 += (v0 * y[i].qs[j]); - sumi1 += (v1 * y[i].qs[j + qk/2]); + sumi0 += (v0 * y[ib].qs[j]); + sumi1 += (v1 * y[ib].qs[j + qk/2]); } int sumi = sumi0 + sumi1; - sumf += sumi*GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d); + sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d); } *s = sumf; -#endif } void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { @@ -4479,36 +4474,34 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r acc = __lasx_xvfmadd_s( d0d1, xy, acc ); } - *s = hsum_float_8(acc) + summs; - -#else - // scalar - float sumf = 0.0; - - for (int i = 0; i < nb; i++) { - int sumi0 = 0 + sumf = hsum_float_8(acc) + summs; +#endif + for (; ib < nb; ++ib) { + int sumi0 = 0; int sumi1 = 0; for (int j = 0; j < qk/2; ++j) { - const int v0 = (x[i].qs[j] & 0x0F); - const int v1 = (x[i].qs[j] >> 4); + const int v0 = (x[ib].qs[j] & 0x0F); + const int v1 = (x[ib].qs[j] >> 4); - sumi0 += (v0 * y[i].qs[j]); - sumi1 += (v1 * y[i].qs[j + qk/2]); + sumi0 += (v0 * y[ib].qs[j]); + sumi1 += (v1 * y[ib].qs[j + qk/2]); } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s); + sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); } *s = sumf; -#endif } void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { const int qk = QK8_0; const int nb = n / qk; + int ib = 0; + float sumf = 0; + assert(n % qk == 0); assert(qk == QK5_0); assert(nrc == 1); @@ -4830,15 +4823,11 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r acc = __lasx_xvfmadd_s(d, q, acc); } - *s = hsum_float_8(acc); - -#else - // scalar - float sumf = 0.0; - - for (int i = 0; i < nb; i++) { + sumf = hsum_float_8(acc); +#endif + for (; ib < nb; ++ib) { uint32_t qh; - memcpy(&qh, x[i].qh, sizeof(qh)); + memcpy(&qh, x[ib].qh, sizeof(qh)); int sumi0 = 0; int sumi1 = 0; @@ -4847,25 +4836,27 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4; const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12)); - const int32_t x0 = (int8_t)(((x[i].qs[j] & 0x0F) | xh_0) - 16); - const int32_t x1 = (int8_t)(((x[i].qs[j] >> 4) | xh_1) - 16); + const int32_t x0 = (int8_t)(((x[ib].qs[j] & 0x0F) | xh_0) - 16); + const int32_t x1 = (int8_t)(((x[ib].qs[j] >> 4) | xh_1) - 16); - sumi0 += (x0 * y[i].qs[j]); - sumi1 += (x1 * y[i].qs[j + qk/2]); + sumi0 += (x0 * y[ib].qs[j]); + sumi1 += (x1 * y[ib].qs[j + qk/2]); } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d)) * sumi; + sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi; } *s = sumf; -#endif } void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { const int qk = QK8_1; const int nb = n / qk; + int ib = 0; + float sumf = 0; + assert(n % qk == 0); assert(qk == QK5_1); assert(nrc == 1); @@ -5188,33 +5179,29 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r float summs = 0.0f; // Main loop - for (int i = 0; i < nb; i++) { - const __m256 dx = __lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(x[i].d)); + for (; ib < nb; ++ib) { + const __m256 dx = __lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(x[ib].d)); - summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s); + summs += GGML_FP16_TO_FP32(x[ib].m) * GGML_FP16_TO_FP32(y[ib].s); - __m256i qx = bytes_from_nibbles_32(x[i].qs); - __m256i bxhi = bytes_from_bits_32(x[i].qh); + __m256i qx = bytes_from_nibbles_32(x[ib].qs); + __m256i bxhi = bytes_from_bits_32(x[ib].qh); bxhi = __lasx_xvand_v(bxhi, __lasx_xvreplgr2vr_b(0x10)); qx = __lasx_xvor_v(qx, bxhi); - const __m256 dy = __lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(y[i].d)); - const __m256i qy = __lasx_xvld((const __m256i *)y[i].qs, 0); + const __m256 dy = __lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(y[ib].d)); + const __m256i qy = __lasx_xvld((const __m256i *)y[ib].qs, 0); const __m256 q = mul_sum_us8_pairs_float(qx, qy); acc = __lasx_xvfmadd_s(q, __lasx_xvfmul_s(dx, dy), acc); } - *s = hsum_float_8(acc) + summs; - -#else - // scalar - float sumf = 0.0; - - for (int i = 0; i < nb; i++) { + sumf = hsum_float_8(acc) + summs; +#endif + for (; ib < nb; ++ib) { uint32_t qh; - memcpy(&qh, x[i].qh, sizeof(qh)); + memcpy(&qh, x[ib].qh, sizeof(qh)); int sumi0 = 0; int sumi1 = 0; @@ -5223,19 +5210,18 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10; const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10; - const int32_t x0 = (x[i].qs[j] & 0xF) | xh_0; - const int32_t x1 = (x[i].qs[j] >> 4) | xh_1; + const int32_t x0 = (x[ib].qs[j] & 0xF) | xh_0; + const int32_t x1 = (x[ib].qs[j] >> 4) | xh_1; - sumi0 += (x0 * y[i].qs[j]); - sumi1 += (x1 * y[i].qs[j + qk/2]); + sumi0 += (x0 * y[ib].qs[j]); + sumi1 += (x1 * y[ib].qs[j + qk/2]); } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s); + sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); } *s = sumf; -#endif } void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 93d2797b..c76d00a3 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -14765,21 +14765,21 @@ static void ggml_compute_forward_pool_1d_sk_p0( switch (op) { case GGML_OP_POOL_AVG: drow[i] = 0; break; case GGML_OP_POOL_MAX: drow[i] = -FLT_MAX; break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break; + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } for (int ki = 0; ki < k; ++ki) { const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]); switch (op) { case GGML_OP_POOL_AVG: drow[i] += srow_j; break; case GGML_OP_POOL_MAX: if (srow_j > drow[i]) drow[i] = srow_j; break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break; + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } ++j; } switch (op) { case GGML_OP_POOL_AVG: drow[i] /= k; break; case GGML_OP_POOL_MAX: break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break; + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } } @@ -14848,7 +14848,7 @@ static void ggml_compute_forward_pool_2d( switch (op) { case GGML_OP_POOL_AVG: *out = 0; break; case GGML_OP_POOL_MAX: *out = -FLT_MAX; break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break; + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } const int ix = offset0 + ox * s0; @@ -14864,14 +14864,14 @@ static void ggml_compute_forward_pool_2d( switch (op) { case GGML_OP_POOL_AVG: *out += srow_j; break; case GGML_OP_POOL_MAX: if (srow_j > *out) *out = srow_j; break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break; + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } } } switch (op) { case GGML_OP_POOL_AVG: *out /= ka; break; case GGML_OP_POOL_MAX: break; - case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break; + case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } } }