ggml-cpu: reduce asm calls for hsum (llama/14037)

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
Aaron Teo 2025-06-19 01:10:08 +08:00 committed by Georgi Gerganov
parent 34940abe53
commit 203451bcba

View File

@@ -944,10 +944,8 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
for (int i = 0; i < offset; ++i) { \ for (int i = 0; i < offset; ++i) { \
x[i] = vec_add(x[i], x[offset + i]); \ x[i] = vec_add(x[i], x[offset + i]); \
} \ } \
res = vec_extract(x[0], 0) + \ float32x4_t tmp = x[0] + vec_reve(x[0]); \
vec_extract(x[0], 1) + \ res = tmp[0] + tmp[1]; \
vec_extract(x[0], 2) + \
vec_extract(x[0], 3); \
} }
#define GGML_F32_VEC GGML_F32x4 #define GGML_F32_VEC GGML_F32x4