From 2bc64832993b175d200300b870f7a1a5aea21fcb Mon Sep 17 00:00:00 2001 From: slaren Date: Sat, 18 May 2024 02:39:54 +0200 Subject: [PATCH] ggml : fix quants nans when all the group weights are very close to zero (llama/7313) --- ggml-quants.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 9b291d52..7008e5d8 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -14,6 +14,12 @@ #include // for qsort #include // for GGML_ASSERT +#define GROUP_MAX_EPS 1e-15f +#define GROUP_MAX_EPS_IQ3_XXS 1e-8f +#define GROUP_MAX_EPS_IQ2_S 1e-8f +#define GROUP_MAX_EPS_IQ1_M 1e-7f +#define GROUP_MAX_EPS_IQ1_S 1e-12f + #if defined(_MSC_VER) // disable "possible loss of data" to avoid warnings for hundreds of casts // we should just be careful :) @@ -1109,7 +1115,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t * float ax = fabsf(x[i]); if (ax > amax) { amax = ax; max = x[i]; } } - if (amax < 1e-30f) { // all zero + if (amax < GROUP_MAX_EPS) { // all zero for (int i = 0; i < n; ++i) { L[i] = 0; } @@ -1177,7 +1183,7 @@ static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t * float ax = fabsf(x[i]); if (ax > amax) { amax = ax; max = x[i]; } } - if (!amax) { // all zero + if (amax < GROUP_MAX_EPS) { // all zero for (int i = 0; i < n; ++i) { L[i] = 0; } return 0.f; } @@ -1646,7 +1652,7 @@ static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t * break; } } - return sumlx / suml2; + return sumlx/suml2; } static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restrict y, int k, const float * restrict quant_weights) { @@ -2653,7 +2659,7 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict } - if (!max_abs_scale) { + if (max_abs_scale < GROUP_MAX_EPS) { memset(&y[i], 0, sizeof(block_q6_K)); y[i].d = GGML_FP32_TO_FP16(0.f); x += QK_K; @@ -2805,7 +2811,7 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri } - if (!max_abs_scale) { + if (max_abs_scale < GROUP_MAX_EPS) { memset(&y[i], 0, sizeof(block_q6_K)); y[i].d = GGML_FP32_TO_FP16(0.f); x += QK_K; @@ -12599,7 +12605,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict } float max = xval[0]; for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]); - if (!max) { + if (max < GROUP_MAX_EPS) { scales[ib] = 0; memset(L, 0, 32); continue; @@ -12775,7 +12781,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v } float max = xval[0]; for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]); - if (!max) { + if (max < GROUP_MAX_EPS) { scales[ib] = 0; memset(L, 0, 16); continue; @@ -13216,7 +13222,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v } float max = xval[0]; for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]); - if (!max) { + if (max < GROUP_MAX_EPS_IQ3_XXS) { scales[ib] = 0; memset(L, 0, 32); continue; @@ -13756,7 +13762,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]); float max = fabsf(xb[0]); for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i])); - if (!max) { + if (max < GROUP_MAX_EPS_IQ1_S) { scales[ib] = 0; memset(L, 1, block_size); continue; @@ -13944,7 +13950,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy } float max = fabsf(xb[0]); for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i])); - if (!max) { + if (max < GROUP_MAX_EPS_IQ1_M) { scales[ib] = 0; memset(L, 1, block_size); continue; @@ -14208,7 +14214,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block amax = ax; max = xb[j]; } } - if (!amax) { + if (amax < GROUP_MAX_EPS) { scales[ib] = 0; continue; } @@ -14429,7 +14435,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy } float max = xval[0]; for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]); - if (!max) { + if (max < GROUP_MAX_EPS_IQ2_S) { scales[ib] = 0; continue; }