metal : disable fast math in all quantize kernels (llama/14528)

ggml-ci
2025-08-13 22:38:34 +02:00 · 2025-07-04 19:19:09 +03:00
parent af304ef080
commit 10d0d28f7c
1 changed file with 3 additions and 0 deletions
--- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal
@@ -109,6 +109,7 @@ void dequantize_q4_0_t4(device const block_q4_0 * xb, short il, thread type4 & r
 }
 void quantize_q4_0(device const float * src, device block_q4_0 & dst) {
 #pragma METAL fp math_mode(safe)
    float amax = 0.0f; // absolute max
    float max  = 0.0f;
@@ -167,6 +168,7 @@ void quantize_q4_1(device const float * src, device block_q4_1 & dst) {
 }
 void quantize_q5_0(device const float * src, device block_q5_0 & dst) {
 #pragma METAL fp math_mode(safe)
    float amax = 0.0f; // absolute max
    float max  = 0.0f;
@@ -461,6 +463,7 @@ void dequantize_q8_0_t4(device const block_q8_0 *xb, short il, thread type4 & re
 }
 void quantize_q8_0(device const float * src, device block_q8_0 & dst) {
 #pragma METAL fp math_mode(safe)
    float amax = 0.0f; // absolute max
    for (int j = 0; j < QK8_0; j++) {