From: Georgi Gerganov
Date: Fri, 4 Jul 2025 16:19:09 +0000 (+0300)
Subject: metal : disable fast math in all quantize kernels (#14528)
X-Git-Tag: upstream/0.0.5882~54
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=ef797db357e44ecb7437fa9d22f4e1614104b342;p=pkg%2Fggml%2Fsources%2Fllama.cpp

metal : disable fast math in all quantize kernels (#14528)

ggml-ci
---

diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
index dc7a0af2..22240bab 100644
--- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal
@@ -109,6 +109,7 @@ void dequantize_q4_0_t4(device const block_q4_0 * xb, short il, thread type4 & r
 }
 
 void quantize_q4_0(device const float * src, device block_q4_0 & dst) {
+#pragma METAL fp math_mode(safe)
     float amax = 0.0f; // absolute max
     float max = 0.0f;
 
@@ -167,6 +168,7 @@ void quantize_q4_1(device const float * src, device block_q4_1 & dst) {
 }
 
 void quantize_q5_0(device const float * src, device block_q5_0 & dst) {
+#pragma METAL fp math_mode(safe)
     float amax = 0.0f; // absolute max
     float max = 0.0f;
 
@@ -461,6 +463,7 @@ void dequantize_q8_0_t4(device const block_q8_0 *xb, short il, thread type4 & re
 }
 
 void quantize_q8_0(device const float * src, device block_q8_0 & dst) {
+#pragma METAL fp math_mode(safe)
     float amax = 0.0f; // absolute max
 
     for (int j = 0; j < QK8_0; j++) {