From: Georgi Gerganov
Date: Mon, 30 Jun 2025 14:04:05 +0000 (+0300)
Subject: metal : disable fast-math for some cpy kernels (llama/14460)
X-Git-Tag: upstream/0.0.2309~60
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=7213150b82f4477f84e98ac18256e8007fb6c7b8;p=pkg%2Fggml%2Fsources%2Fggml

metal : disable fast-math for some cpy kernels (llama/14460)

* metal : disable fast-math for some cpy kernels

ggml-ci

* cont : disable for q4_1

ggml-ci

* cont : disable for iq4_nl

ggml-ci
---

diff --git a/src/ggml-metal/ggml-metal.metal b/src/ggml-metal/ggml-metal.metal
index fc3cfe35..dac45c7a 100644
--- a/src/ggml-metal/ggml-metal.metal
+++ b/src/ggml-metal/ggml-metal.metal
@@ -138,6 +138,7 @@ void quantize_q4_0(device const float * src, device block_q4_0 & dst) {
 }
 
 void quantize_q4_1(device const float * src, device block_q4_1 & dst) {
+#pragma METAL fp math_mode(safe)
     float min = FLT_MAX;
     float max = -FLT_MAX;
 
@@ -203,6 +204,7 @@ void quantize_q5_0(device const float * src, device block_q5_0 & dst) {
 }
 
 void quantize_q5_1(device const float * src, device block_q5_1 & dst) {
+#pragma METAL fp math_mode(safe)
     float max = src[0];
     float min = src[0];
 
@@ -239,6 +241,7 @@ void quantize_q5_1(device const float * src, device block_q5_1 & dst) {
 }
 
 void quantize_iq4_nl(device const float * src, device block_iq4_nl & dst) {
+#pragma METAL fp math_mode(safe)
     float amax = 0.0f; // absolute max
     float max  = 0.0f;