gguf-py : add Numpy MXFP4 de/quantization support (llama/15111)

author compilade <redacted>

Fri, 8 Aug 2025 21:48:26 +0000 (17:48 -0400)

committer Georgi Gerganov <redacted>

Thu, 14 Aug 2025 11:17:28 +0000 (14:17 +0300)
author compilade <redacted>
Fri, 8 Aug 2025 21:48:26 +0000 (17:48 -0400)
committer Georgi Gerganov <redacted>
Thu, 14 Aug 2025 11:17:28 +0000 (14:17 +0300)
diff --git a/src/ggml-quants.c b/src/ggml-quants.c
index a57d2a16d6c540e8baca40138525d4813246a57b..94f6405ca1e059fb92f70e2a4d675e8083dc15e6 100644
--- a/src/ggml-quants.c
+++ b/src/ggml-quants.c
@@ -288,7 +288,7 @@ void quantize_row_mxfp4_ref(const float * GGML_RESTRICT x, block_mxfp4 * GGML_RE
              }
          }
  
-        const uint8_t e = (uint8_t) (floorf(log2f(amax)) - 2 + 127);
+        const uint8_t e = amax > 0.0f ? (uint8_t) (floorf(log2f(amax)) - 2 + 127) : 0;
  
          const float d = GGML_E8M0_TO_FP32_HALF(e);
author	compilade <redacted>
	Fri, 8 Aug 2025 21:48:26 +0000 (17:48 -0400)
committer	Georgi Gerganov <redacted>
	Thu, 14 Aug 2025 11:17:28 +0000 (14:17 +0300)