Guard against all weights in a super-block being zero (#3010)

author Kawrakow <redacted>
Tue, 5 Sep 2023 07:55:33 +0000 (09:55 +0200)
committer GitHub <redacted>
Tue, 5 Sep 2023 07:55:33 +0000 (09:55 +0200)
diff --git a/k_quants.c b/k_quants.c

index 4accd2480b198bffccd498dad45cfff030b4f533..8742d4aee6f546efcc0cebbb53b918efbda0d3c0 100644 (file)
--- a/k_quants.c
+++ b/k_quants.c
@@ -83,7 +83,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
          float ax = fabsf(x[i]);
          if (ax > amax) { amax = ax; max = x[i]; }
      }
-    if (!amax) { // all zero
+    if (amax < 1e-30f) { // all zero
          for (int i = 0; i < n; ++i) {
              L[i] = 0;
          }
@@ -1086,6 +1086,12 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict
  
          }
  
+        if (!max_abs_scale) {
+            memset(&y[i], 0, sizeof(block_q6_K));
+            y[i].d = ggml_fp32_to_fp16(0.f);
+            continue;
+        }
+
          float iscale = -128.f/max_scale;
          y[i].d = ggml_fp32_to_fp16(1/iscale);
          for (int ib = 0; ib < QK_K/16; ++ib) {