gguf-py : add IQ1_M to GGML_QUANT_SIZES (#6761)

author pmysl <redacted>
Sun, 21 Apr 2024 12:49:30 +0000 (14:49 +0200)

committer GitHub <redacted>
Sun, 21 Apr 2024 12:49:30 +0000 (15:49 +0300)
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py

index ba24065a891c71830c9e1750e6f3342ef0450a4a..06cb26a7d495b71b31d1a48b28b156f3fba45652 100644 (file)
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -872,6 +872,7 @@ GGML_QUANT_SIZES = {
      GGMLQuantizationType.I32:     (1, 4),
      GGMLQuantizationType.I64:     (1, 8),
      GGMLQuantizationType.F64:     (1, 8),
+    GGMLQuantizationType.IQ1_M:   (256, QK_K // 8 + QK_K // 16  + QK_K // 32),
  }