gguf : add support for I64 and F64 arrays (#6062)
author Ondřej Čertík <redacted>
Fri, 15 Mar 2024 08:46:51 +0000 (02:46 -0600)
committer GitHub <redacted>
Fri, 15 Mar 2024 08:46:51 +0000 (10:46 +0200)
* gguf : add support for I64 and F64 arrays

GGML currently does not support I64 or F64 arrays, and they are not often
used in machine learning. However, in case the need arises in the future,
it is worth adding them now: it places the new types next to the existing
I8, I16 and I32 entries in the enums, and it reserves their type numbers.

Furthermore, with this addition the GGUF format becomes very usable for
most computational applications of NumPy (it is compatible with the most
common NumPy dtypes: i8, i16, i32, i64, f32, f64), providing a faster and
more versatile alternative to the `npz` format, and a simpler alternative
to the `hdf5` format.

The change in this PR seems small and should not significantly increase
the maintenance burden. I tested it from Python using GGUFWriter/Reader
and `gguf-dump`, as well as from C; everything seems to work.

* Fix compiler warnings
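
As a quick illustration of the Python-side testing mentioned above, the
following is a minimal sketch (not part of the commit) that round-trips an
int64 and a float64 tensor through gguf-py; the file name, architecture
string and tensor names are arbitrary placeholders:

```python
# Minimal sketch, assuming the gguf-py package from this repo is installed
# (e.g. `pip install gguf`); all names below are placeholders.
import numpy as np
from gguf import GGUFReader, GGUFWriter

writer = GGUFWriter("example.gguf", "example-arch")
writer.add_tensor("ids", np.arange(16, dtype=np.int64))               # stored as I64
writer.add_tensor("values", np.linspace(0, 1, 16, dtype=np.float64))  # stored as F64
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()

reader = GGUFReader("example.gguf")
for t in reader.tensors:
    print(t.name, t.tensor_type.name, t.data.dtype, t.data.shape)
```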

ggml.c
ggml.h
gguf-py/gguf/constants.py
gguf-py/gguf/gguf_reader.py
gguf-py/gguf/gguf_writer.py

diff --git a/ggml.c b/ggml.c
index fbc66f65b105214748f3d5e9020f52ffa3cd782f..c94006e51a092f132ae8baea338cbcde596f7bb9 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -470,6 +470,19 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .type_size                = sizeof(int32_t),
         .is_quantized             = false,
     },
+    [GGML_TYPE_I64] = {
+        .type_name                = "i64",
+        .blck_size                = 1,
+        .type_size                = sizeof(int64_t),
+        .is_quantized             = false,
+    },
+    [GGML_TYPE_F64] = {
+        .type_name                = "f64",
+        .blck_size                = 1,
+        .type_size                = sizeof(double),
+        .is_quantized             = false,
+        .nrows                    = 1,
+    },
     [GGML_TYPE_F32] = {
         .type_name                = "f32",
         .blck_size                = 1,
@@ -12418,6 +12431,8 @@ static void ggml_compute_forward_alibi(
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
         case GGML_TYPE_I32:
+        case GGML_TYPE_I64:
+        case GGML_TYPE_F64:
         case GGML_TYPE_COUNT:
             {
                 GGML_ASSERT(false);
@@ -12504,6 +12519,8 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
         case GGML_TYPE_I32:
+        case GGML_TYPE_I64:
+        case GGML_TYPE_F64:
         case GGML_TYPE_COUNT:
             {
                 GGML_ASSERT(false);
diff --git a/ggml.h b/ggml.h
index ab26c8f5908c72f4fe94336d6c55a44140731088..c937d4a535adbd3cef589234f637e60b62755de1 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -366,6 +366,8 @@ extern "C" {
         GGML_TYPE_I8      = 24,
         GGML_TYPE_I16     = 25,
         GGML_TYPE_I32     = 26,
+        GGML_TYPE_I64     = 27,
+        GGML_TYPE_F64     = 28,
         GGML_TYPE_COUNT,
     };
 
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 2d7cf16c14ed1f86ba92b5441bef9de0c3c95448..458a641dcd2297e58f4213601f12f8c4cded9637 100644 (file)
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -665,6 +665,8 @@ class GGMLQuantizationType(IntEnum):
     I8      = 24
     I16     = 25
     I32     = 26
+    I64     = 27
+    F64     = 28
 
 
 class GGUFEndian(IntEnum):
@@ -734,6 +736,8 @@ GGML_QUANT_SIZES = {
     GGMLQuantizationType.I8:      (1, 1),
     GGMLQuantizationType.I16:     (1, 2),
     GGMLQuantizationType.I32:     (1, 4),
+    GGMLQuantizationType.I64:     (1, 8),
+    GGMLQuantizationType.F64:     (1, 8),
 }
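
Since both new entries use block size 1 and an 8-byte element, a tensor's
byte size is simply its element count times 8. A small sketch of the same
`n_elems * type_size // block_size` computation the reader performs (the
element count below is a made-up example):

```python
from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType

n_elems = 4096                                  # hypothetical element count
block_size, type_size = GGML_QUANT_SIZES[GGMLQuantizationType.I64]
n_bytes = n_elems * type_size // block_size     # 4096 * 8 // 1 == 32768
print(block_size, type_size, n_bytes)
```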
 
 
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index 1c10f575389924b18eb38f43257dbfa19a69b0b6..33afac552ca7523b7da16f854ef34908fafc11f3 100644 (file)
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -242,12 +242,15 @@ class GGUFReader:
             n_bytes = n_elems * type_size // block_size
             data_offs = int(start_offs + offset_tensor[0])
             item_type: npt.DTypeLike
-            if ggml_type == GGMLQuantizationType.F32:
+            if ggml_type == GGMLQuantizationType.F16:
+                item_count = n_elems
+                item_type = np.float16
+            elif ggml_type == GGMLQuantizationType.F32:
                 item_count = n_elems
                 item_type = np.float32
-            elif ggml_type == GGMLQuantizationType.F16:
+            elif ggml_type == GGMLQuantizationType.F64:
                 item_count = n_elems
-                item_type = np.float16
+                item_type = np.float64
             elif ggml_type == GGMLQuantizationType.I8:
                 item_count = n_elems
                 item_type = np.int8
@@ -257,6 +260,9 @@ class GGUFReader:
             elif ggml_type == GGMLQuantizationType.I32:
                 item_count = n_elems
                 item_type = np.int32
+            elif ggml_type == GGMLQuantizationType.I64:
+                item_count = n_elems
+                item_type = np.int64
             else:
                 item_count = n_bytes
                 item_type = np.uint8
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 81b2eb884d4854cca98915fb94033324ba754429..1967b633ce2619d4814161686ab76feb0b8dc9d7 100644 (file)
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -204,18 +204,22 @@ class GGUFWriter:
         for i in range(n_dims):
             self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
         if raw_dtype is None:
-            if tensor_dtype == np.float32:
-                dtype = GGMLQuantizationType.F32
-            elif tensor_dtype == np.float16:
+            if tensor_dtype == np.float16:
                 dtype = GGMLQuantizationType.F16
+            elif tensor_dtype == np.float32:
+                dtype = GGMLQuantizationType.F32
+            elif tensor_dtype == np.float64:
+                dtype = GGMLQuantizationType.F64
             elif tensor_dtype == np.int8:
                 dtype = GGMLQuantizationType.I8
             elif tensor_dtype == np.int16:
                 dtype = GGMLQuantizationType.I16
             elif tensor_dtype == np.int32:
                 dtype = GGMLQuantizationType.I32
+            elif tensor_dtype == np.int64:
+                dtype = GGMLQuantizationType.I64
             else:
-                raise ValueError("Only F32, F16, I8, I16, I32 tensors are supported for now")
+                raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
         else:
             dtype = raw_dtype
         self.ti_data += self._pack("I", dtype)
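
The dtype inference above can also be bypassed: `add_tensor` accepts a
`raw_dtype` argument, which takes the `else: dtype = raw_dtype` branch. A
hedged sketch (file, architecture and tensor names are placeholders):

```python
import numpy as np
from gguf import GGMLQuantizationType, GGUFWriter

w = GGUFWriter("forced.gguf", "example-arch")
# float64 data, but the GGUF tensor type is chosen explicitly, not inferred
w.add_tensor("bias", np.ones(8, dtype=np.float64),
             raw_dtype=GGMLQuantizationType.F64)
w.write_header_to_file()
w.write_kv_data_to_file()
w.write_tensors_to_file()
w.close()
```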