ggml/gguf : prevent integer overflows (#19856)

author Georgi Gerganov <redacted>

Tue, 24 Feb 2026 18:17:11 +0000 (20:17 +0200)

committer GitHub <redacted>

Tue, 24 Feb 2026 18:17:11 +0000 (20:17 +0200)
author Georgi Gerganov <redacted>
Tue, 24 Feb 2026 18:17:11 +0000 (20:17 +0200)
committer GitHub <redacted>
Tue, 24 Feb 2026 18:17:11 +0000 (20:17 +0200)
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h

index 77af0e7fb6aaa6ebf6d43434fc257d69b1f8af2a..fcc51f1f71a4b5e72d7c88386fb8ccc3e3ba5071 100644 (file)
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -730,10 +730,6 @@ extern "C" {
      GGML_API size_t  ggml_type_size(enum ggml_type type);             // size in bytes for all elements in a block
      GGML_API size_t  ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
  
-    GGML_DEPRECATED(
-    GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
-    "use ggml_row_size() instead");
-
      GGML_API const char * ggml_type_name(enum ggml_type type);
      GGML_API const char * ggml_op_name  (enum ggml_op   op);
      GGML_API const char * ggml_op_symbol(enum ggml_op   op);
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c

index ed819eaa4c5d790ab8f5ac0ce62b546b760f4190..e9529fbb66289b6a0fd4ee08f90a3d88e2ada3e8 100644 (file)
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -899,7 +899,8 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
  };
  
  const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
-    GGML_ASSERT(type < GGML_TYPE_COUNT);
+    assert(type >= 0);
+    assert(type < GGML_TYPE_COUNT);
      return &type_traits[type];
  }
  
@@ -1265,27 +1266,33 @@ size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
  }
  
  int64_t ggml_blck_size(enum ggml_type type) {
+    assert(type >= 0);
+    assert(type < GGML_TYPE_COUNT);
      return type_traits[type].blck_size;
  }
  
  size_t ggml_type_size(enum ggml_type type) {
+    assert(type >= 0);
+    assert(type < GGML_TYPE_COUNT);
      return type_traits[type].type_size;
  }
  
  size_t ggml_row_size(enum ggml_type type, int64_t ne) {
+    assert(type >= 0);
+    assert(type < GGML_TYPE_COUNT);
      assert(ne % ggml_blck_size(type) == 0);
      return ggml_type_size(type)*ne/ggml_blck_size(type);
  }
  
-double ggml_type_sizef(enum ggml_type type) {
-    return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
-}
-
  const char * ggml_type_name(enum ggml_type type) {
-    return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
+    assert(type >= 0);
+    assert(type < GGML_TYPE_COUNT);
+    return type_traits[type].type_name;
  }
  
  bool ggml_is_quantized(enum ggml_type type) {
+    assert(type >= 0);
+    assert(type < GGML_TYPE_COUNT);
      return type_traits[type].is_quantized;
  }
  
@@ -1629,11 +1636,23 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
      const size_t cur_end  = cur_offs + cur_size;
  
      // align to GGML_MEM_ALIGN
+    GGML_ASSERT(size <= SIZE_MAX - (GGML_MEM_ALIGN - 1));
      size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
  
      char * const mem_buffer = ctx->mem_buffer;
      struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
  
+    // integer overflow checks
+    if (cur_end > SIZE_MAX - size_needed) {
+        GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu)\n", __func__, cur_end, size_needed);
+        return NULL;
+    }
+    if (cur_end + size_needed > SIZE_MAX - GGML_OBJECT_SIZE) {
+        GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu) + GGML_OBJECT_SIZE (%zu)\n", __func__,
+                cur_end, size_needed, (size_t) GGML_OBJECT_SIZE);
+        return NULL;
+    }
+
      if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
          GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
                  __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
@@ -1702,6 +1721,8 @@ static struct ggml_tensor * ggml_new_tensor_impl(
          obj_alloc_size = data_size;
      }
  
+    GGML_ASSERT(GGML_TENSOR_SIZE <= SIZE_MAX - obj_alloc_size);
+
      struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
      GGML_ASSERT(obj_new);
  
diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp

index ed0d7f2cae12b10dccda77259862bf60ec672dc6..ddeda5a3c80aefe7b0d3d97661269c40a6933055 100644 (file)
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -15,6 +15,9 @@
  #include <string>
  #include <vector>
  
+#define GGUF_MAX_STRING_LENGTH  (1024*1024*1024)
+#define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024)
+
  template <typename T>
  struct type_to_gguf_type;
  
@@ -228,6 +231,26 @@ struct gguf_reader {
  
      template <typename T>
      bool read(std::vector<T> & dst, const size_t n) const {
+        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
+            return false;
+        }
+        const uint64_t nbytes = nbytes_remain();
+        if constexpr (std::is_same<T, std::string>::value) {
+            // strings are prefixed with their length, so we need to account for that
+            if (n > SIZE_MAX / sizeof(uint64_t)) {
+                return false;
+            }
+            if (nbytes < n * sizeof(uint64_t)) {
+                return false;
+            }
+        } else {
+            if (n > SIZE_MAX / sizeof(T)) {
+                return false;
+            }
+            if (nbytes < n * sizeof(T)) {
+                return false;
+            }
+        }
          dst.resize(n);
          for (size_t i = 0; i < dst.size(); ++i) {
              if constexpr (std::is_same<T, bool>::value) {
@@ -277,13 +300,43 @@ struct gguf_reader {
          if (!read(size)) {
              return false;
          }
-        dst.resize(size);
+        if (size > GGUF_MAX_STRING_LENGTH) {
+            GGML_LOG_ERROR("%s: string length %" PRIu64 " exceeds maximum %" PRIu64 "\n", __func__, size, (uint64_t) GGUF_MAX_STRING_LENGTH);
+            return false;
+        }
+        const uint64_t nbytes = nbytes_remain();
+        if (size > nbytes) {
+            GGML_LOG_ERROR("%s: string length %" PRIu64 " exceeds remaining file size %" PRIu64 " bytes\n", __func__, size, nbytes);
+            return false;
+        }
+        dst.resize(static_cast<size_t>(size));
          return fread(dst.data(), 1, dst.length(), file) == dst.length();
      }
  
      bool read(void * dst, const size_t size) const {
          return fread(dst, 1, size, file) == size;
      }
+
+    // remaining bytes in the file
+    uint64_t nbytes_remain() const {
+        const long cur = ftell(file);
+        if (cur < 0) {
+            return 0;
+        }
+        if (fseek(file, 0, SEEK_END) != 0) {
+            fseek(file, cur, SEEK_SET);
+
+            return 0;
+        }
+        const long end = ftell(file);
+        if (end < 0) {
+            fseek(file, cur, SEEK_SET);
+
+            return 0;
+        }
+        fseek(file, cur, SEEK_SET);
+        return static_cast<uint64_t>(end - cur);
+    }
  };
  
  struct gguf_context * gguf_init_empty(void) {
@@ -568,8 +621,8 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
  
              // check that tensor type is within defined range
              if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
-                GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d (%s)\n",
-                    __func__, info.t.name, info.t.type, ggml_type_name(info.t.type));
+                GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d. should be in [0, %d)\n",
+                    __func__, info.t.name, info.t.type, GGML_TYPE_COUNT);
                  ok = false;
                  break;
              }
@@ -657,10 +710,34 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
          //   the ggml_tensor structs to the appropriate locations in the binary blob
  
          // compute the exact size needed for the new ggml_context
-        const size_t mem_size =
-            params.no_alloc ?
-            (n_tensors    )*ggml_tensor_overhead() :
-            (n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
+        size_t mem_size = 0;
+        if (params.no_alloc) {
+            if (n_tensors != 0 && SIZE_MAX / n_tensors < ggml_tensor_overhead()) {
+                GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
+                gguf_free(ctx);
+                return nullptr;
+            }
+
+            const size_t overhead = n_tensors * ggml_tensor_overhead();
+
+            mem_size = overhead;
+        } else {
+            if ((n_tensors + 1) != 0 && SIZE_MAX / (n_tensors + 1) < ggml_tensor_overhead()) {
+                GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
+                gguf_free(ctx);
+                return nullptr;
+            }
+
+            const size_t overhead = (n_tensors + 1) * ggml_tensor_overhead();
+
+            if (SIZE_MAX - overhead < ctx->size) {
+                GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
+                gguf_free(ctx);
+                return nullptr;
+            }
+
+            mem_size = overhead + ctx->size;
+        }
  
          struct ggml_init_params pdata = {
              /*mem_size   =*/ mem_size,
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py

index d87e8f72321b3f776cf214e117e21cdde9e1e18b..0a1b85f50641b1abf7f597cdad133100f7884a3b 100644 (file)
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -175,6 +175,9 @@ class GGUFReader:
              if new_align.types != [GGUFValueType.UINT32]:
                  raise ValueError('Bad type for general.alignment field')
              self.alignment = new_align.parts[-1][0]
+            # Ensure alignment is a non-zero power of two
+            if self.alignment == 0 or (self.alignment & (self.alignment - 1)) != 0:
+                raise ValueError('Invalid alignment: must be a non-zero power of two')
          padding = offs % self.alignment
          if padding != 0:
              offs += self.alignment - padding
@@ -202,11 +205,11 @@ class GGUFReader:
  
      def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
          if field.name in self.fields:
-            # TODO: add option to generate error on duplicate keys
-            # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
+            # TODO: add option to make this a warning and accept duplicate keys like below
+            raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
  
-            logger.warning(f'Duplicate key {field.name} at offset {field.offset}')
-            self.fields[field.name + '_{}'.format(field.offset)] = field
+            # logger.warning(f'Duplicate key {field.name} at offset {field.offset}')
+            # self.fields[field.name + '_{}'.format(field.offset)] = field
          else:
              self.fields[field.name] = field
          return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py

index 4245d18bc4f09522f290338e35b8f9d2da2cfc7b..9ee3ac9e8ffe7df7df4e2a9fd81823fc928c92ce 100644 (file)
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -501,6 +501,8 @@ class GGUFWriter:
          self.add_uint32(Keys.General.QUANTIZATION_VERSION, quantization_version)
  
      def add_custom_alignment(self, alignment: int) -> None:
+        if alignment <= 0 or (alignment & (alignment - 1)) != 0:
+            raise ValueError('Invalid alignment: must be a non-zero power of two')
          self.data_alignment = alignment
          self.add_uint32(Keys.General.ALIGNMENT, alignment)
  
diff --git a/tests/test-gguf.cpp b/tests/test-gguf.cpp

index 84b7f3bc4916c49b0730f7c48f0e0266c580c9ce..8ebd16ba8225ac807398f65a51d4c81d66a10388 100644 (file)
--- a/tests/test-gguf.cpp
+++ b/tests/test-gguf.cpp
@@ -48,6 +48,7 @@ enum handcrafted_file_type {
      HANDCRAFTED_DATA_NOT_ENOUGH_DATA       =  10 + offset_has_data,
      HANDCRAFTED_DATA_BAD_ALIGN             =  15 + offset_has_data,
      HANDCRAFTED_DATA_INCONSISTENT_ALIGN    =  20 + offset_has_data,
+    HANDCRAFTED_DATA_MEM_SIZE_OVERFLOW     =  30 + offset_has_data,
      HANDCRAFTED_DATA_SUCCESS               = 800 + offset_has_data,
      HANDCRAFTED_DATA_CUSTOM_ALIGN          = 810 + offset_has_data,
  };
@@ -84,6 +85,7 @@ static std::string handcrafted_file_type_name(const enum handcrafted_file_type h
          case HANDCRAFTED_DATA_NOT_ENOUGH_DATA:       return "DATA_NOT_ENOUGH_DATA";
          case HANDCRAFTED_DATA_BAD_ALIGN:             return "DATA_BAD_ALIGN";
          case HANDCRAFTED_DATA_INCONSISTENT_ALIGN:    return "DATA_INCONSISTENT_ALIGN";
+        case HANDCRAFTED_DATA_MEM_SIZE_OVERFLOW:     return "DATA_MEM_SIZE_OVERFLOW";
          case HANDCRAFTED_DATA_SUCCESS:               return "DATA_SUCCESS";
          case HANDCRAFTED_DATA_CUSTOM_ALIGN:          return "DATA_CUSTOM_ALIGN";
      }
@@ -196,6 +198,13 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
          tensor_configs = get_tensor_configs(rng);
      }
  
+    if (hft == HANDCRAFTED_DATA_MEM_SIZE_OVERFLOW) {
+        tensor_configs.resize(2);
+
+        tensor_configs[0] = { GGML_TYPE_I8, { 0x7FFFFFFFFFFFFFC0, 1, 1, 1 } };
+        tensor_configs[1] = { GGML_TYPE_I8, { 0x7FFFFFFFFFFFFFC0, 1, 1, 1 } };
+    }
+
      if (hft == HANDCRAFTED_HEADER_BAD_N_TENSORS) {
          const uint64_t n_tensors = -1;
          helper_write(file, n_tensors);
@@ -397,7 +406,8 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
          for (uint32_t i = 1; i < n_dims; ++i) {
              ne *= shape[i];
          }
-        offset += GGML_PAD(ggml_row_size(type, ne), alignment);
+
+        offset += GGML_PAD(ggml_row_size(type, ne), (uint64_t) alignment);
      }
  
      while (ftell(file) % alignment != 0) {
@@ -411,6 +421,9 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
          if (hft == HANDCRAFTED_DATA_NOT_ENOUGH_DATA) {
              nbytes -= 1;
          }
+        if (hft == HANDCRAFTED_DATA_MEM_SIZE_OVERFLOW) {
+            nbytes = 32;
+        }
          for (uint64_t i = 0; i < nbytes; ++i) {
              const uint8_t random_byte = i % 256;
              helper_write(file, random_byte);
@@ -704,6 +717,7 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
          HANDCRAFTED_DATA_NOT_ENOUGH_DATA,
          HANDCRAFTED_DATA_BAD_ALIGN,
          HANDCRAFTED_DATA_INCONSISTENT_ALIGN,
+        HANDCRAFTED_DATA_MEM_SIZE_OVERFLOW,
          HANDCRAFTED_DATA_SUCCESS,
          HANDCRAFTED_DATA_CUSTOM_ALIGN,
      };
author	Georgi Gerganov <redacted>
	Tue, 24 Feb 2026 18:17:11 +0000 (20:17 +0200)
committer	GitHub <redacted>
	Tue, 24 Feb 2026 18:17:11 +0000 (20:17 +0200)
ggml/include/ggml.h		patch \| blob \| history
ggml/src/ggml.c		patch \| blob \| history
ggml/src/gguf.cpp		patch \| blob \| history
gguf-py/gguf/gguf_reader.py		patch \| blob \| history
gguf-py/gguf/gguf_writer.py		patch \| blob \| history
tests/test-gguf.cpp		patch \| blob \| history