From: Georgi Gerganov Date: Fri, 27 Feb 2026 10:24:59 +0000 (+0200) Subject: gguf : sync (ggml/0) X-Git-Tag: upstream/1.8.4~103 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=9453b4b9be9b73adfc35051083f37cefa039acee;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp gguf : sync (ggml/0) --- diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp index 53504399..cbeedf6c 100644 --- a/ggml/src/gguf.cpp +++ b/ggml/src/gguf.cpp @@ -15,6 +15,17 @@ #include #include +#define GGUF_MAX_STRING_LENGTH (1024*1024*1024) +#define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024) + +#ifdef _WIN32 +# define gguf_ftell _ftelli64 +# define gguf_fseek _fseeki64 +#else +# define gguf_ftell ftello +# define gguf_fseek fseeko +#endif + template struct type_to_gguf_type; @@ -217,17 +228,64 @@ struct gguf_context { }; struct gguf_reader { - FILE * file; + gguf_reader(FILE * file) : file(file) { + // read the remaining bytes once and update on each read + nbytes_remain = file_remain(file); + } - gguf_reader(FILE * file) : file(file) {} + // helper for remaining bytes in a file + static uint64_t file_remain(FILE * file) { + const int64_t cur = gguf_ftell(file); + if (cur < 0) { + return 0; + } + if (gguf_fseek(file, 0, SEEK_END) != 0) { + gguf_fseek(file, cur, SEEK_SET); + + return 0; + } + const int64_t end = gguf_ftell(file); + if (end < 0) { + gguf_fseek(file, cur, SEEK_SET); + + return 0; + } + gguf_fseek(file, cur, SEEK_SET); + return static_cast(end - cur); + } template bool read(T & dst) const { - return fread(&dst, 1, sizeof(dst), file) == sizeof(dst); + const size_t size = sizeof(dst); + if (nbytes_remain < size) { + return false; + } + const size_t nread = fread(&dst, 1, size, file); + nbytes_remain -= nread; + return nread == size; } template bool read(std::vector & dst, const size_t n) const { + if (n > GGUF_MAX_ARRAY_ELEMENTS) { + return false; + } + if constexpr (std::is_same::value) { + // strings are prefixed with their length, so we need to account for that + if (n > SIZE_MAX / sizeof(uint64_t)) { + return false; + } + if (nbytes_remain < n * sizeof(uint64_t)) { + return false; + } + } else { + if (n > SIZE_MAX / sizeof(T)) { + return false; + } + if (nbytes_remain < n * sizeof(T)) { + return false; + } + } dst.resize(n); for (size_t i = 0; i < dst.size(); ++i) { if constexpr (std::is_same::value) { @@ -273,17 +331,37 @@ struct gguf_reader { } bool read(std::string & dst) const { - uint64_t size = -1; + uint64_t size = 0; if (!read(size)) { return false; } - dst.resize(size); - return fread(dst.data(), 1, dst.length(), file) == dst.length(); + if (size > GGUF_MAX_STRING_LENGTH) { + GGML_LOG_ERROR("%s: string length %" PRIu64 " exceeds maximum %" PRIu64 "\n", __func__, size, (uint64_t) GGUF_MAX_STRING_LENGTH); + return false; + } + if (size > nbytes_remain) { + GGML_LOG_ERROR("%s: string length %" PRIu64 " exceeds remaining file size %" PRIu64 " bytes\n", __func__, size, nbytes_remain); + return false; + } + dst.resize(static_cast(size)); + const size_t nread = fread(dst.data(), 1, size, file); + nbytes_remain -= nread; + return nread == size; } bool read(void * dst, const size_t size) const { - return fread(dst, 1, size, file) == size; + if (size > nbytes_remain) { + return false; + } + const size_t nread = fread(dst, 1, size, file); + nbytes_remain -= nread; + return nread == size; } + +private: + FILE * file; + + mutable uint64_t nbytes_remain; }; struct gguf_context * gguf_init_empty(void) { @@ -523,7 +601,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par // tensor shape { - uint32_t n_dims = -1; + uint32_t n_dims = 0; ok = ok && gr.read(n_dims); if (n_dims > GGML_MAX_DIMS) { GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n", @@ -568,8 +646,8 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par // check that tensor type is within defined range if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) { - GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d (%s)\n", - __func__, info.t.name, info.t.type, ggml_type_name(info.t.type)); + GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d. should be in [0, %d)\n", + __func__, info.t.name, info.t.type, GGML_TYPE_COUNT); ok = false; break; } @@ -585,6 +663,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par break; } + // check that the size of the tensor in bytes is representable + if (ok && uint64_t(ggml_nelements(&info.t)/ggml_blck_size(info.t.type)) > SIZE_MAX/ggml_type_size(info.t.type)) { + GGML_LOG_ERROR("%s: tensor '%s' with shape (%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") has a size in bytes > %zu\n", + __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], SIZE_MAX); + ok = false; + break; + } + // calculate byte offsets given the tensor shape and type info.t.nb[0] = type_size; info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size); @@ -610,14 +696,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors); // we require the data section to be aligned, so take into account any padding - if (fseek(file, GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) { + if (gguf_fseek(file, GGML_PAD(gguf_ftell(file), ctx->alignment), SEEK_SET) != 0) { GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__); gguf_free(ctx); return nullptr; } // store the current file offset - this is where the data section starts - ctx->offset = ftell(file); + ctx->offset = gguf_ftell(file); // compute the total size of the data section, taking into account the alignment { @@ -649,10 +735,34 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par // the ggml_tensor structs to the appropriate locations in the binary blob // compute the exact size needed for the new ggml_context - const size_t mem_size = - params.no_alloc ? - (n_tensors )*ggml_tensor_overhead() : - (n_tensors + 1)*ggml_tensor_overhead() + ctx->size; + size_t mem_size = 0; + if (params.no_alloc) { + if (n_tensors != 0 && SIZE_MAX / n_tensors < ggml_tensor_overhead()) { + GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__); + gguf_free(ctx); + return nullptr; + } + + const size_t overhead = n_tensors * ggml_tensor_overhead(); + + mem_size = overhead; + } else { + if ((n_tensors + 1) != 0 && SIZE_MAX / (n_tensors + 1) < ggml_tensor_overhead()) { + GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__); + gguf_free(ctx); + return nullptr; + } + + const size_t overhead = (n_tensors + 1) * ggml_tensor_overhead(); + + if (SIZE_MAX - overhead < ctx->size) { + GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__); + gguf_free(ctx); + return nullptr; + } + + mem_size = overhead + ctx->size; + } struct ggml_init_params pdata = { /*mem_size =*/ mem_size, @@ -734,7 +844,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p FILE * file = ggml_fopen(fname, "rb"); if (!file) { - GGML_LOG_ERROR("%s: failed to open GGUF file '%s'\n", __func__, fname); + GGML_LOG_ERROR("%s: failed to open GGUF file '%s' (%s)\n", __func__, fname, strerror(errno)); return nullptr; } @@ -1166,50 +1276,51 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const } -struct gguf_writer { - std::vector & buf; +struct gguf_writer_base { + size_t written_bytes {0u}; + + ~gguf_writer_base(void) = default; - gguf_writer(std::vector & buf) : buf(buf) {} + // we bet on devirtualization + virtual void write(int8_t val) = 0; + virtual void write(const std::vector & val) = 0; + virtual void write_tensor_data(const struct gguf_tensor_info & info, size_t offset_data, size_t alignment) = 0; template - void write(const T & val) const { + void write(const T & val) { for (size_t i = 0; i < sizeof(val); ++i) { - buf.push_back(reinterpret_cast(&val)[i]); + write(reinterpret_cast(&val)[i]); } } - void write(const std::vector & val) const { - buf.insert(buf.end(), val.begin(), val.end()); - } - - void write(const bool & val) const { + void write(const bool & val) { const int8_t val8 = val ? 1 : 0; write(val8); } - void write(const std::string & val) const { + void write(const std::string & val) { { const uint64_t n = val.length(); write(n); } for (size_t i = 0; i < val.length(); ++i) { - buf.push_back(reinterpret_cast(val.data())[i]); + write((val.data())[i]); } } - void write(const char * val) const { + void write(const char * val) { write(std::string(val)); } - void write(const enum ggml_type & val) const { + void write(const enum ggml_type & val) { write(int32_t(val)); } - void write(const enum gguf_type & val) const { + void write(const enum gguf_type & val) { write(int32_t(val)); } - void write(const struct gguf_kv & kv) const { + void write(const struct gguf_kv & kv) { const uint64_t ne = kv.get_ne(); write(kv.get_key()); @@ -1250,7 +1361,7 @@ struct gguf_writer { } } - void write_tensor_meta(const struct gguf_tensor_info & info) const { + void write_tensor_meta(const struct gguf_tensor_info & info) { write(info.t.name); const uint32_t n_dims = ggml_n_dims(&info.t); @@ -1263,14 +1374,33 @@ struct gguf_writer { write(info.offset); } - void pad(const size_t alignment) const { - while (buf.size() % alignment != 0) { + void pad(const size_t alignment) { + while (written_bytes % alignment != 0) { const int8_t zero = 0; write(zero); } } +}; + +// vector buffer based writer +struct gguf_writer_buf final : public gguf_writer_base { + std::vector & buf; + + gguf_writer_buf(std::vector & buf) : buf(buf) {} - void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) const { + using gguf_writer_base::write; + + void write(const int8_t val) override { + buf.push_back(val); + written_bytes++; + } + + void write(const std::vector & val) override { + buf.insert(buf.end(), val.begin(), val.end()); + written_bytes += val.size(); + } + + void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override { GGML_ASSERT(buf.size() - offset_data == info.offset); GGML_ASSERT(ggml_is_contiguous(&info.t)); @@ -1284,14 +1414,58 @@ struct gguf_writer { GGML_ASSERT(info.t.data); memcpy(buf.data() + offset, info.t.data, nbytes); } + written_bytes += nbytes; pad(alignment); } }; -void gguf_write_to_buf(const struct gguf_context * ctx, std::vector & buf, bool only_meta) { - const struct gguf_writer gw(buf); +// file based writer +struct gguf_writer_file final : public gguf_writer_base { + FILE * file; + + gguf_writer_file(FILE* file) : file(file) {} + + using gguf_writer_base::write; + + void write(const int8_t val) override { + const auto real_val = static_cast(val); + const auto ret = fputc(real_val, file); + written_bytes++; + if (ret != real_val) { + throw std::runtime_error("unexpected fputc result '" + std::to_string(ret) + "' instead of '" + std::to_string((int)real_val) + "'"); + } + } + + void write(const std::vector & val) override { + const auto ret = fwrite(val.data(), 1, val.size(), file); + written_bytes += val.size(); + if (ret != val.size()) { + throw std::runtime_error("unexpected fwrite number of bytes written, '" + std::to_string(ret) + "' instead of '" + std::to_string(val.size()) + "'"); + } + } + + void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override { + GGML_ASSERT(written_bytes - offset_data == info.offset); + + GGML_ASSERT(ggml_is_contiguous(&info.t)); + const size_t nbytes = ggml_nbytes(&info.t); + + std::vector buf(nbytes); + if (info.t.buffer) { + ggml_backend_tensor_get(&info.t, buf.data(), 0, nbytes); + } else { + GGML_ASSERT(info.t.data); + memcpy(buf.data(), info.t.data, nbytes); + } + write(buf); + pad(alignment); + } +}; + +template +static void gguf_write_out(const struct gguf_context * ctx, writer_t & gw, bool only_meta) { const int64_t n_kv = gguf_get_n_kv(ctx); const int64_t n_tensors = gguf_get_n_tensors(ctx); @@ -1321,7 +1495,7 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector & bu return; } - const size_t offset_data = gw.buf.size(); + const size_t offset_data = gw.written_bytes; // write tensor data for (int64_t i = 0; i < n_tensors; ++i) { @@ -1329,6 +1503,11 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector & bu } } +void gguf_write_to_buf(const struct gguf_context * ctx, std::vector & buf, bool only_meta) { + gguf_writer_buf gw(buf); + gguf_write_out(ctx, gw, only_meta); +} + bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) { FILE * file = ggml_fopen(fname, "wb"); @@ -1337,11 +1516,17 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo return false; } - std::vector buf; - gguf_write_to_buf(ctx, buf, only_meta); - const bool ok = fwrite(buf.data(), 1, buf.size(), file) == buf.size(); + try { + gguf_writer_file gw(file); + gguf_write_out(ctx, gw, only_meta); + } catch (const std::runtime_error& ex) { + GGML_LOG_ERROR("%s: failed to write GGUF data into '%s': %s\n", __func__, fname, ex.what()); + fclose(file); + return false; + } + fclose(file); - return ok; + return true; } size_t gguf_get_meta_size(const struct gguf_context * ctx) {