ggml: check if non-native endian model is being loaded (llama/13943)

author Aaron Teo <redacted>

Sun, 1 Jun 2025 14:53:57 +0000 (22:53 +0800)

committer Georgi Gerganov <redacted>

Tue, 10 Jun 2025 09:40:33 +0000 (12:40 +0300)
author Aaron Teo <redacted>
Sun, 1 Jun 2025 14:53:57 +0000 (22:53 +0800)
committer Georgi Gerganov <redacted>
Tue, 10 Jun 2025 09:40:33 +0000 (12:40 +0300)
diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp

index 8667a80bd068540948d6768abe2878adb6f55df2..dab228e1ea32a49374e1df2a36ee6ed5f9c891a8 100644 (file)
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -347,6 +347,20 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
      int64_t n_tensors = 0;
  
      if (ok && gr.read(ctx->version)) {
+        /*
+         * The byte order of every multi-byte field is reversed when the
+         * model's endianness differs from the host's. Assuming the GGUF
+         * version is 3 (0x00000003), a host of the opposite endianness
+         * reads it as 0x03000000. Valid versions are small numbers, so if
+         * the low 16 bits (the last 4 hexadecimal digits) are all zero,
+         * the file was most likely written with the other byte order.
+         */
+        if ((ctx->version & 0x0000FFFF) == 0x00000000) {
+            GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
+            gguf_free(ctx);
+            return nullptr;
+        }
+
+        GGML_ASSERT(ctx->version > 0 && ctx->version <= 65535);
          if (ctx->version == 1) {
              GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
              ok = false;
author	Aaron Teo <redacted>
	Sun, 1 Jun 2025 14:53:57 +0000 (22:53 +0800)
committer	Georgi Gerganov <redacted>
	Tue, 10 Jun 2025 09:40:33 +0000 (12:40 +0300)