gguf-split: add --no-tensor-first-split (#7072)

author Xuan Son Nguyen <redacted>

Sat, 4 May 2024 16:56:22 +0000 (18:56 +0200)

committer GitHub <redacted>

Sat, 4 May 2024 16:56:22 +0000 (18:56 +0200)
author Xuan Son Nguyen <redacted>
Sat, 4 May 2024 16:56:22 +0000 (18:56 +0200)
committer GitHub <redacted>
Sat, 4 May 2024 16:56:22 +0000 (18:56 +0200)
diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp

index 39c75e0a7a802209edbf09259b39a2cc773c103a..e04feeae3918841e224b631c99ff02fea14bd00f 100644 (file)
--- a/examples/gguf-split/gguf-split.cpp
+++ b/examples/gguf-split/gguf-split.cpp
@@ -32,6 +32,7 @@ struct split_params {
      int n_split_tensors = 128;
      std::string input;
      std::string output;
+    bool no_tensor_first_split = false;
      bool dry_run = false;
  };
  
@@ -49,6 +50,7 @@ static void split_print_usage(const char * executable) {
      printf("  --merge                 merge multiple GGUF to a single GGUF\n");
      printf("  --split-max-tensors     max tensors in each split (default: %d)\n", default_params.n_split_tensors);
      printf("  --split-max-size N(M|G) max size per split\n");
+    printf("  --no-tensor-first-split do not add tensors to the first split (disabled by default)\n");
      printf("  --dry-run               only print out a split plan and exit, without writing any new files\n");
      printf("\n");
  }
@@ -100,6 +102,10 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
              arg_found = true;
              params.dry_run = true;
          }
+        if (arg == "--no-tensor-first-split") {
+            arg_found = true;
+            params.no_tensor_first_split = true;
+        }
  
          if (is_op_set) {
              throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
@@ -200,10 +206,10 @@ struct split_strategy {
          // because we need to know list of tensors for each file in advance, we will build all the ctx_out for all output splits
          int i_split = -1;
          struct gguf_context * ctx_out = NULL;
-        auto new_ctx_out = [&]() {
+        auto new_ctx_out = [&](bool allow_no_tensors) {
              i_split++;
              if (ctx_out != NULL) {
-                if (gguf_get_n_tensors(ctx_out) == 0) {
+                if (gguf_get_n_tensors(ctx_out) == 0 && !allow_no_tensors) {
                      fprintf(stderr, "error: one of splits have 0 tensors. Maybe size or tensors limit is too small\n");
                      exit(EXIT_FAILURE);
                  }
@@ -220,7 +226,12 @@ struct split_strategy {
          };
  
          // initialize ctx_out for the first split
-        new_ctx_out();
+        new_ctx_out(false);
+
+        // skip first split if no_tensor_first_split is set
+        if (params.no_tensor_first_split) {
+            new_ctx_out(true);
+        }
  
          // process tensors one by one
          size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
@@ -230,7 +241,7 @@ struct split_strategy {
              size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
              size_t next_tensors_size = curr_tensors_size + n_bytes;
              if (should_split(i, next_tensors_size)) {
-                new_ctx_out();
+                new_ctx_out(false);
                  curr_tensors_size = n_bytes;
              } else {
                  curr_tensors_size = next_tensors_size;
diff --git a/examples/gguf-split/tests.sh b/examples/gguf-split/tests.sh

index 57588204dd62943c27eb85bad5b3ca6d5bfcea1b..7ca6fa7f20de88dfd08864d7b37c5b7d6591fed5 100755 (executable)
--- a/examples/gguf-split/tests.sh
+++ b/examples/gguf-split/tests.sh
@@ -55,15 +55,15 @@ $MAIN --model $WORK_PATH/ggml-model-merge.gguf --random-prompt --n-predict 32
  echo PASS
  echo
  
-# 4. Split with no tensor in metadata
-#$SPLIT --split-max-tensors 32 --no-tensor-in-metadata $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-32-tensors
-#echo PASS
-#echo
+# 4. Split with no tensors in the first split
+$SPLIT --split-max-tensors 32 --no-tensor-first-split $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-32-tensors
+echo PASS
+echo
  
  # 4b. Test the sharded model is loading properly
-#$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf --random-prompt --n-predict 32
-#echo PASS
-#echo
+$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --random-prompt --n-predict 32
+echo PASS
+echo
  
  # 5. Merge
  #$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf
diff --git a/ggml.c b/ggml.c

index 74ecd592791679c68939b0439c6b3be53d966d9a..82179a1257f30f2b64dced028ff994732694275d 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -21139,7 +21139,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
      }
  
      // read the tensor infos
-    {
+    if (ctx->header.n_tensors > 0) {
          ctx->infos = GGML_CALLOC(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));
  
          for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
author	Xuan Son Nguyen <redacted>
	Sat, 4 May 2024 16:56:22 +0000 (18:56 +0200)
committer	GitHub <redacted>
	Sat, 4 May 2024 16:56:22 +0000 (18:56 +0200)
examples/gguf-split/gguf-split.cpp		patch | blob | history
examples/gguf-split/tests.sh		patch | blob | history
ggml.c		patch | blob | history