llama.cpp : show model size and BPW on load (#3223)

author slaren <redacted>

Sun, 17 Sep 2023 12:33:28 +0000 (14:33 +0200)

committer GitHub <redacted>

Sun, 17 Sep 2023 12:33:28 +0000 (14:33 +0200)
author slaren <redacted>
Sun, 17 Sep 2023 12:33:28 +0000 (14:33 +0200)
committer GitHub <redacted>
Sun, 17 Sep 2023 12:33:28 +0000 (14:33 +0200)
diff --git a/llama.cpp b/llama.cpp

index 0b334b4e980e6dab950f9ddcfc7d859c95f72a64..79b48897d8bbeedc83d4a3423fc44634b12e9ff0 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -927,6 +927,7 @@ enum e_model {
  
  static const size_t kB = 1024;
  static const size_t MB = kB*kB;
+static const size_t GB = kB*kB*kB;
  
  // default hparams (LLaMA 7B)
  struct llama_hparams {
@@ -1280,6 +1281,7 @@ struct llama_model_loader {
      int n_created = 0;
  
      int64_t n_elements = 0;
+    size_t  n_bytes    = 0;
  
      bool use_mmap = false;
  
@@ -1312,6 +1314,7 @@ struct llama_model_loader {
              const char * name = gguf_get_tensor_name(ctx_gguf, i);
              struct ggml_tensor * t = ggml_get_tensor(ctx_meta, name);
              n_elements += ggml_nelements(t);
+            n_bytes    += ggml_nbytes(t);
          }
  
          LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
@@ -1909,7 +1912,12 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
      LLAMA_LOG_INFO("%s: freq_scale     = %g\n",     __func__, hparams.rope_freq_scale);
      LLAMA_LOG_INFO("%s: model type     = %s\n",     __func__, llama_model_type_name(model.type));
      LLAMA_LOG_INFO("%s: model ftype    = %s\n",     __func__, llama_model_ftype_name(model.ftype).c_str());
-    LLAMA_LOG_INFO("%s: model size     = %.2f B\n", __func__, ml.n_elements*1e-9);
+    LLAMA_LOG_INFO("%s: model params   = %.2f B\n", __func__, ml.n_elements*1e-9);
+    if (ml.n_bytes < GB) {
+        LLAMA_LOG_INFO("%s: model size     = %.2f MiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    } else {
+        LLAMA_LOG_INFO("%s: model size     = %.2f GiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    }
  
      // general kv
      LLAMA_LOG_INFO("%s: general.name   = %s\n",    __func__, model.name.c_str());
@@ -3495,7 +3503,7 @@ static struct ggml_cgraph * llm_build_starcoder(
  
          ggml_allocr_alloc(lctx.alloc, token);
          if (!ggml_allocr_is_measure(lctx.alloc)) {
-            memcpy(token->data, embd, N * n_embd * ggml_element_size(inpL));
+            memcpy(token->data, embd, N * n_embd * ggml_element_size(token));
          }
      }
author	slaren <redacted>
	Sun, 17 Sep 2023 12:33:28 +0000 (14:33 +0200)
committer	GitHub <redacted>
	Sun, 17 Sep 2023 12:33:28 +0000 (14:33 +0200)