talk-llama : sync llama.cpp

author Georgi Gerganov <redacted>

Sun, 14 Jan 2024 16:08:20 +0000 (18:08 +0200)

committer Georgi Gerganov <redacted>

Sun, 14 Jan 2024 16:08:20 +0000 (18:08 +0200)
author Georgi Gerganov <redacted>
Sun, 14 Jan 2024 16:08:20 +0000 (18:08 +0200)
committer Georgi Gerganov <redacted>
Sun, 14 Jan 2024 16:08:20 +0000 (18:08 +0200)
diff --git a/examples/talk-llama/llama.cpp b/examples/talk-llama/llama.cpp

index 51821965e1b470a92d68d507c7d66652316ee26b..7af38718c4130d1c329a1f48e5af4b3fc68655da 100644 (file)
--- a/examples/talk-llama/llama.cpp
+++ b/examples/talk-llama/llama.cpp
@@ -2190,6 +2190,11 @@ struct llama_model_loader {
      LLM_KV      llm_kv    = LLM_KV(LLM_ARCH_UNKNOWN);
  
      llama_model_loader(const std::string & fname, bool use_mmap, const struct llama_model_kv_override * param_overrides_p) : file(fname.c_str(), "rb") {
+        int trace = 0;
+        if (getenv("LLAMA_TRACE")) {
+            trace = atoi(getenv("LLAMA_TRACE"));
+        }
+
          struct gguf_init_params params = {
              /*.no_alloc = */ true,
              /*.ctx      = */ &ctx_meta,
@@ -2242,11 +2247,10 @@ struct llama_model_loader {
                      type_max   = type;
                  }
  
-                // TODO: make runtime configurable
-#if 0
-                struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
-                LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
-#endif
+                if (trace > 0) {
+                    struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
+                    LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
+                }
              }
  
              switch (type_max) {
@@ -6451,15 +6455,15 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) {
  static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
      static const char * hex = "0123456789ABCDEF";
      switch (llama_vocab_get_type(vocab)) {
-    case LLAMA_VOCAB_TYPE_SPM: {
-        const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
-        return vocab.token_to_id.at(buf);
-    }
-    case LLAMA_VOCAB_TYPE_BPE: {
-        return vocab.token_to_id.at(bytes_to_unicode_bpe(ch));
-    }
-    default:
-        GGML_ASSERT(false);
+        case LLAMA_VOCAB_TYPE_SPM: {
+            const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
+            return vocab.token_to_id.at(buf);
+        }
+        case LLAMA_VOCAB_TYPE_BPE: {
+            return vocab.token_to_id.at(bytes_to_unicode_bpe(ch));
+        }
+        default:
+            GGML_ASSERT(false);
      }
  }
  
@@ -7095,7 +7099,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                          }
  
  #ifdef PRETOKENIZERDEBUG
-                        LLAMA_LOG_WARN(TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
+                        LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
  #endif
                          llm_tokenizer_spm tokenizer(vocab);
                          llama_escape_whitespace(raw_text);
@@ -7116,7 +7120,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                          auto raw_text = fragment.raw_text.substr(fragment.offset, fragment.length);
  
  #ifdef PRETOKENIZERDEBUG
-                        LLAMA_LOG_WARN(TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
+                        LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
  #endif
                          llm_tokenizer_bpe tokenizer(vocab);
                          tokenizer.tokenize(raw_text, output);
author	Georgi Gerganov <redacted>
	Sun, 14 Jan 2024 16:08:20 +0000 (18:08 +0200)
committer	Georgi Gerganov <redacted>
	Sun, 14 Jan 2024 16:08:20 +0000 (18:08 +0200)