LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
llama_model_loader(const std::string & fname, bool use_mmap, const struct llama_model_kv_override * param_overrides_p) : file(fname.c_str(), "rb") {
+ int trace = 0;
+ if (getenv("LLAMA_TRACE")) {
+ trace = atoi(getenv("LLAMA_TRACE"));
+ }
+
struct gguf_init_params params = {
/*.no_alloc = */ true,
/*.ctx = */ &ctx_meta,
type_max = type;
}
- // TODO: make runtime configurable
-#if 0
- struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
- LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
-#endif
+ if (trace > 0) {
+ struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
+ LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
+ }
}
switch (type_max) {
static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
static const char * hex = "0123456789ABCDEF";
switch (llama_vocab_get_type(vocab)) {
- case LLAMA_VOCAB_TYPE_SPM: {
- const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
- return vocab.token_to_id.at(buf);
- }
- case LLAMA_VOCAB_TYPE_BPE: {
- return vocab.token_to_id.at(bytes_to_unicode_bpe(ch));
- }
- default:
- GGML_ASSERT(false);
+ case LLAMA_VOCAB_TYPE_SPM: {
+ const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
+ return vocab.token_to_id.at(buf);
+ }
+ case LLAMA_VOCAB_TYPE_BPE: {
+ return vocab.token_to_id.at(bytes_to_unicode_bpe(ch));
+ }
+ default:
+ GGML_ASSERT(false);
}
}
}
#ifdef PRETOKENIZERDEBUG
- LLAMA_LOG_WARN(TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
+ LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
#endif
llm_tokenizer_spm tokenizer(vocab);
llama_escape_whitespace(raw_text);
auto raw_text = fragment.raw_text.substr(fragment.offset, fragment.length);
#ifdef PRETOKENIZERDEBUG
- LLAMA_LOG_WARN(TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
+ LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
#endif
llm_tokenizer_bpe tokenizer(vocab);
tokenizer.tokenize(raw_text, output);