throw std::runtime_error("cannot find tokenizer vocab in model file\n");
}
+ const uint32_t n_tokens = gguf_get_arr_n(ctx, token_idx);
+
const float * scores = nullptr;
const int score_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_SCORES).c_str());
if (score_idx != -1) {
+ const uint32_t n_scores = gguf_get_arr_n(ctx, score_idx);
+ if (n_scores < n_tokens) {
+ throw std::runtime_error("Index out of array bounds for scores (" + std::to_string(n_scores) + " < " + std::to_string(n_tokens) + ")\n");
+ }
scores = (const float * ) gguf_get_arr_data(ctx, score_idx);
}
const int * toktypes = nullptr;
const int toktype_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_TOKEN_TYPE).c_str());
if (toktype_idx != -1) {
+ const uint32_t n_toktypes = gguf_get_arr_n(ctx, toktype_idx);
+ if (n_toktypes < n_tokens) {
+ throw std::runtime_error("Index out of array bounds for toktypes (" + std::to_string(n_toktypes) + " < " + std::to_string(n_tokens) + ")\n");
+ }
toktypes = (const int * ) gguf_get_arr_data(ctx, toktype_idx);
}
- uint32_t n_tokens = gguf_get_arr_n(ctx, token_idx);
id_to_token.resize(n_tokens);
for (uint32_t i = 0; i < n_tokens; i++) {