for (size_t i = 0; i < candidates->size; ++i) {
    const llama_token id = candidates->data[i].id;
-   const std::string & piece = ctx->model.vocab.id_to_token[id].text;
+   const std::string piece = llama_token_to_piece(ctx, id);
    if (id == eos) {
        if (!allow_eos) {
            candidates->data[i].logit = -INFINITY;

    // in llama_grammar_accept_token(): reached only when EOS was accepted while
    // the grammar is not at a valid end point
    GGML_ASSERT(false);
}

- const std::string & piece = ctx->model.vocab.id_to_token[token].text;
+ const std::string piece = llama_token_to_piece(ctx, token);

// Note terminating 0 in decoded string
const auto decoded = decode_utf8(piece, grammar->partial_utf8);
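
// A sketch of how decoded is consumed next, assuming the llama_grammar_accept
// helper from the same file: each code point of the piece (all but the
// terminating 0 noted above) advances the live grammar stacks, and any partial
// UTF-8 state is carried into the next token.
const auto & code_points = decoded.first;
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
    // stacks that cannot accept this character are dropped
    grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
}
// remember any incomplete multi-byte sequence for the next call
grammar->partial_utf8 = decoded.second;
// at least one stack must survive an accepted token
GGML_ASSERT(!grammar->stacks.empty());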