for (size_t i = 0; i < candidates->size; ++i) {
    const llama_token id = candidates->data[i].id;
-   const std::string & piece = ctx->model.vocab.id_to_token[id].text;
+   const std::string piece = llama_token_to_piece(ctx, id);
    if (id == eos) {
        if (!allow_eos) {
            candidates->data[i].logit = -INFINITY;

    // in llama_grammar_accept_token(): reached only when EOS was accepted while
    // the grammar is not at a valid end point
    GGML_ASSERT(false);
}

- const std::string & piece = ctx->model.vocab.id_to_token[token].text;
+ const std::string piece = llama_token_to_piece(ctx, token);

// Note terminating 0 in decoded string
const auto decoded = decode_utf8(piece, grammar->partial_utf8);
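
// A sketch of how decoded is consumed next, assuming the llama_grammar_accept
// helper from the same file: each code point of the piece (all but the
// terminating 0 noted above) advances the live grammar stacks, and any partial
// UTF-8 state is carried into the next token.
const auto & code_points = decoded.first;
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
    // stacks that cannot accept this character are dropped
    grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
}
// remember any incomplete multi-byte sequence for the next call
grammar->partial_utf8 = decoded.second;
// at least one stack must survive an accepted token
GGML_ASSERT(!grammar->stacks.empty());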