    return result;
}
-std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
+std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
    std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
    if (n_tokens < 0) {
        result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
        GGML_ASSERT(check == -n_tokens);
    } else {
        result.resize(n_tokens);
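
The wrapper above keeps the usual two-call contract: if the initial 8-byte buffer is too small, the inner call reports the required size as a negative value, the buffer is resized, and the call is repeated with the same `special` flag. A minimal caller-side sketch of how the updated helper would typically be used; the helper name and the surrounding setup are illustrative, not part of the patch:

```cpp
#include "common.h"   // assumed available: declares the llama_token_to_piece wrapper
#include <string>
#include <vector>

// Illustrative only: turn a token sequence back into text, forwarding the new
// `special` flag to every per-token call. Each call grows its own buffer as needed.
static std::string tokens_to_text(llama_context * ctx,
                                  const std::vector<llama_token> & tokens,
                                  bool special) {
    std::string text;
    for (const llama_token tok : tokens) {
        text += llama_token_to_piece(ctx, tok, special);
    }
    return text;
}
```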
        bool add_special,
        bool parse_special = false);
-// tokenizes a token into a piece
+// tokenizes a token into a piece, optionally renders special/control tokens
// should work similar to Python's `tokenizer.id_to_piece`
std::string llama_token_to_piece(
        const struct llama_context * ctx,
-        llama_token token);
+        llama_token token,
+        bool special = true);
// TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
// that takes into account the tokenizer type and decides how to handle the leading space
bool process_token(completion_token_output & result, server_slot & slot) {
    // remember which tokens were sampled - used for repetition penalties during sampling
-    const std::string token_str = llama_token_to_piece(ctx, result.tok);
+    const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
    slot.sampled = result.tok;
    // search stop word and delete it
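
With `special = false`, a sampled control token (for example an end-of-turn marker from a chat template) no longer leaks its literal text into the string the server accumulates for the client. If the rendered form is still wanted, e.g. for logging, it can be requested separately; a sketch of that idea, not taken from the server code:

```cpp
// Sketch only: keep the client-visible piece free of control-token text while
// retaining the rendered form for diagnostics.
const std::string visible  = llama_token_to_piece(ctx, result.tok, false);
const std::string rendered = llama_token_to_piece(ctx, result.tok, true); // may be e.g. "<|im_end|>"
```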