server : add parse_special option to /tokenize endpoint (#14783)

author IsaacDynamo <redacted>

Mon, 21 Jul 2025 07:24:51 +0000 (09:24 +0200)

committer GitHub <redacted>

Mon, 21 Jul 2025 07:24:51 +0000 (10:24 +0300)
author IsaacDynamo <redacted>
Mon, 21 Jul 2025 07:24:51 +0000 (09:24 +0200)
committer GitHub <redacted>
Mon, 21 Jul 2025 07:24:51 +0000 (10:24 +0300)
diff --git a/tools/server/README.md b/tools/server/README.md

index e29511cb1b457b7873401db5b770e5c9e50ab4e4..aa07f1ef5b1776e654224579a19ed0531fd7b1f8 100644 (file)
--- a/tools/server/README.md
+++ b/tools/server/README.md
@@ -575,6 +575,8 @@ These words will not be included in the completion, so make sure to add them to
  
  `add_special`: (Optional) Boolean indicating if special tokens, e.g. `BOS`, should be inserted.  Default: `false`
  
+`parse_special`: (Optional) Boolean indicating if special tokens appearing in the input text should be parsed as special tokens. When `false`, special tokens are treated as plaintext.  Default: `true`
+
  `with_pieces`: (Optional) Boolean indicating whether to return token pieces along with IDs.  Default: `false`
  
  **Response:**
diff --git a/tools/server/server.cpp b/tools/server/server.cpp

index 0afe213af1e47e44c48be1a15a8f2a163e4021bf..256a2928b826cfd4e5815a6b1fd193c03a71de8a 100644 (file)
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -4516,9 +4516,10 @@ int main(int argc, char ** argv) {
          json tokens_response = json::array();
          if (body.count("content") != 0) {
              const bool add_special = json_value(body, "add_special", false);
+            const bool parse_special = json_value(body, "parse_special", true);
              const bool with_pieces = json_value(body, "with_pieces", false);
  
-            llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, true);
+            llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, parse_special);
  
              if (with_pieces) {
                  for (const auto& token : tokens) {
author	IsaacDynamo <redacted>
	Mon, 21 Jul 2025 07:24:51 +0000 (09:24 +0200)
committer	GitHub <redacted>
	Mon, 21 Jul 2025 07:24:51 +0000 (10:24 +0300)
tools/server/README.md		patch \| blob \| history
tools/server/server.cpp		patch \| blob \| history