From: IsaacDynamo Date: Mon, 21 Jul 2025 07:24:51 +0000 (+0200) Subject: server : add parse_special option to /tokenize endpoint (#14783) X-Git-Tag: upstream/0.0.6073~125 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=b4efd77f8ab407836ca73a5176f041650c5b2411;p=pkg%2Fggml%2Fsources%2Fllama.cpp server : add parse_special option to /tokenize endpoint (#14783) --- diff --git a/tools/server/README.md b/tools/server/README.md index e29511cb..aa07f1ef 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -575,6 +575,8 @@ These words will not be included in the completion, so make sure to add them to `add_special`: (Optional) Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false` +`parse_special`: (Optional) Boolean indicating if special tokens should be tokenized. When `false` special tokens are treated as plaintext. Default: `true` + `with_pieces`: (Optional) Boolean indicating whether to return token pieces along with IDs. Default: `false` **Response:** diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 0afe213a..256a2928 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -4516,9 +4516,10 @@ int main(int argc, char ** argv) { json tokens_response = json::array(); if (body.count("content") != 0) { const bool add_special = json_value(body, "add_special", false); + const bool parse_special = json_value(body, "parse_special", true); const bool with_pieces = json_value(body, "with_pieces", false); - llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, true); + llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, parse_special); if (with_pieces) { for (const auto& token : tokens) {