From: Ishaan Gandhi
Date: Thu, 13 Mar 2025 10:10:05 +0000 (-0400)
Subject: server : fix crash when using verbose output with input tokens that are not in printa...
X-Git-Tag: upstream/0.0.5028~148
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=2048b5913d51beab82dfe29955f9008130b936c0;p=pkg%2Fggml%2Fsources%2Fllama.cpp

server : fix crash when using verbose output with input tokens that are not in printable range (#12178) (#12338)

* Fix DOS index bug

* Remove new APIs

* remove extra line

* Remove from API

* Add extra newline

* Update examples/server/server.cpp

---------

Co-authored-by: Xuan-Son Nguyen
---

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 8cb8d003..ce019547 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2040,6 +2040,18 @@ struct server_context {
         return ret;
     }
 
+    bool can_be_detokenized(const struct llama_context * ctx, const std::vector<llama_token> & tokens) {
+        const llama_model * model = llama_get_model(ctx);
+        const llama_vocab * vocab = llama_model_get_vocab(model);
+        const int32_t n_vocab = llama_vocab_n_tokens(vocab);
+        for (const auto & token : tokens) {
+            if (token < 0 || token >= n_vocab) {
+                return false;
+            }
+        }
+        return true;
+    }
+
     bool launch_slot_with_task(server_slot & slot, const server_task & task) {
         slot.reset();
         slot.id_task = task.id;
@@ -2054,6 +2066,11 @@ struct server_context {
             slot.lora = task.params.lora;
         }
 
+        bool can_detokenize = can_be_detokenized(ctx, slot.prompt_tokens);
+        if (!can_detokenize) {
+            send_error(task, "Prompt contains invalid tokens", ERROR_TYPE_INVALID_REQUEST);
+            return false;
+        }
         SLT_DBG(slot, "launching slot : %s\n", safe_json_to_str(slot.to_json()).c_str());
 
         if (slot.n_predict > 0 && slot.params.n_predict > slot.n_predict) {
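
The change above rejects any prompt whose token ids fall outside the model's vocabulary range before the slot is launched, so later detokenization (e.g. for verbose logging) can no longer index out of bounds. Below is a minimal standalone sketch of the same guard, not part of the patch: the helper name is_valid_prompt and the hard-coded vocabulary size are illustrative only, whereas the real server obtains the range from llama_vocab_n_tokens() as shown in the diff.

    // Standalone sketch of a bounds check on prompt token ids (illustrative only).
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static bool is_valid_prompt(const std::vector<int32_t> & tokens, int32_t n_vocab) {
        for (const int32_t token : tokens) {
            // any id outside [0, n_vocab) would index past the vocabulary table
            // when the prompt is later detokenized for verbose output
            if (token < 0 || token >= n_vocab) {
                return false;
            }
        }
        return true;
    }

    int main() {
        const int32_t n_vocab = 32000;                  // illustrative vocabulary size
        const std::vector<int32_t> ok  = {1, 15043, 2}; // all ids in range
        const std::vector<int32_t> bad = {1, 999999};   // 999999 >= n_vocab, rejected
        std::printf("ok prompt  valid: %s\n", is_valid_prompt(ok,  n_vocab) ? "yes" : "no");
        std::printf("bad prompt valid: %s\n", is_valid_prompt(bad, n_vocab) ? "yes" : "no");
        return 0;
    }

With the guard in place, a request containing an out-of-range token id is answered with the "Prompt contains invalid tokens" error (ERROR_TYPE_INVALID_REQUEST) instead of crashing the server.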