From: Ishaan Gandhi
Date: Thu, 13 Mar 2025 10:10:05 +0000 (-0400)
Subject: server : fix crash when using verbose output with input tokens that are not in printa...
X-Git-Tag: upstream/0.0.5028~148
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=2048b5913d51beab82dfe29955f9008130b936c0;p=pkg%2Fggml%2Fsources%2Fllama.cpp

server : fix crash when using verbose output with input tokens that are not in printable range (#12178) (#12338)

* Fix DOS index bug

* Remove new APIs

* remove extra line

* Remove from API

* Add extra newline

* Update examples/server/server.cpp

---------

Co-authored-by: Xuan-Son Nguyen
---

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 8cb8d003..ce019547 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2040,6 +2040,18 @@ struct server_context {
         return ret;
     }
 
+    bool can_be_detokenized(const struct llama_context * ctx, const std::vector<llama_token> & tokens) {
+        const llama_model * model = llama_get_model(ctx);
+        const llama_vocab * vocab = llama_model_get_vocab(model);
+        const int32_t n_vocab = llama_vocab_n_tokens(vocab);
+        for (const auto & token : tokens) {
+            if (token < 0 || token >= n_vocab) {
+                return false;
+            }
+        }
+        return true;
+    }
+
     bool launch_slot_with_task(server_slot & slot, const server_task & task) {
         slot.reset();
         slot.id_task = task.id;
@@ -2054,6 +2066,11 @@ struct server_context {
             slot.lora = task.params.lora;
         }
 
+        bool can_detokenize = can_be_detokenized(ctx, slot.prompt_tokens);
+        if (!can_detokenize) {
+            send_error(task, "Prompt contains invalid tokens", ERROR_TYPE_INVALID_REQUEST);
+            return false;
+        }
         SLT_DBG(slot, "launching slot : %s\n", safe_json_to_str(slot.to_json()).c_str());
 
         if (slot.n_predict > 0 && slot.params.n_predict > slot.n_predict) {
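
The change above rejects any prompt whose token ids fall outside the model's vocabulary range before the slot is launched, so later detokenization (e.g. for verbose logging) can no longer index out of bounds. Below is a minimal standalone sketch of the same guard, not part of the patch: the helper name is_valid_prompt and the hard-coded vocabulary size are illustrative only, whereas the real server obtains the range from llama_vocab_n_tokens() as shown in the diff.

    // Standalone sketch of a bounds check on prompt token ids (illustrative only).
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static bool is_valid_prompt(const std::vector<int32_t> & tokens, int32_t n_vocab) {
        for (const int32_t token : tokens) {
            // any id outside [0, n_vocab) would index past the vocabulary table
            // when the prompt is later detokenized for verbose output
            if (token < 0 || token >= n_vocab) {
                return false;
            }
        }
        return true;
    }

    int main() {
        const int32_t n_vocab = 32000;                  // illustrative vocabulary size
        const std::vector<int32_t> ok  = {1, 15043, 2}; // all ids in range
        const std::vector<int32_t> bad = {1, 999999};   // 999999 >= n_vocab, rejected
        std::printf("ok prompt  valid: %s\n", is_valid_prompt(ok,  n_vocab) ? "yes" : "no");
        std::printf("bad prompt valid: %s\n", is_valid_prompt(bad, n_vocab) ? "yes" : "no");
        return 0;
    }

With the guard in place, a request containing an out-of-range token id is answered with the "Prompt contains invalid tokens" error (ERROR_TYPE_INVALID_REQUEST) instead of crashing the server.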