]> git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
server: friendlier error msg when ctx < input (#18174)
authorAman Gupta <redacted>
Fri, 19 Dec 2025 11:10:00 +0000 (19:10 +0800)
committerGitHub <redacted>
Fri, 19 Dec 2025 11:10:00 +0000 (12:10 +0100)
* llama-server: friendlier error msg when ctx < input

This PR adds formatted strings to the server's send_error function

* llama-server: use string_format inline

* fix test

tools/server/server-context.cpp

index def57d0252c8c9634cd8dd007794d35b3779803d..9228fba9f578a6c6beacf02d146b1874cc316989 100644 (file)
@@ -1974,19 +1974,33 @@ struct server_context_impl {
 
                         if (!slot.can_split()) {
                             if (slot.task->n_tokens() > n_ubatch) {
-                                send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
+                                send_error(slot,
+                                           string_format(
+                                               "input (%d tokens) is too large to process. increase the physical batch "
+                                               "size (current batch size: %d)",
+                                               slot.task->n_tokens(), n_ubatch),
+                                           ERROR_TYPE_SERVER);
                                 slot.release();
                                 continue;
                             }
 
                             if (slot.task->n_tokens() > slot.n_ctx) {
-                                send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
+                                send_error(
+                                    slot,
+                                    string_format(
+                                        "input (%d tokens) is larger than the max context size (%d tokens). skipping",
+                                        slot.task->n_tokens(), slot.n_ctx),
+                                    ERROR_TYPE_EXCEED_CONTEXT_SIZE);
                                 slot.release();
                                 continue;
                             }
                         } else {
                             if (slot.task->n_tokens() >= slot.n_ctx) {
-                                send_error(slot, "the request exceeds the available context size, try increasing it", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
+                                send_error(slot,
+                                           string_format("request (%d tokens) exceeds the available context size (%d "
+                                                         "tokens), try increasing it",
+                                                         slot.task->n_tokens(), slot.n_ctx),
+                                           ERROR_TYPE_EXCEED_CONTEXT_SIZE);
                                 slot.release();
                                 continue;
                             }