git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
server: save generated text for the /slots endpoint (for LLAMA_SERVER_SLOTS_DEBUG)
author: matteo <redacted>
Wed, 18 Feb 2026 17:53:37 +0000 (18:53 +0100)
committer: GitHub <redacted>
Wed, 18 Feb 2026 17:53:37 +0000 (18:53 +0100)
* save generated text for the /slots endpoint

* update debug_generated_text only when LLAMA_SERVER_SLOTS_DEBUG > 0

* Apply suggestions from code review

---------

Co-authored-by: Matteo <redacted>
Co-authored-by: Xuan-Son Nguyen <redacted>
tools/server/server-context.cpp

index ceafcac179c58358da9f022e62034185ec552c56..8aab0d4c1b12fec7dd7128d053a1c8d2927151e4 100644 (file)
@@ -77,6 +77,7 @@ struct server_slot {
     size_t last_nl_pos = 0;
 
     std::string  generated_text;
+    std::string  debug_generated_text;
     llama_tokens generated_tokens;
 
     // idx of draft tokens in the main batch
@@ -425,7 +426,7 @@ struct server_slot {
 
             if (!only_metrics) {
                 res["prompt"] = ptask->tokens.detokenize(ctx, true);
-                res["generated"] = generated_text;
+                res["generated"] = generated_text.empty() ? debug_generated_text : generated_text;
             }
         }
 
@@ -1442,6 +1443,12 @@ private:
         res->id_slot = slot.id;
 
         res->index           = slot.task->index;
+
+        // keep copy of last generated text for debugging purposes
+        if (slots_debug) {
+            slot.debug_generated_text = slot.generated_text;
+        }
+
         // in stream mode, content and tokens are already in last partial chunk
         if (slot.task->params.stream) {
             res->content     = "";