size_t last_nl_pos = 0;
std::string generated_text;
+ std::string debug_generated_text;
llama_tokens generated_tokens;
// idx of draft tokens in the main batch
if (!only_metrics) {
res["prompt"] = ptask->tokens.detokenize(ctx, true);
- res["generated"] = generated_text;
+ res["generated"] = generated_text.empty() ? debug_generated_text : generated_text;
}
}
res->id_slot = slot.id;
res->index = slot.task->index;
+
+ // keep a copy of the last generated text for debugging purposes
+ if (slots_debug) {
+ slot.debug_generated_text = slot.generated_text;
+ }
+
// in stream mode, content and tokens are already in last partial chunk
if (slot.task->params.stream) {
res->content = "";