From: Georgi Gerganov
Date: Thu, 4 Jan 2024 17:56:33 +0000 (+0200)
Subject: server : send token probs for "stream == false" (#4714)
X-Git-Tag: upstream/0.0.4488~2720
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=012cf349aec8ffb47c9def5dc018240fa3721e8b;p=pkg%2Fggml%2Fsources%2Fllama.cpp

server : send token probs for "stream == false" (#4714)
---

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index e45ea809..d1469fb0 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1265,7 +1265,7 @@ struct llama_server_context
         {
             std::vector<completion_token_output> probs_output = {};
             const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
-            size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
+            size_t probs_pos      = std::min(slot.sent_token_probs_index,                       slot.generated_token_probs.size());
             size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
             if (probs_pos < probs_stop_pos)
             {
@@ -1325,7 +1325,7 @@ struct llama_server_context
             {
                 probs = std::vector<completion_token_output>(
                                     slot.generated_token_probs.begin(),
-                                    slot.generated_token_probs.begin() + slot.sent_token_probs_index);
+                                    slot.generated_token_probs.end());
             }
             res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs);
         }
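
Why the one-line fix in the second hunk matters: with "stream": false nothing is
sent incrementally, so slot.sent_token_probs_index never advances past 0. The old
end iterator, begin() + sent_token_probs_index, therefore produced an empty range
and the final response carried no token probabilities; using end() returns the
whole vector. Below is a minimal standalone sketch of that slicing difference,
not the server code itself: completion_token_output is reduced to a stub and the
slot bookkeeping is mocked as plain locals.

    #include <cstdio>
    #include <string>
    #include <vector>

    // Stand-in stub for the completion_token_output struct in server.cpp.
    struct completion_token_output {
        std::string text;
        float       prob;
    };

    int main() {
        std::vector<completion_token_output> generated_token_probs = {
            {"Hello", 0.9f}, {",", 0.8f}, {" world", 0.7f},
        };

        // With stream == false no chunks are sent, so the index tracking
        // already-sent probs stays at 0.
        size_t sent_token_probs_index = 0;

        // Old code: [begin, begin + 0) is empty -> no probs in the response.
        std::vector<completion_token_output> before_fix(
            generated_token_probs.begin(),
            generated_token_probs.begin() + sent_token_probs_index);

        // Fixed code: [begin, end) -> every generated token's probs.
        std::vector<completion_token_output> after_fix(
            generated_token_probs.begin(),
            generated_token_probs.end());

        printf("before fix: %zu probs, after fix: %zu probs\n",
               before_fix.size(), after_fix.size());   // prints 0 vs. 3
    }

In practice this means a non-streaming completion request that sets n_probs > 0
now gets its completion_probabilities array populated in the single final JSON
response, matching what streaming clients already received chunk by chunk.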