From: DannyDaemonic Date: Mon, 29 May 2023 12:13:40 +0000 (-0700) Subject: Work around for recalculating logits in cached prompts (Fixes #1585) (#1609) X-Git-Tag: gguf-v0.4.0~702 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=248367605ead6fb7c36d2bfb1ebd8f00a23f7c71;p=pkg%2Fggml%2Fsources%2Fllama.cpp Work around for recalculating logits in cached prompts (Fixes #1585) (#1609) * Work around for recalculating logits in cached prompts --- diff --git a/examples/main/main.cpp b/examples/main/main.cpp index c7c59153..6131f5b4 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -360,6 +360,12 @@ int main(int argc, char ** argv) { } } if (i > 0) { + // check if we've used up all the prompt but not all cached tokens + if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) { + // force revaluation of the last token to recalculate logits + i--; + n_past--; + } embd.erase(embd.begin(), embd.begin() + i); } }