main : fix session loading bug (#3400)

author Georgi Gerganov <redacted>
Wed, 11 Oct 2023 20:55:08 +0000 (23:55 +0300)
committer Georgi Gerganov <redacted>
Wed, 11 Oct 2023 20:55:41 +0000 (23:55 +0300)
diff --git a/examples/main/main.cpp b/examples/main/main.cpp

index b39a67d979c8810f4fceb3abe26976095bfc6808..55f73356fb89a1cd154aae18b44ebeb57ec4abc6 100644 (file)
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -297,6 +297,9 @@ int main(int argc, char ** argv) {
              LOG_TEE("%s: session file matches %zu / %zu tokens of prompt\n",
                  __func__, n_matching_session_tokens, embd_inp.size());
          }
+
+        // remove any "future" tokens that we might have inherited from the previous session
+        llama_kv_cache_tokens_rm(ctx, n_matching_session_tokens, -1);
      }
  
      LOGLN(
@@ -545,9 +548,6 @@ int main(int argc, char ** argv) {
                  if (i > 0) {
                      embd.erase(embd.begin(), embd.begin() + i);
                  }
-
-                // remove any "future" tokens that we might have inherited from the session from the KV cache
-                llama_kv_cache_tokens_rm(ctx, n_past, -1);
              }
  
              // evaluate tokens in batches
author	Georgi Gerganov <redacted>
	Wed, 11 Oct 2023 20:55:08 +0000 (23:55 +0300)
committer	Georgi Gerganov <redacted>
	Wed, 11 Oct 2023 20:55:41 +0000 (23:55 +0300)