git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
kv-cache : fix find_slot to not search for continuous slot (#15638)
author: Georgi Gerganov <redacted>
Thu, 28 Aug 2025 14:09:05 +0000 (17:09 +0300)
committer: GitHub <redacted>
Thu, 28 Aug 2025 14:09:05 +0000 (17:09 +0300)
ggml-ci

src/llama-kv-cache.cpp

index 4485f78d5f5330bacac182aec38cd884583cf3ab..f1c6918738684c1ace92fa7c010a437423791abc 100644 (file)
@@ -540,7 +540,7 @@ llama_kv_cache::slot_info_vec_t llama_kv_cache::prepare(const std::vector<llama_
 
     for (const auto & ubatch : ubatches) {
         // only find a suitable slot for the ubatch. don't modify the cells yet
-        const auto sinfo_new = find_slot(ubatch, true);
+        const auto sinfo_new = find_slot(ubatch, false);
         if (sinfo_new.empty()) {
             success = false;
             break;