memory : correctly handle failure in apply() (#14438)

author Georgi Gerganov <redacted>

Mon, 30 Jun 2025 15:03:03 +0000 (18:03 +0300)

committer GitHub <redacted>

Mon, 30 Jun 2025 15:03:03 +0000 (18:03 +0300)
author Georgi Gerganov <redacted>
Mon, 30 Jun 2025 15:03:03 +0000 (18:03 +0300)
committer GitHub <redacted>
Mon, 30 Jun 2025 15:03:03 +0000 (18:03 +0300)
diff --git a/src/llama-kv-cache-unified-iswa.cpp b/src/llama-kv-cache-unified-iswa.cpp

index b9169299c0760c7dd5160683c715ed34ccc568df..d1f839b63aaf55fd61bd6a422f722ceca4adaac4 100644 (file)
--- a/src/llama-kv-cache-unified-iswa.cpp
+++ b/src/llama-kv-cache-unified-iswa.cpp
@@ -246,7 +246,7 @@ bool llama_kv_cache_unified_iswa_context::next() {
  }
  
  bool llama_kv_cache_unified_iswa_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));
  
      bool res = true;
  
diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp

index 8517b722a9f80715ce011c7b8b158fbbaeed1ae5..7f7b162ffd7cefd524ab9676110d0c61051574ef 100644 (file)
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -1776,7 +1776,7 @@ bool llama_kv_cache_unified_context::next() {
  }
  
  bool llama_kv_cache_unified_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));
  
      // no ubatches -> this is a KV cache update
      if (ubatches.empty()) {
diff --git a/src/llama-memory-hybrid.cpp b/src/llama-memory-hybrid.cpp

index 15cde98d138a85a7d9ea02512e6f7c587d7667fc..67cbf955482354a1097c5454107acc4afe4c8c36 100644 (file)
--- a/src/llama-memory-hybrid.cpp
+++ b/src/llama-memory-hybrid.cpp
@@ -218,7 +218,7 @@ bool llama_memory_hybrid_context::next() {
  }
  
  bool llama_memory_hybrid_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));
  
      bool res = true;
  
diff --git a/src/llama-memory-recurrent.cpp b/src/llama-memory-recurrent.cpp

index e52156bf308b66b417ff9743e216dca93660fab2..6ed84057ccfe25b36006faee916fbcd7218e95c9 100644 (file)
--- a/src/llama-memory-recurrent.cpp
+++ b/src/llama-memory-recurrent.cpp
@@ -1071,7 +1071,15 @@ bool llama_memory_recurrent_context::next() {
  }
  
  bool llama_memory_recurrent_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));
+
+    // no ubatches -> this is an update
+    if (ubatches.empty()) {
+        // recurrent cache never performs updates
+        assert(status == LLAMA_MEMORY_STATUS_NO_UPDATE);
+
+        return true;
+    }
  
      mem->find_slot(ubatches[i_next]);
  
diff --git a/src/llama-memory.cpp b/src/llama-memory.cpp

index f1107672c6476411b04521db02379255328e7728..ca6844c32a76748cb278d90e1898dfa788dd1e3f 100644 (file)
--- a/src/llama-memory.cpp
+++ b/src/llama-memory.cpp
@@ -40,3 +40,20 @@ llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_me
      // if either status has an update, then the combined status has an update
      return has_update ? LLAMA_MEMORY_STATUS_SUCCESS : LLAMA_MEMORY_STATUS_NO_UPDATE;
  }
+
+bool llama_memory_status_is_fail(llama_memory_status status) {
+    switch (status) {
+        case LLAMA_MEMORY_STATUS_SUCCESS:
+        case LLAMA_MEMORY_STATUS_NO_UPDATE:
+            {
+                return false;
+            }
+        case LLAMA_MEMORY_STATUS_FAILED_PREPARE:
+        case LLAMA_MEMORY_STATUS_FAILED_COMPUTE:
+            {
+                return true;
+            }
+    }
+
+    return false;
+}
diff --git a/src/llama-memory.h b/src/llama-memory.h

index 16b7e5ee2484a05b293de97ee3816cb0bcab6239..e8ba336e8525d16b2cd277eb53a60c4c36ecbc39 100644 (file)
--- a/src/llama-memory.h
+++ b/src/llama-memory.h
@@ -31,6 +31,9 @@ enum llama_memory_status {
  // useful for implementing hybrid memory types (e.g. iSWA)
  llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_memory_status s1);
  
+// helper function for checking if a memory status indicates a failure
+bool llama_memory_status_is_fail(llama_memory_status status);
+
  // the interface for managing the memory context during batch processing
  // this interface is implemented per memory type. see:
  //   - llama_kv_cache_unified_context
author	Georgi Gerganov <redacted>
	Mon, 30 Jun 2025 15:03:03 +0000 (18:03 +0300)
committer	GitHub <redacted>
	Mon, 30 Jun 2025 15:03:03 +0000 (18:03 +0300)
src/llama-kv-cache-unified-iswa.cpp		patch | blob | history
src/llama-kv-cache-unified.cpp		patch | blob | history
src/llama-memory-hybrid.cpp		patch | blob | history
src/llama-memory-recurrent.cpp		patch | blob | history
src/llama-memory.cpp		patch | blob | history
src/llama-memory.h		patch | blob | history