llama : enable LLAMA_SET_ROWS=1 by default (#14959)

author Georgi Gerganov <redacted>

Sat, 2 Aug 2025 14:14:21 +0000 (17:14 +0300)

committer GitHub <redacted>

Sat, 2 Aug 2025 14:14:21 +0000 (17:14 +0300)
author Georgi Gerganov <redacted>
Sat, 2 Aug 2025 14:14:21 +0000 (17:14 +0300)
committer GitHub <redacted>
Sat, 2 Aug 2025 14:14:21 +0000 (17:14 +0300)
diff --git a/src/llama-context.cpp b/src/llama-context.cpp

index bd637f3dffe9c5436769422ef48f98490c7a47b3..958bcc0477f7b4d47dce72b559badae1c6b8e799 100644 (file)
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -105,7 +105,7 @@ llama_context::llama_context(
  
      {
          const char * LLAMA_SET_ROWS = getenv("LLAMA_SET_ROWS");
-        supports_set_rows = LLAMA_SET_ROWS ? (atoi(LLAMA_SET_ROWS) != 0) : false;
+        supports_set_rows = LLAMA_SET_ROWS ? (atoi(LLAMA_SET_ROWS) != 0) : supports_set_rows;
  
          if (!supports_set_rows && !cparams.kv_unified) {
              LLAMA_LOG_WARN("%s: non-unified KV cache requires ggml_set_rows() - forcing unified KV cache\n", __func__);
diff --git a/src/llama-context.h b/src/llama-context.h

index 7cfdc6a51731a31fde9c2555b4bdc47434aed915..25c143d56dfb2cc567e0abd021fca2237e74828c 100644 (file)
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -289,7 +289,7 @@ private:
  
      // env: LLAMA_SET_ROWS (temporary)
      // ref: https://github.com/ggml-org/llama.cpp/pull/14285
-    bool supports_set_rows = false;
+    bool supports_set_rows = true;
  
      // env: LLAMA_GRAPH_REUSE_DISABLE
      bool graph_reuse_disable = false;
diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp

index 321dc79fc36ab708a4ac96076b3fabf200568a3e..c741014cf375776e5e4df8a38865945507099497 100644 (file)
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -193,7 +193,7 @@ llama_kv_cache_unified::llama_kv_cache_unified(
      debug = LLAMA_KV_CACHE_DEBUG ? atoi(LLAMA_KV_CACHE_DEBUG) : 0;
  
      const char * LLAMA_SET_ROWS = getenv("LLAMA_SET_ROWS");
-    supports_set_rows = LLAMA_SET_ROWS ? atoi(LLAMA_SET_ROWS) != 0 : 0;
+    supports_set_rows = LLAMA_SET_ROWS ? atoi(LLAMA_SET_ROWS) != 0 : supports_set_rows;
  
      if (!supports_set_rows) {
          // ref: https://github.com/ggml-org/llama.cpp/pull/14363
diff --git a/src/llama-kv-cache-unified.h b/src/llama-kv-cache-unified.h

index 3e28e346c3fcf8d1ec09fa15de1b2bd6c4dcb3b4..342a675962e2a85668d882888d98d4e71cf115c4 100644 (file)
--- a/src/llama-kv-cache-unified.h
+++ b/src/llama-kv-cache-unified.h
@@ -230,7 +230,7 @@ private:
  
      // env: LLAMA_SET_ROWS (temporary)
      // ref: https://github.com/ggml-org/llama.cpp/pull/14285
-    bool supports_set_rows = false;
+    bool supports_set_rows = true;
  
      const llama_swa_type swa_type = LLAMA_SWA_TYPE_NONE;
author	Georgi Gerganov <redacted>
	Sat, 2 Aug 2025 14:14:21 +0000 (17:14 +0300)
committer	GitHub <redacted>
	Sat, 2 Aug 2025 14:14:21 +0000 (17:14 +0300)
src/llama-context.cpp		patch | blob | history
src/llama-context.h		patch | blob | history
src/llama-kv-cache-unified.cpp		patch | blob | history
src/llama-kv-cache-unified.h		patch | blob | history