// NOTE(review): this chunk contains unresolved unified-diff markers
// ('-'/'+' line prefixes). It appears a patch that replaces the local
// alias `n_ctx` with direct `cache.size` reads was pasted here without
// being applied, so this text will NOT compile as-is. To resolve: delete
// the '-' lines and keep the '+' lines (stripping the leading '+').
//
// Purpose (visible logic): search the KV cache for a contiguous run of
// batch.n_tokens cells, starting at cache.head and wrapping to index 0
// when the tail of the cache is too short to hold the batch. Returns
// false when the batch can never fit or no slot is found.
//
// NOTE(review): the function continues past the end of this chunk — the
// success path that actually claims the slot (and the final `return
// true`) is not visible here.
static bool llama_kv_cache_find_slot(
struct llama_kv_cache & cache,
const struct llama_batch & batch) {
- const uint32_t n_ctx = cache.size;
const uint32_t n_tokens = batch.n_tokens;
// NOTE(review): the recurrent branch is empty in this view — its body
// was presumably elided from this chunk; confirm against the full file
// before assuming recurrent caches are a no-op here.
if (cache.recurrent) {
}
// otherwise, one cell per token.
// A batch larger than the entire cache can never fit, regardless of
// where cache.head currently points — fail fast with a log message.
- if (n_tokens > n_ctx) {
- LLAMA_LOG_ERROR("%s: n_tokens=%d > n_ctx=%d\n", __func__, n_tokens, n_ctx);
+ if (n_tokens > cache.size) {
+ LLAMA_LOG_ERROR("%s: n_tokens=%d > cache.size=%d\n", __func__, n_tokens, cache.size);
return false;
}
// n_tested accumulates the number of cells skipped so far; once it
// reaches the cache size we have wrapped all the way around without
// finding room (checked after the loop below).
uint32_t n_tested = 0;
while (true) {
// Not enough room between cache.head and the end of the cache:
// count the skipped tail cells and restart the search at index 0.
- if (cache.head + n_tokens > n_ctx) {
- n_tested += n_ctx - cache.head;
+ if (cache.head + n_tokens > cache.size) {
+ n_tested += cache.size - cache.head;
cache.head = 0;
continue;
}
// NOTE(review): as shown, the loop breaks as soon as the batch fits
// geometrically — the per-cell occupancy check that would normally
// sit here (advancing cache.head past occupied cells and bumping
// n_tested) is not visible in this chunk; confirm it was elided
// rather than deleted.
break;
}
// Wrapped past a full cache's worth of cells without success.
- if (n_tested >= n_ctx) {
+ if (n_tested >= cache.size) {
//LLAMA_LOG_ERROR("%s: failed to find a slot for %d tokens\n", __func__, n_tested);
return false;
}