llama : add early return for empty range (#8327)

author Daniel Bevenius <redacted>

Sat, 6 Jul 2024 07:22:16 +0000 (09:22 +0200)

committer GitHub <redacted>

Sat, 6 Jul 2024 07:22:16 +0000 (10:22 +0300)
author Daniel Bevenius <redacted>
Sat, 6 Jul 2024 07:22:16 +0000 (09:22 +0200)
committer GitHub <redacted>
Sat, 6 Jul 2024 07:22:16 +0000 (10:22 +0300)
diff --git a/src/llama.cpp b/src/llama.cpp

index b770ca5bc33fc6621b653c49e07e35274e24cb1e..b39906fd53b6f11adfa0e928171f931929d78a39 100644 (file)
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -3260,6 +3260,8 @@ static void llama_kv_cache_seq_add(
  
      if (p0 < 0) p0 = 0;
      if (p1 < 0) p1 = std::numeric_limits<llama_pos>::max();
+    // If there is no range then return early to avoid looping over the cache.
+    if (p0 == p1) return;
  
      if (cache.recurrent) {
          // for Mamba-like models, only the pos needs to be shifted
@@ -3304,6 +3306,8 @@ static void llama_kv_cache_seq_div(
                            int   d) {
      if (p0 < 0) p0 = 0;
      if (p1 < 0) p1 = std::numeric_limits<llama_pos>::max();
+    // If there is no range then return early to avoid looping over the cache.
+    if (p0 == p1) return;
  
      if (cache.recurrent) {
          // for Mamba-like models, only the pos needs to be changed
author	Daniel Bevenius <redacted>
	Sat, 6 Jul 2024 07:22:16 +0000 (09:22 +0200)
committer	GitHub <redacted>
	Sat, 6 Jul 2024 07:22:16 +0000 (10:22 +0300)