memory : remove unused tmp_buf (#19199)

author Daniel Bevenius <redacted>

Fri, 30 Jan 2026 09:37:06 +0000 (10:37 +0100)

committer GitHub <redacted>

Fri, 30 Jan 2026 09:37:06 +0000 (10:37 +0100)
author Daniel Bevenius <redacted>
Fri, 30 Jan 2026 09:37:06 +0000 (10:37 +0100)
committer GitHub <redacted>
Fri, 30 Jan 2026 09:37:06 +0000 (10:37 +0100)
diff --git a/src/llama-kv-cache.cpp b/src/llama-kv-cache.cpp

index f3c9b49f30af020c28849b6b00a94efcda4ecade..c35cd6761b193599f7a4ffb44b508ae862c7cb3e 100644 (file)
--- a/src/llama-kv-cache.cpp
+++ b/src/llama-kv-cache.cpp
@@ -1772,8 +1772,6 @@ void llama_kv_cache::state_write_data(llama_io_write_i & io, const cell_ranges_t
      io.write(&v_trans, sizeof(v_trans));
      io.write(&n_layer, sizeof(n_layer));
  
-    std::vector<uint8_t> tmp_buf;
-
      // Iterate and write all the keys first, each row is a cell
      // Get whole range at a time
      for (const auto & layer : layers) {
@@ -1791,7 +1789,7 @@ void llama_kv_cache::state_write_data(llama_io_write_i & io, const cell_ranges_t
          const uint64_t k_size_row = ggml_row_size(k->type, n_embd_k_gqa);
          io.write(&k_size_row, sizeof(k_size_row));
  
-        // Read each range of cells of k_size length each into tmp_buf and write out
+        // Read each range of cells of k_size length and write out
          for (const auto & range : cr.data) {
              const size_t range_size = range.second - range.first;
              const size_t buf_size = range_size * k_size_row;
@@ -1818,7 +1816,7 @@ void llama_kv_cache::state_write_data(llama_io_write_i & io, const cell_ranges_t
              const uint64_t v_size_row = ggml_row_size(v->type, n_embd_v_gqa);
              io.write(&v_size_row, sizeof(v_size_row));
  
-            // Read each range of cells of v_size length each into tmp_buf and write out
+            // Read each range of cells of v_size length and write out
              for (const auto & range : cr.data) {
                  const size_t range_size = range.second - range.first;
                  const size_t buf_size = range_size * v_size_row;
@@ -1852,7 +1850,7 @@ void llama_kv_cache::state_write_data(llama_io_write_i & io, const cell_ranges_t
  
              // For each row, we get the element values of each cell
              for (uint32_t j = 0; j < n_embd_v_gqa; ++j) {
-                // Read each range of cells of v_size_el length each into tmp_buf and write out
+                // Read each range of cells of v_size_el length and write out
                  for (const auto & range : cr.data) {
                      const size_t range_size = range.second - range.first;
                      const size_t src_offset = (range.first + j * kv_size) * v_size_el;
diff --git a/src/llama-memory-recurrent.cpp b/src/llama-memory-recurrent.cpp

index 812bf2530491a74433b41c114601aa793cffd76a..c8361b18125c585685ff2553546c7557b6747254 100644 (file)
--- a/src/llama-memory-recurrent.cpp
+++ b/src/llama-memory-recurrent.cpp
@@ -785,8 +785,6 @@ void llama_memory_recurrent::state_write_data(llama_io_write_i & io, const std::
      io.write(&s_trans, sizeof(s_trans));
      io.write(&n_layer,   sizeof(n_layer));
  
-    std::vector<uint8_t> tmp_buf;
-
      // Iterate and write all the keys first, each row is a cell
      // Get whole range at a time
      for (uint32_t il = 0; il < n_layer; ++il) {
@@ -801,7 +799,7 @@ void llama_memory_recurrent::state_write_data(llama_io_write_i & io, const std::
          const uint64_t r_size_row = ggml_row_size(r_l[il]->type, hparams.n_embd_r());
          io.write(&r_size_row, sizeof(r_size_row));
  
-        // Read each range of cells of k_size length each into tmp_buf and write out
+        // Read each range of cells of r_size_row length and write out
          for (const auto & range : cell_ranges) {
              const size_t range_size = range.second - range.first;
              const size_t buf_size = range_size * r_size_row;
@@ -822,7 +820,7 @@ void llama_memory_recurrent::state_write_data(llama_io_write_i & io, const std::
              const uint64_t s_size_row = ggml_row_size(s_l[il]->type, hparams.n_embd_s());
              io.write(&s_size_row, sizeof(s_size_row));
  
-            // Read each range of cells of s_size length each into tmp_buf and write out
+            // Read each range of cells of s_size length and write out
              for (const auto & range : cell_ranges) {
                  const size_t range_size = range.second - range.first;
                  const size_t buf_size = range_size * s_size_row;
@@ -851,7 +849,7 @@ void llama_memory_recurrent::state_write_data(llama_io_write_i & io, const std::
  
              // For each row, we get the element values of each cell
              for (uint32_t j = 0; j < n_embd_s; ++j) {
-                // Read each range of cells of v_size_el length each into tmp_buf and write out
+                // Read each range of cells of s_size_el length and write out
                  for (const auto & range : cell_ranges) {
                      const size_t range_size = range.second - range.first;
                      const size_t src_offset = (range.first + j * mem_size) * s_size_el;
author	Daniel Bevenius <redacted>
	Fri, 30 Jan 2026 09:37:06 +0000 (10:37 +0100)
committer	GitHub <redacted>
	Fri, 30 Jan 2026 09:37:06 +0000 (10:37 +0100)
src/llama-kv-cache.cpp		patch \| blob \| history
src/llama-memory-recurrent.cpp		patch \| blob \| history