From: Georgi Gerganov <redacted>
Date: Thu, 2 Apr 2026 08:54:05 +0000 (+0300)
Subject: kv-cache : do not quantize SWA KV cache (#21277)
X-Git-Tag: upstream/0.0.8681~48
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=17193cce34036a6488b092ca79313d4ee1f895f5;p=pkg%2Fggml%2Fsources%2Fllama.cpp

kv-cache : do not quantize SWA KV cache (#21277)
---

diff --git a/src/llama-kv-cache-iswa.cpp b/src/llama-kv-cache-iswa.cpp
index 26e2cb427..15b3fe16e 100644
--- a/src/llama-kv-cache-iswa.cpp
+++ b/src/llama-kv-cache-iswa.cpp
@@ -66,8 +66,9 @@ llama_kv_cache_iswa::llama_kv_cache_iswa(
 
     LLAMA_LOG_INFO("%s: creating     SWA KV cache, size = %u cells\n", __func__, size_swa);
 
+    // note: the SWA cache is never quantized because it is relatively small
     kv_swa = std::make_unique<llama_kv_cache>(
-            model, type_k, type_v,
+            model, GGML_TYPE_F16, GGML_TYPE_F16,
             v_trans, offload, unified, size_swa, n_seq_max, n_pad,
             hparams.n_swa, hparams.swa_type, filter_swa, reuse);
 }