From: Georgi Gerganov Date: Fri, 3 Apr 2026 06:07:01 +0000 (+0300) Subject: (revert) kv-cache : do not quantize SWA KV cache (#21332) X-Git-Tag: upstream/0.0.8681~37 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=39b27f0da0271c06986cb31b68bc0fe68e780616;p=pkg%2Fggml%2Fsources%2Fllama.cpp (revert) kv-cache : do not quantize SWA KV cache (#21332) This reverts commit 17193cce34036a6488b092ca79313d4ee1f895f5. --- diff --git a/src/llama-kv-cache-iswa.cpp b/src/llama-kv-cache-iswa.cpp index 15b3fe16e..26e2cb427 100644 --- a/src/llama-kv-cache-iswa.cpp +++ b/src/llama-kv-cache-iswa.cpp @@ -66,9 +66,8 @@ llama_kv_cache_iswa::llama_kv_cache_iswa( LLAMA_LOG_INFO("%s: creating SWA KV cache, size = %u cells\n", __func__, size_swa); - // note: the SWA cache is never quantized because it is relatively small kv_swa = std::make_unique<llama_kv_cache>( - model, GGML_TYPE_F16, GGML_TYPE_F16, + model, type_k, type_v, v_trans, offload, unified, size_swa, n_seq_max, n_pad, hparams.n_swa, hparams.swa_type, filter_swa, reuse); }