git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
RWKV v6: Add time_mix_decay_w1/w2 in quant exclusion list (#9387)
author: Molly Sophia <redacted>
Tue, 10 Sep 2024 07:02:30 +0000 (15:02 +0800)
committer: GitHub <redacted>
Tue, 10 Sep 2024 07:02:30 +0000 (10:02 +0300)
Signed-off-by: Molly Sophia <redacted>
convert_hf_to_gguf.py
src/llama.cpp

index 0a9bbc8294ef7a6f89fe161b1241ccfc0e25b1c5..ca473244eb929cb9f0f9861cd54c75f3dbd9a078 100755 (executable)
@@ -302,6 +302,8 @@ class Model:
                             gguf.MODEL_TENSOR.TIME_MIX_FIRST,
                             gguf.MODEL_TENSOR.TIME_MIX_W1,
                             gguf.MODEL_TENSOR.TIME_MIX_W2,
+                            gguf.MODEL_TENSOR.TIME_MIX_DECAY_W1,
+                            gguf.MODEL_TENSOR.TIME_MIX_DECAY_W2,
                         )
                     )
                     or not new_name.endswith(".weight")
index 39e20440eea83889697eca5e612c0220c0e328a9..ee27cbd1c3c44c72dce021d8d46f9f143f98fbab 100644 (file)
@@ -17530,6 +17530,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         quantize &= name.find("time_mix_first.weight") == std::string::npos;
         quantize &= name.find("time_mix_w1.weight") == std::string::npos;
         quantize &= name.find("time_mix_w2.weight") == std::string::npos;
+        quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos;
+        quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos;
 
         // do not quantize relative position bias (T5)
         quantize &= name.find("attn_rel_b.weight") == std::string::npos;