RWKV v6: Add time_mix_decay_w1/w2 in quant exclusion list (#9387)

author Molly Sophia <redacted>

Tue, 10 Sep 2024 07:02:30 +0000 (15:02 +0800)

committer GitHub <redacted>

Tue, 10 Sep 2024 07:02:30 +0000 (10:02 +0300)
author Molly Sophia <redacted>
Tue, 10 Sep 2024 07:02:30 +0000 (15:02 +0800)
committer GitHub <redacted>
Tue, 10 Sep 2024 07:02:30 +0000 (10:02 +0300)
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py

index 0a9bbc8294ef7a6f89fe161b1241ccfc0e25b1c5..ca473244eb929cb9f0f9861cd54c75f3dbd9a078 100755 (executable)
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -302,6 +302,8 @@ class Model:
                              gguf.MODEL_TENSOR.TIME_MIX_FIRST,
                              gguf.MODEL_TENSOR.TIME_MIX_W1,
                              gguf.MODEL_TENSOR.TIME_MIX_W2,
+                            gguf.MODEL_TENSOR.TIME_MIX_DECAY_W1,
+                            gguf.MODEL_TENSOR.TIME_MIX_DECAY_W2,
                          )
                      )
                      or not new_name.endswith(".weight")
diff --git a/src/llama.cpp b/src/llama.cpp

index 39e20440eea83889697eca5e612c0220c0e328a9..ee27cbd1c3c44c72dce021d8d46f9f143f98fbab 100644 (file)
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17530,6 +17530,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
          quantize &= name.find("time_mix_first.weight") == std::string::npos;
          quantize &= name.find("time_mix_w1.weight") == std::string::npos;
          quantize &= name.find("time_mix_w2.weight") == std::string::npos;
+        quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos;
+        quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos;
  
          // do not quantize relative position bias (T5)
          quantize &= name.find("attn_rel_b.weight") == std::string::npos;
author	Molly Sophia <redacted>
	Tue, 10 Sep 2024 07:02:30 +0000 (15:02 +0800)
committer	GitHub <redacted>
	Tue, 10 Sep 2024 07:02:30 +0000 (10:02 +0300)
convert_hf_to_gguf.py		patch | blob | history
src/llama.cpp		patch | blob | history