* Enable --no-context-shift for llama-perplexity example

Signed-off-by: Molly Sophia <redacted>

* RWKV 6: Fix error in ggml_cuda_op_bin_bcast

Signed-off-by: Molly Sophia <redacted>

---------

Signed-off-by: Molly Sophia <redacted>
         [](common_params & params) {
             params.ctx_shift = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
     add_opt(common_arg(
         {"--chunks"}, "N",
         string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
if new_name.endswith("time_mix_w2.weight"):
data_torch = data_torch.permute(0, 2, 1)
+ if new_name.endswith("time_mix_decay.weight") or "lerp" in new_name:
+ data_torch = data_torch.squeeze()
+
rescale_every_n_layers = self.hparams["rescale_every"]
if rescale_every_n_layers > 0:
if new_name.endswith("time_mix_output.weight") or new_name.endswith("channel_mix_value.weight"):