From: Molly Sophia
Date: Fri, 20 Dec 2024 09:44:58 +0000 (+0800)
Subject: convert : fix RWKV v6 model conversion (#10913)
X-Git-Tag: upstream/0.0.4488~120
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=0a11f8b7b5c39fdf6e91ef9674bc68ff08681af7;p=pkg%2Fggml%2Fsources%2Fllama.cpp

convert : fix RWKV v6 model conversion (#10913)

* Enable --no-context-shift for llama-perplexity example

Signed-off-by: Molly Sophia

* RWKV 6: Fix error in ggml_cuda_op_bin_bcast

Signed-off-by: Molly Sophia

---------

Signed-off-by: Molly Sophia
---

diff --git a/common/arg.cpp b/common/arg.cpp
index c3d66efb..deb11378 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -626,7 +626,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.ctx_shift = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
     add_opt(common_arg(
         {"--chunks"}, "N",
         string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index ecd69be6..76ab11eb 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -3065,6 +3065,9 @@ class Rwkv6Model(Model):
         if new_name.endswith("time_mix_w2.weight"):
             data_torch = data_torch.permute(0, 2, 1)
 
+        if new_name.endswith("time_mix_decay.weight") or "lerp" in new_name:
+            data_torch = data_torch.squeeze()
+
         rescale_every_n_layers = self.hparams["rescale_every"]
         if rescale_every_n_layers > 0:
             if new_name.endswith("time_mix_output.weight") or new_name.endswith("channel_mix_value.weight"):
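
A minimal sketch of what the added squeeze() call does, assuming the HF RWKV v6
checkpoint stores the time_mix_decay and time_mix_lerp_* weights with leading
singleton dimensions such as (1, 1, n_embd); the shapes below are illustrative,
not taken from the patch:

    import torch

    n_embd = 2048                           # hypothetical embedding size
    data_torch = torch.randn(1, 1, n_embd)  # shape as loaded from the checkpoint

    # squeeze() drops every size-1 dimension, leaving a plain 1-D tensor
    squeezed = data_torch.squeeze()
    print(tuple(data_torch.shape), "->", tuple(squeezed.shape))
    # (1, 1, 2048) -> (2048,)

Writing these tensors to GGUF as 1-D, instead of carrying the singleton
dimensions through, keeps their shapes in the form the ggml compute graph
expects, which is presumably what avoids the broadcast-shape error reported
from ggml_cuda_op_bin_bcast.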