* Enable --no-context-shift for llama-perplexity example

Signed-off-by: Molly Sophia <redacted>

* RWKV 6: Fix error in ggml_cuda_op_bin_bcast

Signed-off-by: Molly Sophia <redacted>

---------

Signed-off-by: Molly Sophia <redacted>
         [](common_params & params) {
             params.ctx_shift = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
     add_opt(common_arg(
         {"--chunks"}, "N",
         string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
if new_name.endswith("time_mix_w2.weight"):
data_torch = data_torch.permute(0, 2, 1)
+ if new_name.endswith("time_mix_decay.weight") or "lerp" in new_name:
+ data_torch = data_torch.squeeze()
+
rescale_every_n_layers = self.hparams["rescale_every"]
if rescale_every_n_layers > 0:
if new_name.endswith("time_mix_output.weight") or new_name.endswith("channel_mix_value.weight"):