Fix incorrect use of ctx_split for bias tensors (#9063)

author Yoshi Suhara <redacted>

Sat, 17 Aug 2024 13:34:21 +0000 (06:34 -0700)

committer GitHub <redacted>

Sat, 17 Aug 2024 13:34:21 +0000 (15:34 +0200)
author Yoshi Suhara <redacted>
Sat, 17 Aug 2024 13:34:21 +0000 (06:34 -0700)
committer GitHub <redacted>
Sat, 17 Aug 2024 13:34:21 +0000 (15:34 +0200)
diff --git a/src/llama.cpp b/src/llama.cpp

index 7e9149eb9830249bd69afdbc22e5d0190d406e16..5ab65ea97defadb106f3f4a7d55eabde9f14d033 100644 (file)
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6172,9 +6172,9 @@ static bool llm_load_tensors(
                              layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd,   n_ff});
  
                              // optional MLP bias
-                            layer.ffn_gate_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
-                            layer.ffn_down_b = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
-                            layer.ffn_up_b   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                            layer.ffn_gate_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                            layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                            layer.ffn_up_b   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP,   "bias", i), {n_ff}, llama_model_loader::TENSOR_NOT_REQUIRED);
                          } else {
                              layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert});
  
@@ -6498,7 +6498,7 @@ static bool llm_load_tensors(
                          layer.bv = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_V, "bias",   i), {n_embd_gqa});
  
                          layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}); //output_dens
-                        layer.bo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "bias",   i), {n_embd}); //output_dens
+                        layer.bo = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "bias",   i), {n_embd}); //output_dens
  
                          layer.attn_out_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
                          layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias",   i), {n_embd});
author	Yoshi Suhara <redacted>
	Sat, 17 Aug 2024 13:34:21 +0000 (06:34 -0700)
committer	GitHub <redacted>
	Sat, 17 Aug 2024 13:34:21 +0000 (15:34 +0200)