From: Aman Gupta
Date: Tue, 24 Mar 2026 04:57:57 +0000 (+0800)
Subject: llama-fit: fix regex pattern for gate_up tensors (#20910)
X-Git-Tag: upstream/0.0.8611~114
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=e852eb490136dfa335586b9dd6efb94b02c8fdd3;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama-fit: fix regex pattern for gate_up tensors (#20910)

* llama-fit: fix regex pattern for gate_up tensors

* Apply suggestions from code review

Co-authored-by: Johannes Gäßler

---------

Co-authored-by: Johannes Gäßler
---

diff --git a/src/llama.cpp b/src/llama.cpp
index 872e659ed..4a8a71b08 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -365,14 +365,14 @@ static void llama_params_fit_impl(
             case LAYER_FRACTION_ATTN: {
                 static std::array patterns;
                 if (patterns[il].empty()) {
-                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(up|gate|down).*";
+                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(gate|up|gate_up|down).*";
                 }
                 return patterns[il].c_str();
             }
             case LAYER_FRACTION_UP: {
                 static std::array patterns;
                 if (patterns[il].empty()) {
-                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(gate|down).*";
+                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(gate|gate_up|down).*";
                 }
                 return patterns[il].c_str();
             }
@@ -386,7 +386,7 @@ static void llama_params_fit_impl(
             case LAYER_FRACTION_MOE: {
                 static std::array patterns;
                 if (patterns[il].empty()) {
-                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(up|down|gate)_(ch|)exps";
+                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(up|down|gate_up|gate)_(ch|)exps";
                 }
                 return patterns[il].c_str();
             }
@@ -480,7 +480,7 @@ static void llama_params_fit_impl(
     int64_t global_surplus_cpu_moe = 0;
     if (hp_nex > 0) {
-        const static std::string pattern_moe_all = "blk\\.\\d+\\.ffn_(up|down|gate)_(ch|)exps"; // matches all MoE tensors
+        const static std::string pattern_moe_all = "blk\\.\\d+\\.ffn_(up|down|gate_up|gate)_(ch|)exps"; // matches all MoE tensors
         ggml_backend_buffer_type_t cpu_buft = ggml_backend_cpu_buffer_type();
         tensor_buft_overrides[0] = {pattern_moe_all.c_str(), cpu_buft};
         tensor_buft_overrides[1] = {nullptr, nullptr};