This reverts a single-line change from #5475
quantize &= !params->only_copy;
// do not quantize expert gating tensors
- quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_FFN_GATE_INP, "weight");
+ // NOTE: can't use LLM_TN here because the layer number is not known
+ quantize &= name.find("ffn_gate_inp.weight") == std::string::npos;
// do not quantize positional embeddings and token types (BERT)
quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_POS_EMBD, "weight");