From: Nexesenex
Date: Fri, 22 Mar 2024 13:32:02 +0000 (+0100)
Subject: llama : correction of the attn.v.weight quantization for IQ3_XS (#6209)
X-Git-Tag: upstream/0.0.4488~1985
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=e80f06d2a194be62ab5b1cd7ef7c7a5b241dd4fb;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama : correction of the attn.v.weight quantization for IQ3_XS (#6209)

IQ3_XS was not mentioned, while IQ3_S and IQ3_M were present twice.
This PR corrects that, in the manner that was probably intended initially.
---

diff --git a/llama.cpp b/llama.cpp
index 9de4a860..91bd6b8d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -12027,13 +12027,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
             new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
-            new_type = GGML_TYPE_Q4_K;
-        }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
-            new_type = GGML_TYPE_Q4_K;
-        }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
+        else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) && qs.model.hparams.n_gqa() >= 4) {
             new_type = GGML_TYPE_Q4_K;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
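
For readers skimming the patch, a minimal self-contained C++ sketch of the corrected branch logic follows. The ftype/ggml_type names and the n_gqa() >= 4 threshold come from the diff above; the local enums, the pick_attn_v_type helper, and main() are illustrative scaffolding, not llama.cpp API.

#include <cstdio>

// Stand-in enums; in llama.cpp these are the real llama_ftype / ggml_type values.
enum llama_ftype { LLAMA_FTYPE_MOSTLY_IQ3_XXS, LLAMA_FTYPE_MOSTLY_IQ3_XS,
                   LLAMA_FTYPE_MOSTLY_IQ3_S,   LLAMA_FTYPE_MOSTLY_IQ3_M };
enum ggml_type   { GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ3_S, GGML_TYPE_Q4_K };

// After the fix, IQ3_XS and IQ3_S share one branch: both bump attn_v.weight
// to Q4_K when the model uses grouped-query attention (n_gqa >= 4).
static ggml_type pick_attn_v_type(llama_ftype ftype, int n_gqa, ggml_type new_type) {
    if ((ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) && n_gqa >= 4) {
        new_type = GGML_TYPE_Q4_K;
    } else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
        new_type = GGML_TYPE_Q4_K;
    }
    return new_type;
}

int main() {
    // IQ3_XS on a grouped-query-attention model (n_gqa >= 4) now gets Q4_K;
    // before the fix, IQ3_XS never matched any branch here per the commit message.
    printf("%d\n", pick_attn_v_type(LLAMA_FTYPE_MOSTLY_IQ3_XS, 8, GGML_TYPE_IQ3_S));
    // A non-GQA model (n_gqa == 1) keeps the incoming type unchanged.
    printf("%d\n", pick_attn_v_type(LLAMA_FTYPE_MOSTLY_IQ3_XS, 1, GGML_TYPE_IQ3_S));
    return 0;
}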