else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && qs.model.hparams.n_gqa() >= 4) {
new_type = GGML_TYPE_Q4_K;
}
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && qs.model.hparams.n_gqa() >= 4) {
- new_type = GGML_TYPE_Q4_K;
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
+ new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_Q3_K : GGML_TYPE_IQ3_XXS;
}
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS) {
if (i_layer < n_layer/8) new_type = GGML_TYPE_Q4_K;
}
- //else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
- // if (i_layer < n_layer/8) new_type = GGML_TYPE_Q5_K;
- //}
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && !qs.has_imatrix) {
+ new_type = i_layer < n_layer/8 ? GGML_TYPE_Q4_K : GGML_TYPE_Q3_K;
+ }
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
new_type = i_layer < n_layer/16 ? GGML_TYPE_Q5_K
: arch != LLM_ARCH_FALCON || use_more_bits(i_layer, n_layer) ? GGML_TYPE_Q4_K