From: Georgi Gerganov
Date: Sat, 26 Aug 2023 14:37:35 +0000 (+0300)
Subject: k-quants : remove unnecessary tensor shape restrictions (#2811)
X-Git-Tag: gguf-v0.4.0~230
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=04f4b1eb10f3e25750ca3e530265ce2841730e6b;p=pkg%2Fggml%2Fsources%2Fllama.cpp

k-quants : remove unnecessary tensor shape restrictions (#2811)
---

diff --git a/llama.cpp b/llama.cpp
index 52fcacef..59105db1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4762,8 +4762,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         if (name == tn(LLM_TENSOR_OUTPUT, "weight")) {
             int nx = tensor->ne[0];
-            int ny = tensor->ne[1];
-            if (nx % QK_K == 0 && ny % QK_K == 0) {
+            if (nx % QK_K == 0) {
                 new_type = GGML_TYPE_Q6_K;
             }
         } else if (name.find("attn_v.weight") != std::string::npos) {
@@ -4812,8 +4811,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K) {
             int nx = tensor->ne[0];
             int ny = tensor->ne[1];
-            if (nx % QK_K != 0 || ny % QK_K != 0) {
-                LLAMA_LOG_INFO("\n\nTensor sizes %d x %d are not divisible by %d, required for k-quants.\n",nx,ny,QK_K);
+            if (nx % QK_K != 0) {
+                LLAMA_LOG_WARN("\n\n%s : tensor cols %d x %d are not divisible by %d, required for k-quants\n", __func__, nx, ny, QK_K);
                 convert_incompatible_tensor = true;
             }
         }