k-quants : remove unnecessary tensor shape restrictions (#2811)
author     Georgi Gerganov <redacted>
           Sat, 26 Aug 2023 14:37:35 +0000 (17:37 +0300)
committer  GitHub <redacted>
           Sat, 26 Aug 2023 14:37:35 +0000 (17:37 +0300)
llama.cpp

index 52fcaceff95253b250ccfffd7b72cc75fa004c88..59105db1c990ce9a95861b825edc8f319fe36f42 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4762,8 +4762,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
             if (name == tn(LLM_TENSOR_OUTPUT, "weight")) {
                 int nx = tensor->ne[0];
-                int ny = tensor->ne[1];
-                if (nx % QK_K == 0 && ny % QK_K == 0) {
+                if (nx % QK_K == 0) {
                     new_type = GGML_TYPE_Q6_K;
                 }
             } else if (name.find("attn_v.weight") != std::string::npos) {
@@ -4812,8 +4811,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K) {
                 int nx = tensor->ne[0];
                 int ny = tensor->ne[1];
-                if (nx % QK_K != 0 || ny % QK_K != 0) {
-                    LLAMA_LOG_INFO("\n\nTensor sizes %d x %d are not divisible by %d, required for k-quants.\n",nx,ny,QK_K);
+                if (nx % QK_K != 0) {
+                    LLAMA_LOG_WARN("\n\n%s : tensor cols %d x %d are not divisible by %d, required for k-quants\n", __func__, nx, ny, QK_K);
                     convert_incompatible_tensor = true;
                 }
             }
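
Why the ny check could be dropped: k-quant formats pack weights row by row into super-blocks of QK_K elements (256 in the default build), so only the row length ne[0] has to be a multiple of QK_K; the number of rows ne[1] never enters the packing. Below is a minimal standalone C++ sketch of the relaxed check; is_k_quant_compatible is a hypothetical helper written for illustration, not part of the llama.cpp API.

    #include <cstdio>

    static const int QK_K = 256; // super-block size used by the k-quants (default build)

    // Hypothetical helper mirroring the relaxed check in the patch above:
    // each row is split into whole super-blocks, so only the row length
    // (nx == ne[0]) matters; the row count (ny == ne[1]) is irrelevant.
    static bool is_k_quant_compatible(int nx, int ny) {
        (void) ny;             // row count does not affect the packing
        return nx % QK_K == 0; // every row must hold whole super-blocks
    }

    int main() {
        // 4096 columns, 32001 rows: rejected by the old nx/ny check
        // (32001 % 256 != 0), accepted now that only columns are tested.
        printf("%d\n", is_k_quant_compatible(4096, 32001)); // prints 1
        // 4100 columns: still rejected, rows cannot split into 256-blocks.
        printf("%d\n", is_k_quant_compatible(4100, 4096));  // prints 0
        return 0;
    }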