llama : fix comment for "output.weight" tensor
author Georgi Gerganov <redacted>
Fri, 21 Apr 2023 07:23:36 +0000 (10:23 +0300)
committer Georgi Gerganov <redacted>
Fri, 21 Apr 2023 07:24:02 +0000 (10:24 +0300)
llama.cpp

index 4a646eb911621ea65f292c2213ccba207c2935ca..33ee4fbb5947461a0f2685a6fb7ce73354c22bf1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1618,8 +1618,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         // quantize only 2D tensors
         quantize &= (tensor.ne.size() == 2);
 
-        // GG: uncomment this to keep the output layer in FP16
-        //if (tensor.name.rfind("output")) {
+        // uncomment this to keep the output layer in FP16
+        //if (tensor.name == "output.weight") {
         //    quantize = false;
         //}
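
For context only (this sketch is not part of the commit): the previously commented-out check used std::string::rfind, which returns a character position. For a tensor literally named "output.weight" the substring "output" is found at position 0, so the old condition evaluated to false and the output layer would have been quantized anyway. Comparing against the exact tensor name "output.weight" expresses the intent directly. A minimal standalone illustration of the difference, using the names from the diff:

#include <cstdio>
#include <string>

int main() {
    const std::string tensor_name = "output.weight";

    // old commented-out form: rfind returns 0 (match at the start of the name),
    // which converts to false, so quantize would not have been disabled
    const bool old_check = static_cast<bool>(tensor_name.rfind("output"));

    // fixed form from this commit: exact match on the output layer's tensor name
    const bool new_check = (tensor_name == "output.weight");

    printf("old check: %d, new check: %d\n", old_check, new_check);
    return 0;
}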