gpt-2 : fix quantize tool to quantize the "lm_head" tensor

author Georgi Gerganov <redacted>

Thu, 30 Mar 2023 21:34:14 +0000 (00:34 +0300)

committer Georgi Gerganov <redacted>

Thu, 30 Mar 2023 21:34:14 +0000 (00:34 +0300)
author Georgi Gerganov <redacted>
Thu, 30 Mar 2023 21:34:14 +0000 (00:34 +0300)
committer Georgi Gerganov <redacted>
Thu, 30 Mar 2023 21:34:14 +0000 (00:34 +0300)
diff --git a/examples/gpt-2/quantize.cpp b/examples/gpt-2/quantize.cpp

index 3cc48ea3909dbe6fb9b8f23f1a6416cafce6c991..693b0787cb6701118c02911e1bce146dc331f21b 100644 (file)
--- a/examples/gpt-2/quantize.cpp
+++ b/examples/gpt-2/quantize.cpp
@@ -162,6 +162,7 @@ bool gpt2_model_quantize(const std::string & fname_inp, const std::string & fnam
              // regexes of tensor names to be quantized
              const std::vector<std::string> k_names = {
                  "model/wte",
+                "model/lm_head",
                  "model/h.*/attn/c_attn/w",
                  "model/h.*/attn/c_proj/w",
                  "model/h.*/mlp/c_fc/w",
author	Georgi Gerganov <redacted>
	Thu, 30 Mar 2023 21:34:14 +0000 (00:34 +0300)
committer	Georgi Gerganov <redacted>
	Thu, 30 Mar 2023 21:34:14 +0000 (00:34 +0300)