fix: llama arch implementation (#17665)

author Gilad S. <redacted>

Mon, 1 Dec 2025 20:21:13 +0000 (22:21 +0200)

committer GitHub <redacted>

Mon, 1 Dec 2025 20:21:13 +0000 (21:21 +0100)
author Gilad S. <redacted>
Mon, 1 Dec 2025 20:21:13 +0000 (22:21 +0200)
committer GitHub <redacted>
Mon, 1 Dec 2025 20:21:13 +0000 (21:21 +0100)
diff --git a/src/llama-model.cpp b/src/llama-model.cpp

index 584efbf3c84049b52f2822cdee9fe65837494f62..c46ee3707fa54e9352ffebf1b8fe80a3424d6e8b 100644 (file)
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -626,6 +626,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
      switch (arch) {
          case LLM_ARCH_LLAMA:
              {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+
                  if (hparams.n_expert == 8) {
                      switch (hparams.n_layer) {
                          case 32: type = LLM_TYPE_8x7B; break;