llama-model : fix the reported size class for nomic-embed-text-v2-moe (#13223)

author Jared Van Bortel <redacted>

Thu, 1 May 2025 07:09:41 +0000 (03:09 -0400)

committer GitHub <redacted>

Thu, 1 May 2025 07:09:41 +0000 (10:09 +0300)
author Jared Van Bortel <redacted>
Thu, 1 May 2025 07:09:41 +0000 (03:09 -0400)
committer GitHub <redacted>
Thu, 1 May 2025 07:09:41 +0000 (10:09 +0300)
diff --git a/src/llama-model.cpp b/src/llama-model.cpp

index 822e2bb2cf0181e303523f47f2a0dcc7b846a241..51092a128c5c6e9f216b5ccd3c1d9d555905c671 100644 (file)
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -40,6 +40,7 @@ const char * llm_type_name(llm_type type) {
          case LLM_TYPE_335M:          return "335M";
          case LLM_TYPE_410M:          return "410M";
          case LLM_TYPE_450M:          return "450M";
+        case LLM_TYPE_475M:          return "475M";
          case LLM_TYPE_770M:          return "770M";
          case LLM_TYPE_780M:          return "780M";
          case LLM_TYPE_0_5B:          return "0.5B";
@@ -707,7 +708,11 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                  ml.get_key(LLM_KV_MOE_EVERY_N_LAYERS,         hparams.moe_every_n_layers, 0);
  
                  if (hparams.n_layer == 12 && hparams.n_embd == 768) {
-                    type = LLM_TYPE_137M;
+                    if (arch == LLM_ARCH_NOMIC_BERT) {
+                        type = LLM_TYPE_137M;
+                    } else if (arch == LLM_ARCH_NOMIC_BERT_MOE && hparams.moe_every_n_layers == 2) {
+                        type = LLM_TYPE_475M;
+                    }
                  }
              } break;
          case LLM_ARCH_BLOOM:
diff --git a/src/llama-model.h b/src/llama-model.h

index 95eca00266a4b17a2aef612f8e3f4d954ce5f08f..34aac337cff2769c085e5a9157a7cf58b76e7b83 100644 (file)
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -36,6 +36,7 @@ enum llm_type {
      LLM_TYPE_335M,
      LLM_TYPE_410M,
      LLM_TYPE_450M,
+    LLM_TYPE_475M,
      LLM_TYPE_770M,
      LLM_TYPE_780M,
      LLM_TYPE_0_5B,
author	Jared Van Bortel <redacted>
	Thu, 1 May 2025 07:09:41 +0000 (03:09 -0400)
committer	GitHub <redacted>
	Thu, 1 May 2025 07:09:41 +0000 (10:09 +0300)
src/llama-model.cpp		patch | blob | history
src/llama-model.h		patch | blob | history