model : update Qwen3.5 model type detection (#20126)

author Eric Zhang <redacted>

Thu, 5 Mar 2026 11:47:14 +0000 (19:47 +0800)

committer GitHub <redacted>

Thu, 5 Mar 2026 11:47:14 +0000 (12:47 +0100)
author Eric Zhang <redacted>
Thu, 5 Mar 2026 11:47:14 +0000 (19:47 +0800)
committer GitHub <redacted>
Thu, 5 Mar 2026 11:47:14 +0000 (12:47 +0100)
diff --git a/src/llama-model.cpp b/src/llama-model.cpp

index 60b7cc6946f640ee1482a4782c952852b8aaf4e2..924e5708cdea2dce28a3af552ff1b15abf29e563 100644 (file)
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -61,6 +61,7 @@ const char * llm_type_name(llm_type type) {
          case LLM_TYPE_0_3B:          return "0.3B";
          case LLM_TYPE_0_5B:          return "0.5B";
          case LLM_TYPE_0_6B:          return "0.6B";
+        case LLM_TYPE_0_8B:          return "0.8B";
          case LLM_TYPE_1B:            return "1B";
          case LLM_TYPE_1_2B:          return "1.2B";
          case LLM_TYPE_1_3B:          return "1.3B";
@@ -132,12 +133,14 @@ const char * llm_type_name(llm_type type) {
          case LLM_TYPE_100B_A6B:      return "100B.A6B";
          case LLM_TYPE_102B_A12B:     return "102B.A12B";
          case LLM_TYPE_106B_A12B:     return "106B.A12B";
+        case LLM_TYPE_122B_A10B:     return "122B.A10B";
          case LLM_TYPE_196B_A11B:     return "196B.A11B";
          case LLM_TYPE_230B_A10B:     return "230B.A10B";
          case LLM_TYPE_235B_A22B:     return "235B.A22B";
          case LLM_TYPE_300B_A47B:     return "300B.A47B";
          case LLM_TYPE_310B_A15B:     return "310B.A15B";
          case LLM_TYPE_355B_A32B:     return "355B.A32B";
+        case LLM_TYPE_397B_A17B:     return "397B.A17B";
          case LLM_TYPE_744B_A40B:     return "744B.A40B";
          case LLM_TYPE_E2B:           return "E2B";
          case LLM_TYPE_E4B:           return "E4B";
@@ -2528,7 +2531,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                  }
  
                  switch (hparams.n_layer) {
-                    case 24: type = LLM_TYPE_2B; break;
+                    case 24: type = hparams.n_embd == 1024 ? LLM_TYPE_0_8B : LLM_TYPE_2B; break;
+                    case 32: type = hparams.n_embd == 2560 ? LLM_TYPE_4B : LLM_TYPE_9B; break;
+                    case 64: type = LLM_TYPE_27B; break;
                      default: type = LLM_TYPE_UNKNOWN;
                  }
              } break;
@@ -2557,8 +2562,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                  }
  
                  switch (hparams.n_layer) {
-                    case 28: type = LLM_TYPE_35B_A3B; break;
-                    case 48: type = LLM_TYPE_80B_A3B; break;
+                    case 40: type = LLM_TYPE_35B_A3B; break;
+                    case 48: type = LLM_TYPE_122B_A10B; break;
+                    case 60: type = LLM_TYPE_397B_A17B; break;
                      default: type = LLM_TYPE_UNKNOWN;
                  }
              } break;
diff --git a/src/llama-model.h b/src/llama-model.h

index d7c3e7d1c1a32f378d6602044ae919981ff9e39f..5ecb8344a25375e3bdd5cf6f9546af5874af3cce 100644 (file)
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -54,6 +54,7 @@ enum llm_type {
      LLM_TYPE_0_3B,
      LLM_TYPE_0_5B,
      LLM_TYPE_0_6B,
+    LLM_TYPE_0_8B,
      LLM_TYPE_1B,
      LLM_TYPE_1_2B,
      LLM_TYPE_1_3B,
@@ -125,12 +126,14 @@ enum llm_type {
      LLM_TYPE_100B_A6B,
      LLM_TYPE_102B_A12B, // Solar-Open
      LLM_TYPE_106B_A12B, // GLM-4.5-Air
+    LLM_TYPE_122B_A10B, // Qwen3.5
      LLM_TYPE_196B_A11B, // Step3.5-Flash
      LLM_TYPE_230B_A10B, // Minimax M2
      LLM_TYPE_235B_A22B,
      LLM_TYPE_300B_A47B, // Ernie MoE big
      LLM_TYPE_310B_A15B, // /MiMo-V2-Flash
      LLM_TYPE_355B_A32B, // GLM-4.5
+    LLM_TYPE_397B_A17B, // Qwen3.5
      LLM_TYPE_744B_A40B, // GLM-5
      LLM_TYPE_E2B,
      LLM_TYPE_E4B,
author	Eric Zhang <redacted>
	Thu, 5 Mar 2026 11:47:14 +0000 (19:47 +0800)
committer	GitHub <redacted>
	Thu, 5 Mar 2026 11:47:14 +0000 (12:47 +0100)
src/llama-model.cpp		patch | blob | history
src/llama-model.h		patch | blob | history