model: add Solar Open model (#18511)
author     HelloKS <redacted>
Thu, 1 Jan 2026 17:01:43 +0000 (02:01 +0900)
committer  GitHub <redacted>
Thu, 1 Jan 2026 17:01:43 +0000 (18:01 +0100)
* model: add Solar-Open model

* vocab: add solar-open to end eog blacklist

* model: add proper llm type

* chat: basic template for solar open

* typo: fix comment about vocab

* convert: suggested changes

* convert: suggested changes

* chat: change reasoning end tag for solar-open

* llama-chat: add solar-open template
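
The prompt side of the change is the fixed turn layout added to src/llama-chat.cpp below: each message is rendered as <|begin|>role<|content|>...<|end|>, with a trailing <|begin|>assistant when a generation prompt is requested. A minimal sketch of exercising it through the public llama_chat_apply_template API (the messages and buffer size are only illustrative):

    #include <cstdio>
    #include <vector>
    #include "llama.h"

    int main() {
        std::vector<llama_chat_message> chat = {
            { "system", "You are a helpful assistant." },
            { "user",   "Hello!" },
        };

        char buf[512];
        // "solar-open" maps to LLM_CHAT_TEMPLATE_SOLAR_OPEN via LLM_CHAT_TEMPLATES
        const int32_t n = llama_chat_apply_template("solar-open", chat.data(), chat.size(),
                                                    /*add_ass=*/true, buf, sizeof(buf));
        if (n > 0 && n < (int32_t) sizeof(buf)) {
            // expected (one continuous string):
            //   <|begin|>system<|content|>You are a helpful assistant.<|end|><|begin|>user<|content|>Hello!<|end|><|begin|>assistant
            printf("%.*s\n", n, buf);
        }
        return 0;
    }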

common/chat-parser.cpp
common/chat.cpp
common/chat.h
convert_hf_to_gguf.py
convert_hf_to_gguf_update.py
src/llama-chat.cpp
src/llama-chat.h
src/llama-model.cpp
src/llama-model.h
src/llama-vocab.cpp
src/llama-vocab.h

diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
index d740dac0651fadffb456ad97448cad238f609750..23e23ca8c7cfe41228fb24e96f6252699f1689f7 100644
@@ -1395,6 +1395,14 @@ static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
     builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
 }
 
+static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
+    builder.try_parse_reasoning("<|think|>", "<|end|><|begin|>assistant<|content|>");
+
+    // TODO: Tool calling
+
+    builder.add_content(builder.consume_rest());
+}
+
 static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
     builder.try_parse_reasoning("<think>", "</think>");
     builder.add_content(builder.consume_rest());
@@ -1479,6 +1487,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
         case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
             common_chat_parse_xiaomi_mimo(builder);
             break;
+        case COMMON_CHAT_FORMAT_SOLAR_OPEN:
+            common_chat_parse_solar_open(builder);
+            break;
         default:
             throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
     }
diff --git a/common/chat.cpp b/common/chat.cpp
index 7e940695bdab4302189a50853ea726ff05da2bce..b98ab21ce1cf21e37e1c09245e1c64bd518c7c45 100644
@@ -669,6 +669,7 @@ const char * common_chat_format_name(common_chat_format format) {
         case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
         case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
         case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
+        case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
         case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
         case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
         case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
@@ -2517,6 +2518,27 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
     return data;
 }
 
+static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+
+    // TODO: Reasoning effort
+    json additional_context = {};
+
+    data.prompt = apply(tmpl, inputs, std::nullopt, std::nullopt, additional_context);
+    data.format = COMMON_CHAT_FORMAT_SOLAR_OPEN;
+
+    data.preserved_tokens = {
+        "<|think|>",
+        "<|content|>",
+        "<|begin|>",
+        "<|end|>",
+    };
+
+    // TODO: Tool calling
+
+    return data;
+}
+
 static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
     data.prompt = apply(tmpl, inputs);
@@ -2780,6 +2802,13 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_magistral(tmpl, params);
     }
 
+    // Solar Open
+    if (src.find("<|tool_response:begin|>") != std::string::npos &&
+        src.find("<|tool_response:name|>") != std::string::npos &&
+        src.find("<|tool_response:result|>") != std::string::npos) {
+        return common_chat_params_init_solar_open(tmpl, params);
+    }
+
     // Plain handler (no tools)
     if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
         return common_chat_params_init_without_tools(tmpl, params);
diff --git a/common/chat.h b/common/chat.h
index 6085510a402a2079b37984158eb63deb46e59118..8bd4a325ff85477e9843905af5c307dc3432d968 100644
@@ -124,6 +124,7 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
     COMMON_CHAT_FORMAT_APRIEL_1_5,
     COMMON_CHAT_FORMAT_XIAOMI_MIMO,
+    COMMON_CHAT_FORMAT_SOLAR_OPEN,
 
     // These are intended to be parsed by the PEG parser
     COMMON_CHAT_FORMAT_PEG_SIMPLE,
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index edc0ed539d553ad6cc0221b4600fa20dee75c6af..a1080b15f03efef9dc9434b30c46e87b1c222fa1 100755
@@ -1230,6 +1230,9 @@ class TextModel(ModelBase):
         if chkhsh == "4a2e2abae11ca2b86d570fc5b44be4d5eb5e72cc8f22dd136a94b37da83ab665":
             # ref: https://huggingface.co/KORMo-Team/KORMo-tokenizer
             res = "kormo"
+        if chkhsh == "16389f0a1f51ee53e562ffd51c371dc508639ab0e4261502071836e50e223e91":
+            # ref: https://huggingface.co/upstage/Solar-Open-100B
+            res = "solar-open"
 
         if res is None:
             logger.warning("\n")
@@ -10617,6 +10620,26 @@ class JanusProVisionModel(MmprojModel):
         return []
 
 
+@ModelBase.register("SolarOpenForCausalLM")
+class SolarOpenModel(Glm4MoeModel):
+    model_arch = gguf.MODEL_ARCH.GLM4_MOE
+
+    def set_vocab(self):
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+        special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
+        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|endoftext|>"])
+        special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<unk>"])
+        special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["<|startoftext|>"])
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+
 ###### CONVERSION LOGIC ######
 
 
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 4378378309f36776b1eb9c3456ae3d1fb588bdc6..b1ae4105ed4c15d693b1bb89c421c8927b80cab7 100755
@@ -145,6 +145,7 @@ models = [
     {"name": "granite-docling",  "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ibm-granite/granite-docling-258M", },
     {"name": "minimax-m2",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/MiniMaxAI/MiniMax-M2", },
     {"name": "kormo",            "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/KORMo-Team/KORMo-tokenizer", },
+    {"name": "solar-open",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", },
 ]
 
 # some models are known to be broken upstream, so we will skip them as exceptions
diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index fc6a6223cfe2f86b8d4bf7ff2b596e2fb6c945fb..b54ebbd155dbab1c510ce45300ab3d534fa48a86 100644
@@ -74,6 +74,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "seed_oss",          LLM_CHAT_TEMPLATE_SEED_OSS          },
     { "grok-2",            LLM_CHAT_TEMPLATE_GROK_2            },
     { "pangu-embedded",    LLM_CHAT_TEMPLATE_PANGU_EMBED       },
+    { "solar-open",        LLM_CHAT_TEMPLATE_SOLAR_OPEN        },
 };
 
 llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -216,6 +217,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_GROK_2;
     } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
         return LLM_CHAT_TEMPLATE_PANGU_EMBED;
+    } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) {
+        return LLM_CHAT_TEMPLATE_SOLAR_OPEN;
     }
     return LLM_CHAT_TEMPLATE_UNKNOWN;
 }
@@ -845,6 +848,14 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << "[unused9]助手:";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) {
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>";
+        }
+        if (add_ass) {
+            ss << "<|begin|>assistant";
+        }
     } else {
         // template not supported
         return -1;
diff --git a/src/llama-chat.h b/src/llama-chat.h
index 684efb4d67f45b84f3cf6d96ac5909e22af29395..e1f795249c88625dbee3aed5954a3cb98370ae3d 100644
@@ -54,6 +54,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_SEED_OSS,
     LLM_CHAT_TEMPLATE_GROK_2,
     LLM_CHAT_TEMPLATE_PANGU_EMBED,
+    LLM_CHAT_TEMPLATE_SOLAR_OPEN,
     LLM_CHAT_TEMPLATE_UNKNOWN,
 };
 
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 5e664c8c574a045c5177ae558c9a9b3629d8a21c..dfb5c0ce82b10ec2bdc258ef127cc36b44657146 100644
@@ -126,6 +126,7 @@ const char * llm_type_name(llm_type type) {
         case LLM_TYPE_31B_A3_5B:     return "31B.A3.5B";
         case LLM_TYPE_80B_A3B:       return "80B.A3B";
         case LLM_TYPE_100B_A6B:      return "100B.A6B";
+        case LLM_TYPE_102B_A12B:     return "102B.A12B";
         case LLM_TYPE_106B_A12B:     return "106B.A12B";
         case LLM_TYPE_230B_A10B:     return "230B.A10B";
         case LLM_TYPE_235B_A22B:     return "235B.A22B";
@@ -1778,6 +1779,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
 
                 switch (hparams.n_layer) {
                     case 47: type = LLM_TYPE_106B_A12B; break; // GLM-4.5-Air (46 layers + 1 NextN layer)
+                    case 48: type = LLM_TYPE_102B_A12B; break; // Solar Open
                     case 93: type = LLM_TYPE_355B_A32B; break; // GLM-4.5 (92 layers + 1 NextN layer)
                     default: type = LLM_TYPE_UNKNOWN;
                 }
@@ -5206,9 +5208,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), { n_embd, n_embd_head_k * n_head }, flags);
                         layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), { n_embd, n_embd_k_gqa }, flags);
                         layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), { n_embd, n_embd_v_gqa }, flags);
-                        layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, flags);
-                        layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, flags);
-                        layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, flags);
+                        layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, TENSOR_NOT_REQUIRED | flags);
+                        layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, TENSOR_NOT_REQUIRED | flags);
+                        layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, TENSOR_NOT_REQUIRED | flags);
 
                         layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_k * n_head, n_embd }, flags);
 
diff --git a/src/llama-model.h b/src/llama-model.h
index f4f44a92b63ad9787e25e431b12b2c3034a1f1ff..79200a0d97a84cadf82657335561ce9861028e73 100644
@@ -119,6 +119,7 @@ enum llm_type {
     LLM_TYPE_31B_A3_5B,
     LLM_TYPE_80B_A3B, // Qwen3 Next
     LLM_TYPE_100B_A6B,
+    LLM_TYPE_102B_A12B, // Solar-Open
     LLM_TYPE_106B_A12B, // GLM-4.5-Air
     LLM_TYPE_230B_A10B, // Minimax M2
     LLM_TYPE_235B_A22B,
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index cd4092ca0772ad0af6ca01ece0a6f31eddf44a82..c57055082b0c556607715750c0b3ab0197b3518a 100644
@@ -355,6 +355,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
             case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
             case LLAMA_VOCAB_PRE_TYPE_QWEN2:
             case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
+            case LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN:
                 regex_exprs = {
                     // original regex from tokenizer.json
                     // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
@@ -2015,6 +2016,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "minimax-m2") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2;
                 clean_spaces = false;
+            } else if (
+                tokenizer_pre == "solar-open") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN;
+                clean_spaces = false;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }
@@ -2358,6 +2363,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|end|>"
                     || t.first == "<|return|>" // o200k_harmony
                     || t.first == "<|call|>"   // o200k_harmony
+                    || t.first == "<|flush|>"  // solar-open
+                    || t.first == "<|calls|>"  // solar-open
                     || t.first == "<end_of_turn>"
                     || t.first == "<|endoftext|>"
                     || t.first == "<|eom_id|>"
@@ -2404,13 +2411,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             LLAMA_LOG_WARN("%s: special_eom_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
         }
 
-        // TODO: workaround for o200k_harmony tokenizer: the "<|end|>" token should not be EOG
-        //       we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens,
+        // TODO: workaround for o200k_harmony and solar-open tokenizer: the "<|end|>" token should not be EOG
+        //       we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens ("<|calls|>" and "<|flush|>" for solar-open),
         //       we remove the "<|end|>" token from the EOG list
         {
             bool has_return = false;
             bool has_call   = false;
             bool has_end    = false;
+            bool has_flush  = false;
 
             llama_token end_id = LLAMA_TOKEN_NULL;
 
@@ -2420,18 +2428,20 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
                 if (id_to_token[tid].text == "<|return|>") {
                     has_return = true;
-                } else if (id_to_token[tid].text == "<|call|>") {
+                } else if (id_to_token[tid].text == "<|call|>" || id_to_token[tid].text == "<|calls|>") {
                     has_call = true;
+                } else if (id_to_token[tid].text == "<|flush|>") {
+                    has_flush = true;
                 } else if (id_to_token[tid].text == "<|end|>") {
                     has_end = true;
                     end_id = tid;
                 }
             }
 
-            if (has_return && has_call && has_end) {
+            if ((has_return && has_call && has_end) || (has_call && has_flush && has_end)) {
                 special_eog_ids.erase(end_id);
                 id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
-                LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
+                LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>', or '<|calls|>' and '<|flush|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
             }
         }
     }
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 55f8f3923c95bcecd3033c94dcee1584c3e5060e..f5bdd2231183980a83ae1c67d4b10ae761aad78f 100644
@@ -51,6 +51,7 @@ enum llama_vocab_pre_type {
     LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING = 40,
     LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2      = 41,
     LLAMA_VOCAB_PRE_TYPE_AFMOE           = 42,
+    LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN      = 43,
 };
 
 struct LLM_KV;
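
On the parsing side, COMMON_CHAT_FORMAT_SOLAR_OPEN splits the <|think|> block from the visible reply (common/chat-parser.cpp above). A rough sketch of driving that through common_chat_parse from common/chat.h; the sample output string is made up, and the syntax field names (reasoning_format, etc.) assume the current common_chat_syntax layout:

    #include <cstdio>
    #include <string>
    #include "chat.h"

    int main() {
        common_chat_syntax syntax;
        syntax.format           = COMMON_CHAT_FORMAT_SOLAR_OPEN;
        syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; // extract reasoning separately

        // Solar Open layout: reasoning first, then the assistant content turn
        const std::string out =
            "<|think|>A short greeting is enough.<|end|><|begin|>assistant<|content|>Hello! How can I help?";

        const common_chat_msg msg = common_chat_parse(out, /*is_partial=*/false, syntax);
        printf("reasoning: %s\n", msg.reasoning_content.c_str());
        printf("content  : %s\n", msg.content.c_str());
        return 0;
    }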