builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
}
+static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
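+ // Solar Open wraps reasoning as <|think|>...<|end|><|begin|>assistant<|content|>,
+ // so the end marker of the thought block also opens the assistant content turn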
+ builder.try_parse_reasoning("<|think|>", "<|end|><|begin|>assistant<|content|>");
+
+ // TODO: Tool calling
+
+ builder.add_content(builder.consume_rest());
+}
+
static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
builder.try_parse_reasoning("<think>", "</think>");
builder.add_content(builder.consume_rest());
case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
common_chat_parse_xiaomi_mimo(builder);
break;
+ case COMMON_CHAT_FORMAT_SOLAR_OPEN:
+ common_chat_parse_solar_open(builder);
+ break;
default:
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
}
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
+ case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
return data;
}
+static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) {
+ common_chat_params data;
+
+ // TODO: Reasoning effort
+ json additional_context = {};
+
+ data.prompt = apply(tmpl, inputs, std::nullopt, std::nullopt, additional_context);
+ data.format = COMMON_CHAT_FORMAT_SOLAR_OPEN;
+
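+ // keep the template's control tokens intact so the parser can match them verbatim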
+ data.preserved_tokens = {
+ "<|think|>",
+ "<|content|>",
+ "<|begin|>",
+ "<|end|>",
+ };
+
+ // TODO: Tool calling
+
+ return data;
+}
+
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
data.prompt = apply(tmpl, inputs);
return common_chat_params_init_magistral(tmpl, params);
}
+ // Solar Open (detected via the tool-response markers of its template)
+ if (src.find("<|tool_response:begin|>") != std::string::npos &&
+ src.find("<|tool_response:name|>") != std::string::npos &&
+ src.find("<|tool_response:result|>") != std::string::npos) {
+ return common_chat_params_init_solar_open(tmpl, params);
+ }
+
// Plain handler (no tools)
if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
return common_chat_params_init_without_tools(tmpl, params);
COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
COMMON_CHAT_FORMAT_APRIEL_1_5,
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
+ COMMON_CHAT_FORMAT_SOLAR_OPEN,
// These are intended to be parsed by the PEG parser
COMMON_CHAT_FORMAT_PEG_SIMPLE,
if chkhsh == "4a2e2abae11ca2b86d570fc5b44be4d5eb5e72cc8f22dd136a94b37da83ab665":
# ref: https://huggingface.co/KORMo-Team/KORMo-tokenizer
res = "kormo"
+ if chkhsh == "16389f0a1f51ee53e562ffd51c371dc508639ab0e4261502071836e50e223e91":
+ # ref: https://huggingface.co/upstage/Solar-Open-100B
+ res = "solar-open"
if res is None:
logger.warning("\n")
return []
+@ModelBase.register("SolarOpenForCausalLM")
+class SolarOpenModel(Glm4MoeModel):
+ model_arch = gguf.MODEL_ARCH.GLM4_MOE
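+
+ # Solar Open reuses the GLM4-MoE compute graph; only the vocab setup differs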
+
+ def set_vocab(self):
+ from transformers import AutoTokenizer
+ tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
+ special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+ tokens, toktypes, tokpre = self.get_vocab_base()
+ self.gguf_writer.add_tokenizer_model("gpt2")
+ self.gguf_writer.add_tokenizer_pre(tokpre)
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_types(toktypes)
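+ # wire up the control tokens explicitly; <|endoftext|> serves as both EOS and EOT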
+ special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
+ special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|endoftext|>"])
+ special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<unk>"])
+ special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["<|startoftext|>"])
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+
###### CONVERSION LOGIC ######
{"name": "granite-docling", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ibm-granite/granite-docling-258M", },
{"name": "minimax-m2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/MiniMaxAI/MiniMax-M2", },
{"name": "kormo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/KORMo-Team/KORMo-tokenizer", },
+ {"name": "solar-open", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", },
]
# some models are known to be broken upstream, so we will skip them as exceptions
{ "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS },
{ "grok-2", LLM_CHAT_TEMPLATE_GROK_2 },
{ "pangu-embedded", LLM_CHAT_TEMPLATE_PANGU_EMBED },
+ { "solar-open", LLM_CHAT_TEMPLATE_SOLAR_OPEN },
};
llm_chat_template llm_chat_template_from_str(const std::string & name) {
return LLM_CHAT_TEMPLATE_GROK_2;
} else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
return LLM_CHAT_TEMPLATE_PANGU_EMBED;
+ } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) {
+ return LLM_CHAT_TEMPLATE_SOLAR_OPEN;
}
return LLM_CHAT_TEMPLATE_UNKNOWN;
}
if (add_ass) {
ss << "[unused9]助手:";
}
+ } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) {
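+ // renders e.g. a single user turn "Hi" as:
+ //   <|begin|>user<|content|>Hi<|end|><|begin|>assistant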
+ for (auto message : chat) {
+ std::string role(message->role);
+ ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>";
+ }
+ if (add_ass) {
+ ss << "<|begin|>assistant";
+ }
} else {
// template not supported
return -1;
LLM_CHAT_TEMPLATE_SEED_OSS,
LLM_CHAT_TEMPLATE_GROK_2,
LLM_CHAT_TEMPLATE_PANGU_EMBED,
+ LLM_CHAT_TEMPLATE_SOLAR_OPEN,
LLM_CHAT_TEMPLATE_UNKNOWN,
};
case LLM_TYPE_31B_A3_5B: return "31B.A3.5B";
case LLM_TYPE_80B_A3B: return "80B.A3B";
case LLM_TYPE_100B_A6B: return "100B.A6B";
+ case LLM_TYPE_102B_A12B: return "102B.A12B";
case LLM_TYPE_106B_A12B: return "106B.A12B";
case LLM_TYPE_230B_A10B: return "230B.A10B";
case LLM_TYPE_235B_A22B: return "235B.A22B";
switch (hparams.n_layer) {
case 47: type = LLM_TYPE_106B_A12B; break; // GLM-4.5-Air (46 layers + 1 NextN layer)
+ case 48: type = LLM_TYPE_102B_A12B; break; // Solar-Open-100B
case 93: type = LLM_TYPE_355B_A32B; break; // GLM-4.5 (92 layers + 1 NextN layer)
default: type = LLM_TYPE_UNKNOWN;
}
layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), { n_embd, n_embd_head_k * n_head }, flags);
layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), { n_embd, n_embd_k_gqa }, flags);
layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), { n_embd, n_embd_v_gqa }, flags);
- layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, flags);
- layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, flags);
- layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, flags);
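+ // QKV biases are optional: some GLM4-MoE variants (e.g. Solar Open) ship without them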
+ layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, TENSOR_NOT_REQUIRED | flags);
+ layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, TENSOR_NOT_REQUIRED | flags);
+ layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, TENSOR_NOT_REQUIRED | flags);
layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_k * n_head, n_embd }, flags);
LLM_TYPE_31B_A3_5B,
LLM_TYPE_80B_A3B, // Qwen3 Next
LLM_TYPE_100B_A6B,
+ LLM_TYPE_102B_A12B, // Solar-Open
LLM_TYPE_106B_A12B, // GLM-4.5-Air
LLM_TYPE_230B_A10B, // Minimax M2
LLM_TYPE_235B_A22B,
case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
case LLAMA_VOCAB_PRE_TYPE_QWEN2:
case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
+ case LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN:
regex_exprs = {
// original regex from tokenizer.json
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
tokenizer_pre == "minimax-m2") {
pre_type = LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2;
clean_spaces = false;
+ } else if (
+ tokenizer_pre == "solar-open") {
+ pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN;
+ clean_spaces = false;
} else {
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
}
|| t.first == "<|end|>"
|| t.first == "<|return|>" // o200k_harmony
|| t.first == "<|call|>" // o200k_harmony
+ || t.first == "<|flush|>" // solar-open
+ || t.first == "<|calls|>" // solar-open
|| t.first == "<end_of_turn>"
|| t.first == "<|endoftext|>"
|| t.first == "<|eom_id|>"
LLAMA_LOG_WARN("%s: special_eom_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
}
- // TODO: workaround for o200k_harmony tokenizer: the "<|end|>" token should not be EOG
- // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens,
+ // TODO: workaround for the o200k_harmony and solar-open tokenizers: the "<|end|>" token should not be EOG
+ // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens ("<|calls|>" and "<|flush|>" for solar-open),
// we remove the "<|end|>" token from the EOG list
{
bool has_return = false;
bool has_call = false;
bool has_end = false;
+ bool has_flush = false;
llama_token end_id = LLAMA_TOKEN_NULL;
if (id_to_token[tid].text == "<|return|>") {
has_return = true;
- } else if (id_to_token[tid].text == "<|call|>") {
+ } else if (id_to_token[tid].text == "<|call|>" || id_to_token[tid].text == "<|calls|>") {
has_call = true;
+ } else if (id_to_token[tid].text == "<|flush|>") {
+ has_flush = true;
} else if (id_to_token[tid].text == "<|end|>") {
has_end = true;
end_id = tid;
}
}
- if (has_return && has_call && has_end) {
+ if ((has_return && has_call && has_end) || (has_call && has_flush && has_end)) {
special_eog_ids.erase(end_id);
id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
- LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
+ LLAMA_LOG_WARN("%s: special_eog_ids contains '<|return|>' and '<|call|>' (or '<|calls|>' and '<|flush|>') tokens, removing '<|end|>' token from EOG list\n", __func__);
}
}
}
LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING = 40,
LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2 = 41,
LLAMA_VOCAB_PRE_TYPE_AFMOE = 42,
+ LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN = 43,
};
struct LLM_KV;