From: Sigbjørn Skjæret Date: Wed, 2 Apr 2025 09:21:48 +0000 (+0200) Subject: vocab : BailingMoE : change possessive quantifiers to greedy (#12677) X-Git-Tag: upstream/0.0.5028~2 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=83a88bd6affbe148a622ac730952ac5b8b585979;p=pkg%2Fggml%2Fsources%2Fllama.cpp vocab : BailingMoE : change possessive quantifiers to greedy (#12677) --- diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 31e2055f..521a6ec5 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -411,7 +411,8 @@ struct llm_tokenizer_bpe : llm_tokenizer { regex_exprs = { // original regex from tokenizer.json // "'(?i:[sdmt]|ll|ve|re)|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+" - "'(?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+", + // FIXME? Changed possessive quantifiers (?+ and ++) to greedy to avoid errors and imatrix hanging (tried atomic grouping but it's not supported?) + "'(?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+", }; break; default: