id special_cls_id = -1;
id special_mask_id = -1;
- int special_add_bos = -1; // -1 unknown, 1 add, 0 don't add.
- int special_add_eos = -1; // -1 unknown, 1 add, 0 don't add.
-
id linefeed_id = 13;
id special_prefix_id = -1;
id special_suffix_id = -1;
id special_middle_id = -1;
id special_eot_id = -1; // TODO: move above after "eos_id", and here add "file separator" token
- bool add_space_prefix = true;
+ // tokenizer flags
+ bool tokenizer_add_space_prefix = true;
+ bool tokenizer_add_bos = false;
+ bool tokenizer_add_eos = false;
+ bool tokenizer_ignore_merges = false;
int find_bpe_rank(const std::string & token_left, const std::string & token_right) const {
GGML_ASSERT(token_left.find(' ') == std::string::npos);
const int add_space_prefix_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_ADD_PREFIX).c_str());
if (add_space_prefix_keyidx != -1) {
- vocab.add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx);
+ vocab.tokenizer_add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx);
} // The default value of add_space_prefix is true.
} else if (tokenizer_model == "bert") {
vocab.type = LLAMA_VOCAB_TYPE_WPM;
vocab.special_pad_id = 0;
vocab.special_cls_id = 101;
vocab.special_mask_id = 103;
- vocab.add_space_prefix = false;
+ vocab.tokenizer_add_space_prefix = false;
} else if (tokenizer_model == "gpt2") {
vocab.type = LLAMA_VOCAB_TYPE_BPE;
const int add_space_prefix_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_ADD_PREFIX).c_str());
if (add_space_prefix_keyidx != -1) {
- vocab.add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx);
+ vocab.tokenizer_add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx);
}
// read bpe merges and populate bpe ranks
tokenizer_pre == "llama-v3" ||
tokenizer_pre == "llama-bpe") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
+ vocab.tokenizer_ignore_merges = true;
+ vocab.tokenizer_add_bos = true;
} else if (
tokenizer_pre == "deepseek-llm") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM;
} else {
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
}
+ } else if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ vocab.tokenizer_add_bos = true;
+ vocab.tokenizer_add_eos = false;
+ } else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) {
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ vocab.tokenizer_add_bos = true;
+ vocab.tokenizer_add_eos = false;
} else {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
}
bool temp = true;
if (ml.get_key(LLM_KV_TOKENIZER_ADD_BOS, temp, false)) {
- vocab.special_add_bos = int(temp);
+ vocab.tokenizer_add_bos = temp;
}
if (ml.get_key(LLM_KV_TOKENIZER_ADD_EOS, temp, false)) {
- vocab.special_add_eos = int(temp);
+ vocab.tokenizer_add_eos = temp;
}
}
);
// set attributes by model/tokenizer name
- if (_contains_any(tokenizer_pre, {"jina-v2-es", "jina-v2-de"})) {
+ if (_contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})) {
_set_token_attr("<mask>", LLAMA_TOKEN_ATTR_LSTRIP, true);
} else if (_contains_any(model_name, {"phi-3", "phi3"})) {
for (auto id : vocab.cache_special_tokens) {
};
struct llm_tokenizer_bpe {
- llm_tokenizer_bpe(const llama_vocab & vocab): vocab(vocab) {}
-
- void tokenize(const std::string & text, std::vector<llama_vocab::id> & output) {
- int final_prev_index = -1;
- bool ignore_merges = false;
-
- std::vector<std::string> word_collection;
- switch (vocab.type) {
- case LLAMA_VOCAB_TYPE_BPE:
- switch (vocab.type_pre) {
- case LLAMA_VOCAB_PRE_TYPE_LLAMA3:
- ignore_merges = true;
- word_collection = unicode_regex_split(text, {
- // original regex from tokenizer.json
- //"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
-
- // adapted: https://github.com/ggerganov/llama.cpp/pull/6920#issuecomment-2080233989
- "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
- });
- break;
- case LLAMA_VOCAB_PRE_TYPE_DBRX:
- case LLAMA_VOCAB_PRE_TYPE_SMAUG:
- word_collection = unicode_regex_split(text, {
- // same as llama3
- "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
- });
- break;
- case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM:
- word_collection = unicode_regex_split(text, {
- "[\r\n]",
- "\\s?[A-Za-zµÀ-ÖØ-öø-ƺƼ-ƿDŽ-ʓʕ-ʯͰ-ͳͶͷͻ-ͽͿΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-ՖႠ-ჅᎠ-Ᏽᏸ-ᏽᲐ-ᲺᲽ-Ჿᴀ-ᴫᵫ-ᵷᵹ-ᶚḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℴℹℼ-ℿⅅ-ⅉⅎↃↄⰀ-ⱻⱾ-ⳤⳫ-ⳮⳲⳳꙀ-ꙭꚀ-ꚛꜢ-ꝯꝱ-ꞇꞋ-ꞎꭰ-ꮿff-stﬓ-ﬗA-Za-z𐐀-𐑏𐒰-𐓓𐓘-𐓻𐲀-𐲲𐳀-𐳲𑢠-𑣟𞤀-𞥃]+",
- "\\s?[!-/:-~!-/:-~‘-‟ -。]+",
- "\\s+$",
- "[一-龥ࠀ-一가-]+",
- "\\p{N}+",
- });
- break;
- case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER:
- word_collection = unicode_regex_split(text, {
- "[\r\n]",
- "\\s?\\p{L}+",
- "\\s?\\p{P}+",
- "[一-龥ࠀ-一가-]+",
- "\\p{N}",
- });
- break;
- case LLAMA_VOCAB_PRE_TYPE_FALCON:
- word_collection = unicode_regex_split(text, {
- "[\\p{P}\\$\\+<=>\\^~\\|]+",
- "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
- "[0-9][0-9][0-9]",
- });
- break;
- case LLAMA_VOCAB_PRE_TYPE_MPT:
- // TODO: MPT pre-tokenization regexes are unknown
- // the following are close, but not exact. run the following:
- // ./bin/test-tokenizer-0 ../models/ggml-vocab-mpt.gguf
- GGML_ASSERT("MPT pre-tokenization regexes are unknown - fixes needed");
- word_collection = unicode_regex_split(text, {
- "\\s?\\p{L}+",
- "\\s?\\p{P}+",
- "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
- });
- break;
- case LLAMA_VOCAB_PRE_TYPE_STARCODER:
- case LLAMA_VOCAB_PRE_TYPE_REFACT:
- case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
- word_collection = unicode_regex_split(text, {
- "\\p{N}",
- "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
- });
- break;
- case LLAMA_VOCAB_PRE_TYPE_GPT2:
- case LLAMA_VOCAB_PRE_TYPE_OLMO:
- word_collection = unicode_regex_split(text, {
- "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
- });
- break;
- case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
- case LLAMA_VOCAB_PRE_TYPE_QWEN2:
- word_collection = unicode_regex_split(text, {
- // original regex from tokenizer.json
- // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
- "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
- });
- break;
- case LLAMA_VOCAB_PRE_TYPE_PORO:
- word_collection = unicode_regex_split(text, {
- " ?[^(\\s|.,!?…。,、।۔،)]+",
- });
- break;
- default:
- // default regex for BPE tokenization pre-processing
- word_collection = unicode_regex_split(text, {
- "[\\p{P}\\$\\+<=>\\^~\\|]+",
- "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
- "\\p{N}+",
- "[0-9][0-9][0-9]",
- });
- break;
- }
+ llm_tokenizer_bpe(const llama_vocab & vocab): vocab(vocab) {
+ GGML_ASSERT(vocab.type == LLAMA_VOCAB_TYPE_BPE);
+ switch (vocab.type_pre) {
+ case LLAMA_VOCAB_PRE_TYPE_LLAMA3:
+ regex_exprs = {
+ // original regex from tokenizer.json
+ //"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+
+ // adapted: https://github.com/ggerganov/llama.cpp/pull/6920#issuecomment-2080233989
+ "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ };
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_DBRX:
+ case LLAMA_VOCAB_PRE_TYPE_SMAUG:
+ regex_exprs = {
+ // same as llama3
+ "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ };
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM:
+ regex_exprs = {
+ "[\r\n]",
+ "\\s?[A-Za-zµÀ-ÖØ-öø-ƺƼ-ƿDŽ-ʓʕ-ʯͰ-ͳͶͷͻ-ͽͿΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-ՖႠ-ჅᎠ-Ᏽᏸ-ᏽᲐ-ᲺᲽ-Ჿᴀ-ᴫᵫ-ᵷᵹ-ᶚḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℴℹℼ-ℿⅅ-ⅉⅎↃↄⰀ-ⱻⱾ-ⳤⳫ-ⳮⳲⳳꙀ-ꙭꚀ-ꚛꜢ-ꝯꝱ-ꞇꞋ-ꞎꭰ-ꮿff-stﬓ-ﬗA-Za-z𐐀-𐑏𐒰-𐓓𐓘-𐓻𐲀-𐲲𐳀-𐳲𑢠-𑣟𞤀-𞥃]+",
+ "\\s?[!-/:-~!-/:-~‘-‟ -。]+",
+ "\\s+$",
+ "[一-龥ࠀ-一가-]+",
+ "\\p{N}+",
+ };
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER:
+ regex_exprs = {
+ "[\r\n]",
+ "\\s?\\p{L}+",
+ "\\s?\\p{P}+",
+ "[一-龥ࠀ-一가-]+",
+ "\\p{N}",
+ };
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_FALCON:
+ regex_exprs = {
+ "[\\p{P}\\$\\+<=>\\^~\\|`]+",
+ "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
+ "[0-9][0-9][0-9]",
+ };
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_MPT:
+ // TODO: MPT pre-tokenization regexes are unknown
+ // the following are close, but not exact. run the following:
+ // ./bin/test-tokenizer-0 ../models/ggml-vocab-mpt.gguf
+ GGML_ASSERT("MPT pre-tokenization regexes are unknown - fixes needed");
+ regex_exprs = {
+ "\\s?\\p{L}+",
+ "\\s?\\p{P}+",
+ "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
+ };
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_STARCODER:
+ case LLAMA_VOCAB_PRE_TYPE_REFACT:
+ case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
+ regex_exprs = {
+ "\\p{N}",
+ "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
+ };
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_GPT2:
+ case LLAMA_VOCAB_PRE_TYPE_OLMO:
+ regex_exprs = {
+ "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
+ };
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
+ case LLAMA_VOCAB_PRE_TYPE_QWEN2:
+ regex_exprs = {
+ // original regex from tokenizer.json
+ // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
+ "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ };
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_PORO:
+ regex_exprs = {
+ " ?[^(\\s|.,!?…。,、।۔،)]+",
+ };
break;
default:
- GGML_ASSERT(false);
+ // default regex for BPE tokenization pre-processing
+ regex_exprs = {
+ "[\\p{P}\\$\\+<=>\\^~\\|]+",
+ "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
+ "\\p{N}+",
+ "[0-9][0-9][0-9]",
+ };
break;
}
+ }
+
+ // Pushes an already-resolved token id onto `output` without any further processing.
+ void append(const llama_vocab::id token_id, std::vector<llama_vocab::id> & output) const {
+ output.push_back(token_id);
+ }
+
+    // Appends the BOS token to `output` when vocab.tokenizer_add_bos is set.
+    // Returns true if a token was appended, false if the flag is off.
+    bool append_bos(std::vector<llama_vocab::id> & output) const {
+        if (!vocab.tokenizer_add_bos) {
+            return false;
+        }
+        // the flag asks for a BOS token, so the vocab must define one
+        GGML_ASSERT(vocab.special_bos_id != -1);
+        output.push_back(vocab.special_bos_id);
+        return true;
+    }
+
+    // Appends the EOS token to `output` when vocab.tokenizer_add_eos is set.
+    // Returns true if a token was appended, false if the flag is off.
+    bool append_eos(std::vector<llama_vocab::id> & output) const {
+        if (!vocab.tokenizer_add_eos) {
+            return false;
+        }
+        // the flag asks for an EOS token, so the vocab must define one
+        GGML_ASSERT(vocab.special_eos_id != -1);
+        output.push_back(vocab.special_eos_id);
+        return true;
+    }
+
+    // Logs a warning when `output` holds a BOS token in second position (tokenizer_add_bos set)
+    // or an EOS token in second-to-last position (tokenizer_add_eos set).
+    // Only inspects `output`; nothing is modified.
+    void check_double_bos_eos(const std::vector<llama_vocab::id> & output) const {
+        // both checks look at a neighbour of the first/last token, so they need at least two tokens
+        if (output.size() < 2) {
+            return;
+        }
+
+        const bool double_bos = vocab.tokenizer_add_bos && output[1] == vocab.special_bos_id;
+        const bool double_eos = vocab.tokenizer_add_eos && output[output.size() - 2] == vocab.special_eos_id;
+
+        if (double_bos) {
+            LLAMA_LOG_WARN(
+                "%s: Added a BOS token to the prompt as specified by the model but the prompt "
+                "also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. "
+                "Are you sure this is what you want?\n", __FUNCTION__);
+        }
+        if (double_eos) {
+            LLAMA_LOG_WARN(
+                "%s: Added a EOS token to the prompt as specified by the model but the prompt "
+                "also ends with a EOS token. So now the final prompt ends with 2 EOS tokens. "
+                "Are you sure this is what you want?\n", __FUNCTION__);
+        }
+    }
+
+ void tokenize(const std::string & text, std::vector<llama_vocab::id> & output) {
+ int final_prev_index = -1;
+
+ const auto word_collection = unicode_regex_split(text, regex_exprs);
symbols_final.clear();
int index = 0;
size_t offset = 0;
- if (ignore_merges && vocab.token_to_id.find(word) != vocab.token_to_id.end()) {
+ if (vocab.tokenizer_ignore_merges && vocab.token_to_id.find(word) != vocab.token_to_id.end()) {
symbols.emplace_back(llm_symbol{-1, -1, word.c_str(), word.size()});
offset = word.size();
}
for (auto j = str.begin(); j != str.end(); ++j) {
std::string byte_str(1, *j);
auto token_multibyte = vocab.token_to_id.find(byte_str);
- if (token_multibyte == vocab.token_to_id.end()) {
- throw std::runtime_error("ERROR: byte not found in vocab");
+ if (token_multibyte != vocab.token_to_id.end()) {
+ output.push_back(token_multibyte->second);
}
- output.push_back((*token_multibyte).second);
}
} else {
output.push_back((*token).second);
const llama_vocab & vocab;
+ std::vector<std::string> regex_exprs;
+
std::vector<llm_symbol> symbols;
std::vector<llm_symbol> symbols_final;
bool is_prev_special = false;
- if (add_special && vocab.special_add_bos != 0) {
+ if (add_special && vocab.tokenizer_add_bos) {
GGML_ASSERT(vocab.special_bos_id != -1);
output.push_back(vocab.special_bos_id);
is_prev_special = true;
if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) {
auto raw_text = fragment.raw_text.substr(fragment.offset, fragment.length);
- if (vocab.add_space_prefix) {
+ if (vocab.tokenizer_add_space_prefix) {
if (!output.size() || is_prev_special) { // prefix with space if first token
raw_text = " " + raw_text;
}
}
}
- if (add_special && vocab.special_add_bos != 0 && output.size() >= 2 && output[1] == vocab.special_bos_id) {
+ if (add_special && vocab.tokenizer_add_bos && output.size() >= 2 && output[1] == vocab.special_bos_id) {
LLAMA_LOG_WARN(
"%s: Added a BOS token to the prompt as specified by the model but the prompt "
"also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. "
"Are you sure this is what you want?\n", __FUNCTION__);
}
- if (add_special && vocab.special_add_eos == 1) {
+ if (add_special && vocab.tokenizer_add_eos) {
GGML_ASSERT(vocab.special_eos_id != -1);
output.push_back(vocab.special_eos_id);
}
} break;
case LLAMA_VOCAB_TYPE_BPE:
{
- if (add_special && vocab.special_add_bos != 0) {
- GGML_ASSERT(vocab.special_bos_id != -1);
- output.push_back(vocab.special_bos_id);
+ llm_tokenizer_bpe tokenizer(vocab);
+
+ if (add_special) {
+ tokenizer.append_bos(output);
}
for (const auto & fragment : fragment_buffer) {
#ifdef PRETOKENIZERDEBUG
LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
#endif
- llm_tokenizer_bpe tokenizer(vocab);
tokenizer.tokenize(raw_text, output);
} else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN)
- output.push_back(fragment.token);
+ tokenizer.append(fragment.token, output);
}
}
- if (add_special && vocab.special_add_bos != 0 && output.size() >= 2 && output[1] == vocab.special_bos_id) {
- LLAMA_LOG_WARN(
- "%s: Added a BOS token to the prompt as specified by the model but the prompt "
- "also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. "
- "Are you sure this is what you want?\n", __FUNCTION__);
- }
-
- if (add_special && vocab.special_add_eos == 1) {
- GGML_ASSERT(vocab.special_add_eos != -1);
- output.push_back(vocab.special_eos_id);
+ if (add_special) {
+ tokenizer.append_eos(output);
+ tokenizer.check_double_bos_eos(output);
}
} break;
case LLAMA_VOCAB_TYPE_WPM:
}
+// Returns the vocab's tokenizer_add_bos flag converted to int32_t (1 when set, 0 otherwise).
int32_t llama_add_bos_token(const struct llama_model * model) {
- return model->vocab.special_add_bos;
+ return model->vocab.tokenizer_add_bos;
}
+// Returns the vocab's tokenizer_add_eos flag converted to int32_t (1 when set, 0 otherwise).
int32_t llama_add_eos_token(const struct llama_model * model) {
- return model->vocab.special_add_eos;
+ return model->vocab.tokenizer_add_eos;
}
llama_token llama_token_prefix(const struct llama_model * model) {
-import regex
-import ctypes
+import array
import unicodedata
-
-
-class CoodepointFlags (ctypes.Structure):
- _fields_ = [ # see definition in unicode.h
- ("is_undefined", ctypes.c_uint16, 1),
- ("is_number", ctypes.c_uint16, 1), # regex: \p{N}
- ("is_letter", ctypes.c_uint16, 1), # regex: \p{L}
- ("is_separator", ctypes.c_uint16, 1), # regex: \p{Z}
- ("is_accent_mark", ctypes.c_uint16, 1), # regex: \p{M}
- ("is_punctuation", ctypes.c_uint16, 1), # regex: \p{P}
- ("is_symbol", ctypes.c_uint16, 1), # regex: \p{S}
- ("is_control", ctypes.c_uint16, 1), # regex: \p{C}
- ]
-
-
-assert (ctypes.sizeof(CoodepointFlags) == 2)
+import requests
MAX_CODEPOINTS = 0x110000
-regex_number = regex.compile(r'\p{N}')
-regex_letter = regex.compile(r'\p{L}')
-regex_separator = regex.compile(r'\p{Z}')
-regex_accent_mark = regex.compile(r'\p{M}')
-regex_punctuation = regex.compile(r'\p{P}')
-regex_symbol = regex.compile(r'\p{S}')
-regex_control = regex.compile(r'\p{C}')
-regex_whitespace = regex.compile(r'\s')
-
-codepoint_flags = (CoodepointFlags * MAX_CODEPOINTS)()
+UNICODE_DATA_URL = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
+
+
+# see https://www.unicode.org/L2/L1999/UnicodeData.html
+def unicode_data_iter():
+ res = requests.get(UNICODE_DATA_URL)
+ res.raise_for_status()
+ data = res.content.decode()
+
+ prev = []
+
+ for line in data.splitlines():
+ # ej: 0000;<control>;Cc;0;BN;;;;;N;NULL;;;;
+ line = line.split(";")
+
+ cpt = int(line[0], base=16)
+ assert cpt < MAX_CODEPOINTS
+
+ cpt_lower = int(line[-2] or "0", base=16)
+ assert cpt_lower < MAX_CODEPOINTS
+
+ cpt_upper = int(line[-3] or "0", base=16)
+ assert cpt_upper < MAX_CODEPOINTS
+
+ categ = line[2].strip()
+ assert len(categ) == 2
+
+ bidir = line[4].strip()
+ assert len(categ) == 2
+
+ name = line[1]
+ if name.endswith(", First>"):
+ prev = (cpt, cpt_lower, cpt_upper, categ, bidir)
+ continue
+ if name.endswith(", Last>"):
+ assert prev[1:] == (0, 0, categ, bidir)
+ for c in range(prev[0], cpt):
+ yield (c, cpt_lower, cpt_upper, categ, bidir)
+
+ yield (cpt, cpt_lower, cpt_upper, categ, bidir)
+
+
+# see definition in unicode.h
+CODEPOINT_FLAG_UNDEFINED = 0x0001 #
+CODEPOINT_FLAG_NUMBER = 0x0002 # \p{N}
+CODEPOINT_FLAG_LETTER = 0x0004 # \p{L}
+CODEPOINT_FLAG_SEPARATOR = 0x0008 # \p{Z}
+CODEPOINT_FLAG_MARK = 0x0010 # \p{M}
+CODEPOINT_FLAG_PUNCTUATION = 0x0020 # \p{P}
+CODEPOINT_FLAG_SYMBOL = 0x0040 # \p{S}
+CODEPOINT_FLAG_CONTROL = 0x0080 # \p{C}
+
+# Maps a Unicode general-category abbreviation to the CODEPOINT_FLAG_* value
+# stored in codepoint_flags for codepoints of that category.
+UNICODE_CATEGORY_TO_FLAG = {
+ "Cn": CODEPOINT_FLAG_UNDEFINED, # Undefined
+ "Cc": CODEPOINT_FLAG_CONTROL, # Control
+ "Cf": CODEPOINT_FLAG_CONTROL, # Format
+ "Co": CODEPOINT_FLAG_CONTROL, # Private Use
+ "Cs": CODEPOINT_FLAG_CONTROL, # Surrogate
+ "Ll": CODEPOINT_FLAG_LETTER, # Lowercase Letter
+ "Lm": CODEPOINT_FLAG_LETTER, # Modifier Letter
+ "Lo": CODEPOINT_FLAG_LETTER, # Other Letter
+ "Lt": CODEPOINT_FLAG_LETTER, # Titlecase Letter
+ "Lu": CODEPOINT_FLAG_LETTER, # Uppercase Letter
+ "L&": CODEPOINT_FLAG_LETTER, # Cased Letter
+ "Mc": CODEPOINT_FLAG_MARK, # Spacing Mark
+ "Me": CODEPOINT_FLAG_MARK, # Enclosing Mark
+ "Mn": CODEPOINT_FLAG_MARK, # Nonspacing Mark
+ "Nd": CODEPOINT_FLAG_NUMBER, # Decimal Number
+ "Nl": CODEPOINT_FLAG_NUMBER, # Letter Number
+ "No": CODEPOINT_FLAG_NUMBER, # Other Number
+ "Pc": CODEPOINT_FLAG_PUNCTUATION, # Connector Punctuation
+ "Pd": CODEPOINT_FLAG_PUNCTUATION, # Dash Punctuation
+ "Pe": CODEPOINT_FLAG_PUNCTUATION, # Close Punctuation
+ "Pf": CODEPOINT_FLAG_PUNCTUATION, # Final Punctuation
+ "Pi": CODEPOINT_FLAG_PUNCTUATION, # Initial Punctuation
+ "Po": CODEPOINT_FLAG_PUNCTUATION, # Other Punctuation
+ "Ps": CODEPOINT_FLAG_PUNCTUATION, # Open Punctuation
+ "Sc": CODEPOINT_FLAG_SYMBOL, # Currency Symbol
+ "Sk": CODEPOINT_FLAG_SYMBOL, # Modifier Symbol
+ "Sm": CODEPOINT_FLAG_SYMBOL, # Math Symbol
+ "So": CODEPOINT_FLAG_SYMBOL, # Other Symbol
+ "Zl": CODEPOINT_FLAG_SEPARATOR, # Line Separator
+ "Zp": CODEPOINT_FLAG_SEPARATOR, # Paragraph Separator
+ "Zs": CODEPOINT_FLAG_SEPARATOR, # Space Separator
+}
+
+
+codepoint_flags = array.array('H', [CODEPOINT_FLAG_UNDEFINED]) * MAX_CODEPOINTS
table_whitespace = []
table_lowercase = []
table_uppercase = []
table_nfd = []
-for codepoint in range(MAX_CODEPOINTS):
+for (cpt, cpt_lower, cpt_upper, categ, bidir) in unicode_data_iter():
# convert codepoint to unicode character
- char = chr(codepoint)
-
- # regex categories
- flags = codepoint_flags[codepoint]
- flags.is_number = bool(regex_number.match(char))
- flags.is_letter = bool(regex_letter.match(char))
- flags.is_separator = bool(regex_separator.match(char))
- flags.is_accent_mark = bool(regex_accent_mark.match(char))
- flags.is_punctuation = bool(regex_punctuation.match(char))
- flags.is_symbol = bool(regex_symbol.match(char))
- flags.is_control = bool(regex_control.match(char))
- flags.is_undefined = bytes(flags)[0] == 0
- assert (not flags.is_undefined)
-
- # whitespaces
- if bool(regex_whitespace.match(char)):
- table_whitespace.append(codepoint)
+ char = chr(cpt)
+
+ # codepoint category flags
+ codepoint_flags[cpt] = UNICODE_CATEGORY_TO_FLAG[categ]
# lowercase conversion
- lower = ord(char.lower()[0])
- if codepoint != lower:
- table_lowercase.append((codepoint, lower))
+ if cpt_lower:
+ table_lowercase.append((cpt, cpt_lower))
# uppercase conversion
- upper = ord(char.upper()[0])
- if codepoint != upper:
- table_uppercase.append((codepoint, upper))
+ if cpt_upper:
+ table_uppercase.append((cpt, cpt_upper))
# NFD normalization
norm = ord(unicodedata.normalize('NFD', char)[0])
- if codepoint != norm:
- table_nfd.append((codepoint, norm))
+ if cpt != norm:
+ table_nfd.append((cpt, norm))
+
+
+# whitespaces, see "<White_Space>" https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
+table_whitespace.extend(range(0x0009, 0x000D + 1))
+table_whitespace.extend(range(0x2000, 0x200A + 1))
+table_whitespace.extend([0x0020, 0x0085, 0x00A0, 0x1680, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000])
+
+
+# sort by codepoint
+table_whitespace.sort()
+table_lowercase.sort()
+table_uppercase.sort()
+table_nfd.sort()
# group ranges with same flags
ranges_flags = [(0, codepoint_flags[0])] # start, flags
for codepoint, flags in enumerate(codepoint_flags):
- if bytes(flags) != bytes(ranges_flags[-1][1]):
+ if flags != ranges_flags[-1][1]:
ranges_flags.append((codepoint, flags))
-ranges_flags.append((MAX_CODEPOINTS, CoodepointFlags()))
+ranges_flags.append((MAX_CODEPOINTS, 0x0000))
# group ranges with same nfd
ranges_nfd[-1] = (start, codepoint, norm)
-# Generate 'unicode-data.cpp'
-
+# Generate 'unicode-data.cpp':
+# python ./scripts/gen-unicode-data.py > unicode-data.cpp
def out(line=""):
print(line, end='\n') # noqa
out("const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1")
for codepoint, flags in ranges_flags:
- flags = int.from_bytes(bytes(flags), "little")
out("{0x%06X, 0x%04X}," % (codepoint, flags))
out("};\n")
out("const std::unordered_set<uint32_t> unicode_set_whitespace = {")
-out(", ".join("0x%06X" % cpt for cpt in table_whitespace))
+for codepoint in table_whitespace:
+ out("0x%06X," % codepoint)
out("};\n")
out("const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {")
import argparse
import subprocess
import random
+import unicodedata
from typing import Callable, Iterator
import cffi
from transformers import AutoTokenizer
-logger = logging.getLogger("test-tokenizer-random-bpe")
+
+logger = logging.getLogger("test-tokenizer-random")
class LibLlama:
'Cửa Việt', # llama-3, ignore_merges = true
'<s>a', # Phi-3 fail
'<unk><|endoftext|><s>', # Phi-3 fail
- 'a\na', # TODO: Bert fail
- 'a </s> b', # rstrip phi-3
- 'a <mask> b', # lstrip jina-v2
+ 'a\na', # bert fail
+ '"`', # falcon
+ ' \u2e4e', # falcon
+ 'a\xa0\xa0\x00b', # jina-v2-es
+ 'one <mask>', # jina-v2-es <mask> lstrip=true
+ 'a </s> b', # rstrip phi-3
+ 'a <mask> b', # lstrip jina-v2
+ '\xa0aC', # deepseek
]
for m in range(iterations):
rand.seed(m)
words = rand.choices(all_tokens, k=500)
- if words[0] == tokenizer.bos_token: # skip spam warning of double BOS
+ if words and words[0] == tokenizer.bos_token: # skip spam warning of double BOS
while len(words) > 1 and words[1] == tokenizer.bos_token: # leave one starting BOS
words.pop(0)
if tokenizer.add_bos_token: # drop all starting BOS
words.pop(0)
+ if words and words[-1] == tokenizer.eos_token: # skip spam warning of double EOS
+ while len(words) > 1 and words[-2] == tokenizer.eos_token: # leave one trailing EOS
+ words.pop(-1)
+ if tokenizer.add_bos_token: # drop all trailing EOS
+ words.pop(-1)
yield "".join(words)
def generator_random_chars(iterations=100) -> Iterator[str]:
"""Brute force random text with simple characters"""
+ NUM_WORDS = 400
WHITESPACES = list(" " * 20 + "\n" * 5 + "\r\n" * 5 + "\t" * 5)
CHARS = list(sorted(set("""
ABCDEFGHIJKLMNOPQRSTUVWXYZ
for m in range(iterations):
rand.seed(m)
text = []
- num_words = rand.randint(300, 400)
- for i in range(num_words):
+ for _ in range(NUM_WORDS):
k = rand.randint(1, 7)
word = rand.choices(CHARS, k=k)
- space = rand.choice(WHITESPACES)
- text.append("".join(word) + space)
+ word.append(rand.choice(WHITESPACES))
+ text.append("".join(word))
+ yield "".join(text)
+
+
+def generator_unicodes() -> Iterator[str]:
+ """Iterate unicode characters"""
+
+ # upper bound (exclusive) of the scanned codepoints; the full Unicode range would be 0x110000
+ MAX_CODEPOINTS = 0x30000 # 0x110000
+
+ # decides whether a codepoint is included in the yielded characters
+ def _valid(cpt):
+ if cpt >= 0x30000: # unassigned and supplementary
+ return False
+ if 0x00D800 <= cpt <= 0x00F8FF: # Surrogates (U+D800..U+DFFF) and Private Use Area (U+E000..U+F8FF)
+ return False
+ if unicodedata.category(chr(cpt)) == "Cn": # "Cn": not assigned in Python's unicodedata tables
+ return False
+ return True
+
+ # starts at 1, so U+0000 is never yielded
+ characters = [chr(cpt) for cpt in range(1, MAX_CODEPOINTS) if _valid(cpt)]
+
+ yield from characters
+
+
+def generator_random_unicodes(iterations=100) -> Iterator[str]:
+ """Brute force random text with unicode characters"""
+
+ NUM_WORDS = 200
+ WHITESPACES = list(" " * 20 + "\n" * 5 + "\r\n" * 5 + "\t" * 5)
+
+ characters = list(generator_unicodes())
+
+ rand = random.Random()
+ for m in range(iterations):
+ rand.seed(m)
+ text = []
+ for _ in range(NUM_WORDS):
+ k = rand.randint(1, 7)
+ word = rand.choices(characters, k=k)
+ word.append(rand.choice(WHITESPACES))
+ text.append("".join(word))
yield "".join(text)
yield "".join(text)
-def generator_random_bytes(iterations=100) -> Iterator[str]:
- """Brute force random bytes"""
-
- WHITESPACES = list(" " * 20 + "\n" * 5 + "\r\n" * 5 + "\t" * 5)
-
- rand = random.Random()
- for m in range(iterations):
- rand.seed(m)
- text = []
- num_words = rand.randint(300, 400)
- for i in range(num_words):
- k = rand.randint(1, 8)
- word = [chr(r) for r in rand.randbytes(k) if r]
- word.append(rand.choice(WHITESPACES))
- text.append("".join(word))
- yield "".join(text)
-
-
-def test_compare_tokenizer(func_tokenize1: Callable, func_tokenize2: Callable, generator: Iterator[str]):
+def compare_tokenizers(func_tokenize1: Callable, func_tokenize2: Callable, generator: Iterator[str]):
def find_first_mismatch(ids1: list[int], ids2: list[int]):
for i, (a, b) in enumerate(zip(ids1, ids2)):
return -1
return min(len(ids1), len(ids2))
- t0 = time.perf_counter()
+ t_tokenizer1 = 0
+ t_tokenizer2 = 0
+ t_start = time.perf_counter()
+ num_errors = 10
+
logger.info("%s: %s" % (generator.__name__, "ini"))
for text in generator:
+ # print(repr(text), hex(ord(text[0])), text.encode())
+ t0 = time.perf_counter()
ids1 = func_tokenize1(text)
+ t1 = time.perf_counter()
ids2 = func_tokenize2(text)
+ t2 = time.perf_counter()
+ t_tokenizer1 += t1 - t0
+ t_tokenizer2 += t2 - t1
if ids1 != ids2:
i = find_first_mismatch(ids1, ids2)
ids1 = list(ids1)[max(0, i - 2) : i + 5 + 1]
ids2 = list(ids2)[max(0, i - 2) : i + 5 + 1]
- logger.info(" TokenIDs: " + str(ids1))
- logger.info(" Expected: " + str(ids2))
- raise Exception()
- t1 = time.perf_counter()
- logger.info("%s: end, time: %.3f secs" % (generator.__name__, t1 - t0))
+ logger.error(" TokenIDs: " + str(ids1))
+ logger.error(" Expected: " + str(ids2))
+ # raise Exception()
+ num_errors += 1
+ if num_errors > 10:
+ break
+
+ t_total = time.perf_counter() - t_start
+ logger.info("%s: end, tok1: %.3f tok2: %.3f total: %.3f" % (generator.__name__, t_tokenizer1, t_tokenizer2, t_total))
def main(argv: list[str] = None):
# Compares the llama.cpp tokenizer (loaded from args.vocab_file) against the
# reference tokenizer (loaded from args.dir_tokenizer) over several text generators.
# `argv` is forwarded to parser.parse_args().
# NOTE(review): the parser setup and the func_tokenize1/func_tokenize2
# definitions are not part of this excerpt.
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
args = parser.parse_args(argv)
- logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+ logging.basicConfig(level = logging.DEBUG if args.verbose else logging.INFO)
+ logger.info(f"VOCABFILE: '{args.vocab_file}'")
model = LibLlamaModel(LibLlama(), args.vocab_file, mparams=dict(vocab_only=True), cparams=dict(n_ctx=4096))
tokenizer = AutoTokenizer.from_pretrained(args.dir_tokenizer)
# Probe with a one-character text: if more than one id comes back, check whether
# the first/last id is the tokenizer's BOS/EOS id to infer whether they are added.
ids = func_tokenize2("a")
assert 1 <= len(ids) <= 3
add_bos_token = len(ids) > 1 and tokenizer.bos_token_id == ids[0]
+ add_eos_token = len(ids) > 1 and tokenizer.eos_token_id == ids[-1]
# getattr keeps an attribute the tokenizer already has; the inferred value is only a fallback.
tokenizer.add_bos_token = getattr(tokenizer, "add_bos_token", add_bos_token)
+ tokenizer.add_eos_token = getattr(tokenizer, "add_eos_token", add_eos_token)
# Sorted list of the decoded text of every vocab id (special tokens skipped while decoding).
vocab = list(sorted(tokenizer.batch_decode(list(tokenizer.get_vocab().values()), skip_special_tokens=True)))
- test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_custom_text())
- test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_custom_text_edge_cases())
- test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_vocab_words(vocab))
- test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_added_lr_strip(tokenizer))
- test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_random_added_tokens(tokenizer, 10_000))
- test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_random_chars(10_000))
- test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_random_vocab_chars(vocab, 10_000))
- test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_random_vocab_words(vocab, 5_000))
- # test_compare_tokenizer(func_tokenize1, func_tokenize2, generator_random_bytes(10_000)) # FAIL
+
# Run every text generator through the comparison; each call logs its own timings and mismatches.
+ compare_tokenizers(func_tokenize1, func_tokenize2, generator_custom_text())
+ compare_tokenizers(func_tokenize1, func_tokenize2, generator_custom_text_edge_cases())
+ compare_tokenizers(func_tokenize1, func_tokenize2, generator_unicodes())
+ compare_tokenizers(func_tokenize1, func_tokenize2, generator_vocab_words(vocab))
+ compare_tokenizers(func_tokenize1, func_tokenize2, generator_added_lr_strip(tokenizer))
+ compare_tokenizers(func_tokenize1, func_tokenize2, generator_random_added_tokens(tokenizer, 10_000))
+ compare_tokenizers(func_tokenize1, func_tokenize2, generator_random_chars(10_000))
+ compare_tokenizers(func_tokenize1, func_tokenize2, generator_random_unicodes(10_000))
+ compare_tokenizers(func_tokenize1, func_tokenize2, generator_random_vocab_chars(vocab, 10_000))
+ compare_tokenizers(func_tokenize1, func_tokenize2, generator_random_vocab_words(vocab, 5_000))
# Release the model once all comparisons are done.
model.free()
if __name__ == "__main__":
# Script driver: instead of parsing the real command line (see the disabled
# main() call below), run main() once per entry of the hard-coded tokenizer list.
# main()
# Log at DEBUG level, with timestamps, appending to a file named "<logger name>.log".
# NOTE(review): main() calls logging.basicConfig() again; presumably that later
# call has no effect once this one has configured logging - confirm.
+ logging.basicConfig(
+ level = logging.DEBUG,
+ format = "%(asctime)s.%(msecs)03d %(name)s %(levelname)s %(message)s",
+ datefmt = "%Y-%m-%d %H:%M:%S",
+ filename = logger.name + ".log",
+ filemode = "a"
+ )
+
path_tokenizers = "./models/tokenizers/"
path_vocab_format = "./models/ggml-vocab-%s.gguf"
# import os
# tokenizers = os.listdir(path_tokenizers)
# Tokenizers to test; the SPM and WPM entries are commented out, only BPE ones are active.
tokenizers = [
- "llama-spm", # SPM
- "phi-3", # SPM
- "jina-v2-en", # WPM
- "bert-bge", # WPM
+ # "llama-spm", # SPM
+ # "phi-3", # SPM
+ # "bert-bge", # WPM
+ # "jina-v2-en", # WPM
+ "gpt-2", # BPE
+ "llama-bpe", # BPE
+ "falcon", # BPE
+ "starcoder", # BPE
+ "jina-v2-es", # BPE
+ "jina-v2-de", # BPE
+ "jina-v2-code", # BPE
+ "smaug-bpe", # BPE
+ "phi-2", # BPE
+ "deepseek-coder", # BPE
+ "deepseek-llm", # BPE
]
for tokenizer in tokenizers:
- print("\n" + "=" * 50 + "\n" + tokenizer + "\n") # noqa
+ logger.info("=" * 50)
+ logger.info(f"TOKENIZER: '{tokenizer}'")
# Derive the gguf vocab file and the tokenizer directory from the tokenizer
# name, then run the comparison with verbose output enabled.
vocab_file = path_vocab_format % tokenizer
dir_tokenizer = path_tokenizers + "/" + tokenizer
main([vocab_file, dir_tokenizer, "--verbose"])
{0x000370, 0x0004},
{0x000375, 0x0040},
{0x000376, 0x0004},
-{0x000378, 0x0080},
+{0x000378, 0x0001},
{0x00037A, 0x0004},
{0x00037E, 0x0020},
{0x00037F, 0x0004},
-{0x000380, 0x0080},
+{0x000380, 0x0001},
{0x000384, 0x0040},
{0x000386, 0x0004},
{0x000387, 0x0020},
{0x000388, 0x0004},
-{0x00038B, 0x0080},
+{0x00038B, 0x0001},
{0x00038C, 0x0004},
-{0x00038D, 0x0080},
+{0x00038D, 0x0001},
{0x00038E, 0x0004},
-{0x0003A2, 0x0080},
+{0x0003A2, 0x0001},
{0x0003A3, 0x0004},
{0x0003F6, 0x0040},
{0x0003F7, 0x0004},
{0x000482, 0x0040},
{0x000483, 0x0010},
{0x00048A, 0x0004},
-{0x000530, 0x0080},
+{0x000530, 0x0001},
{0x000531, 0x0004},
-{0x000557, 0x0080},
+{0x000557, 0x0001},
{0x000559, 0x0004},
{0x00055A, 0x0020},
{0x000560, 0x0004},
{0x000589, 0x0020},
-{0x00058B, 0x0080},
+{0x00058B, 0x0001},
{0x00058D, 0x0040},
-{0x000590, 0x0080},
+{0x000590, 0x0001},
{0x000591, 0x0010},
{0x0005BE, 0x0020},
{0x0005BF, 0x0010},
{0x0005C4, 0x0010},
{0x0005C6, 0x0020},
{0x0005C7, 0x0010},
-{0x0005C8, 0x0080},
+{0x0005C8, 0x0001},
{0x0005D0, 0x0004},
-{0x0005EB, 0x0080},
+{0x0005EB, 0x0001},
{0x0005EF, 0x0004},
{0x0005F3, 0x0020},
-{0x0005F5, 0x0080},
+{0x0005F5, 0x0001},
+{0x000600, 0x0080},
{0x000606, 0x0040},
{0x000609, 0x0020},
{0x00060B, 0x0040},
{0x0006FD, 0x0040},
{0x0006FF, 0x0004},
{0x000700, 0x0020},
-{0x00070E, 0x0080},
+{0x00070E, 0x0001},
+{0x00070F, 0x0080},
{0x000710, 0x0004},
{0x000711, 0x0010},
{0x000712, 0x0004},
{0x000730, 0x0010},
-{0x00074B, 0x0080},
+{0x00074B, 0x0001},
{0x00074D, 0x0004},
{0x0007A6, 0x0010},
{0x0007B1, 0x0004},
-{0x0007B2, 0x0080},
+{0x0007B2, 0x0001},
{0x0007C0, 0x0002},
{0x0007CA, 0x0004},
{0x0007EB, 0x0010},
{0x0007F6, 0x0040},
{0x0007F7, 0x0020},
{0x0007FA, 0x0004},
-{0x0007FB, 0x0080},
+{0x0007FB, 0x0001},
{0x0007FD, 0x0010},
{0x0007FE, 0x0040},
{0x000800, 0x0004},
{0x000825, 0x0010},
{0x000828, 0x0004},
{0x000829, 0x0010},
-{0x00082E, 0x0080},
+{0x00082E, 0x0001},
{0x000830, 0x0020},
-{0x00083F, 0x0080},
+{0x00083F, 0x0001},
{0x000840, 0x0004},
{0x000859, 0x0010},
-{0x00085C, 0x0080},
+{0x00085C, 0x0001},
{0x00085E, 0x0020},
-{0x00085F, 0x0080},
+{0x00085F, 0x0001},
{0x000860, 0x0004},
-{0x00086B, 0x0080},
+{0x00086B, 0x0001},
{0x000870, 0x0004},
{0x000888, 0x0040},
{0x000889, 0x0004},
-{0x00088F, 0x0080},
+{0x00088F, 0x0001},
+{0x000890, 0x0080},
+{0x000892, 0x0001},
{0x000898, 0x0010},
{0x0008A0, 0x0004},
{0x0008CA, 0x0010},
{0x000970, 0x0020},
{0x000971, 0x0004},
{0x000981, 0x0010},
-{0x000984, 0x0080},
+{0x000984, 0x0001},
{0x000985, 0x0004},
-{0x00098D, 0x0080},
+{0x00098D, 0x0001},
{0x00098F, 0x0004},
-{0x000991, 0x0080},
+{0x000991, 0x0001},
{0x000993, 0x0004},
-{0x0009A9, 0x0080},
+{0x0009A9, 0x0001},
{0x0009AA, 0x0004},
-{0x0009B1, 0x0080},
+{0x0009B1, 0x0001},
{0x0009B2, 0x0004},
-{0x0009B3, 0x0080},
+{0x0009B3, 0x0001},
{0x0009B6, 0x0004},
-{0x0009BA, 0x0080},
+{0x0009BA, 0x0001},
{0x0009BC, 0x0010},
{0x0009BD, 0x0004},
{0x0009BE, 0x0010},
-{0x0009C5, 0x0080},
+{0x0009C5, 0x0001},
{0x0009C7, 0x0010},
-{0x0009C9, 0x0080},
+{0x0009C9, 0x0001},
{0x0009CB, 0x0010},
{0x0009CE, 0x0004},
-{0x0009CF, 0x0080},
+{0x0009CF, 0x0001},
{0x0009D7, 0x0010},
-{0x0009D8, 0x0080},
+{0x0009D8, 0x0001},
{0x0009DC, 0x0004},
-{0x0009DE, 0x0080},
+{0x0009DE, 0x0001},
{0x0009DF, 0x0004},
{0x0009E2, 0x0010},
-{0x0009E4, 0x0080},
+{0x0009E4, 0x0001},
{0x0009E6, 0x0002},
{0x0009F0, 0x0004},
{0x0009F2, 0x0040},
{0x0009FC, 0x0004},
{0x0009FD, 0x0020},
{0x0009FE, 0x0010},
-{0x0009FF, 0x0080},
+{0x0009FF, 0x0001},
{0x000A01, 0x0010},
-{0x000A04, 0x0080},
+{0x000A04, 0x0001},
{0x000A05, 0x0004},
-{0x000A0B, 0x0080},
+{0x000A0B, 0x0001},
{0x000A0F, 0x0004},
-{0x000A11, 0x0080},
+{0x000A11, 0x0001},
{0x000A13, 0x0004},
-{0x000A29, 0x0080},
+{0x000A29, 0x0001},
{0x000A2A, 0x0004},
-{0x000A31, 0x0080},
+{0x000A31, 0x0001},
{0x000A32, 0x0004},
-{0x000A34, 0x0080},
+{0x000A34, 0x0001},
{0x000A35, 0x0004},
-{0x000A37, 0x0080},
+{0x000A37, 0x0001},
{0x000A38, 0x0004},
-{0x000A3A, 0x0080},
+{0x000A3A, 0x0001},
{0x000A3C, 0x0010},
-{0x000A3D, 0x0080},
+{0x000A3D, 0x0001},
{0x000A3E, 0x0010},
-{0x000A43, 0x0080},
+{0x000A43, 0x0001},
{0x000A47, 0x0010},
-{0x000A49, 0x0080},
+{0x000A49, 0x0001},
{0x000A4B, 0x0010},
-{0x000A4E, 0x0080},
+{0x000A4E, 0x0001},
{0x000A51, 0x0010},
-{0x000A52, 0x0080},
+{0x000A52, 0x0001},
{0x000A59, 0x0004},
-{0x000A5D, 0x0080},
+{0x000A5D, 0x0001},
{0x000A5E, 0x0004},
-{0x000A5F, 0x0080},
+{0x000A5F, 0x0001},
{0x000A66, 0x0002},
{0x000A70, 0x0010},
{0x000A72, 0x0004},
{0x000A75, 0x0010},
{0x000A76, 0x0020},
-{0x000A77, 0x0080},
+{0x000A77, 0x0001},
{0x000A81, 0x0010},
-{0x000A84, 0x0080},
+{0x000A84, 0x0001},
{0x000A85, 0x0004},
-{0x000A8E, 0x0080},
+{0x000A8E, 0x0001},
{0x000A8F, 0x0004},
-{0x000A92, 0x0080},
+{0x000A92, 0x0001},
{0x000A93, 0x0004},
-{0x000AA9, 0x0080},
+{0x000AA9, 0x0001},
{0x000AAA, 0x0004},
-{0x000AB1, 0x0080},
+{0x000AB1, 0x0001},
{0x000AB2, 0x0004},
-{0x000AB4, 0x0080},
+{0x000AB4, 0x0001},
{0x000AB5, 0x0004},
-{0x000ABA, 0x0080},
+{0x000ABA, 0x0001},
{0x000ABC, 0x0010},
{0x000ABD, 0x0004},
{0x000ABE, 0x0010},
-{0x000AC6, 0x0080},
+{0x000AC6, 0x0001},
{0x000AC7, 0x0010},
-{0x000ACA, 0x0080},
+{0x000ACA, 0x0001},
{0x000ACB, 0x0010},
-{0x000ACE, 0x0080},
+{0x000ACE, 0x0001},
{0x000AD0, 0x0004},
-{0x000AD1, 0x0080},
+{0x000AD1, 0x0001},
{0x000AE0, 0x0004},
{0x000AE2, 0x0010},
-{0x000AE4, 0x0080},
+{0x000AE4, 0x0001},
{0x000AE6, 0x0002},
{0x000AF0, 0x0020},
{0x000AF1, 0x0040},
-{0x000AF2, 0x0080},
+{0x000AF2, 0x0001},
{0x000AF9, 0x0004},
{0x000AFA, 0x0010},
-{0x000B00, 0x0080},
+{0x000B00, 0x0001},
{0x000B01, 0x0010},
-{0x000B04, 0x0080},
+{0x000B04, 0x0001},
{0x000B05, 0x0004},
-{0x000B0D, 0x0080},
+{0x000B0D, 0x0001},
{0x000B0F, 0x0004},
-{0x000B11, 0x0080},
+{0x000B11, 0x0001},
{0x000B13, 0x0004},
-{0x000B29, 0x0080},
+{0x000B29, 0x0001},
{0x000B2A, 0x0004},
-{0x000B31, 0x0080},
+{0x000B31, 0x0001},
{0x000B32, 0x0004},
-{0x000B34, 0x0080},
+{0x000B34, 0x0001},
{0x000B35, 0x0004},
-{0x000B3A, 0x0080},
+{0x000B3A, 0x0001},
{0x000B3C, 0x0010},
{0x000B3D, 0x0004},
{0x000B3E, 0x0010},
-{0x000B45, 0x0080},
+{0x000B45, 0x0001},
{0x000B47, 0x0010},
-{0x000B49, 0x0080},
+{0x000B49, 0x0001},
{0x000B4B, 0x0010},
-{0x000B4E, 0x0080},
+{0x000B4E, 0x0001},
{0x000B55, 0x0010},
-{0x000B58, 0x0080},
+{0x000B58, 0x0001},
{0x000B5C, 0x0004},
-{0x000B5E, 0x0080},
+{0x000B5E, 0x0001},
{0x000B5F, 0x0004},
{0x000B62, 0x0010},
-{0x000B64, 0x0080},
+{0x000B64, 0x0001},
{0x000B66, 0x0002},
{0x000B70, 0x0040},
{0x000B71, 0x0004},
{0x000B72, 0x0002},
-{0x000B78, 0x0080},
+{0x000B78, 0x0001},
{0x000B82, 0x0010},
{0x000B83, 0x0004},
-{0x000B84, 0x0080},
+{0x000B84, 0x0001},
{0x000B85, 0x0004},
-{0x000B8B, 0x0080},
+{0x000B8B, 0x0001},
{0x000B8E, 0x0004},
-{0x000B91, 0x0080},
+{0x000B91, 0x0001},
{0x000B92, 0x0004},
-{0x000B96, 0x0080},
+{0x000B96, 0x0001},
{0x000B99, 0x0004},
-{0x000B9B, 0x0080},
+{0x000B9B, 0x0001},
{0x000B9C, 0x0004},
-{0x000B9D, 0x0080},
+{0x000B9D, 0x0001},
{0x000B9E, 0x0004},
-{0x000BA0, 0x0080},
+{0x000BA0, 0x0001},
{0x000BA3, 0x0004},
-{0x000BA5, 0x0080},
+{0x000BA5, 0x0001},
{0x000BA8, 0x0004},
-{0x000BAB, 0x0080},
+{0x000BAB, 0x0001},
{0x000BAE, 0x0004},
-{0x000BBA, 0x0080},
+{0x000BBA, 0x0001},
{0x000BBE, 0x0010},
-{0x000BC3, 0x0080},
+{0x000BC3, 0x0001},
{0x000BC6, 0x0010},
-{0x000BC9, 0x0080},
+{0x000BC9, 0x0001},
{0x000BCA, 0x0010},
-{0x000BCE, 0x0080},
+{0x000BCE, 0x0001},
{0x000BD0, 0x0004},
-{0x000BD1, 0x0080},
+{0x000BD1, 0x0001},
{0x000BD7, 0x0010},
-{0x000BD8, 0x0080},
+{0x000BD8, 0x0001},
{0x000BE6, 0x0002},
{0x000BF3, 0x0040},
-{0x000BFB, 0x0080},
+{0x000BFB, 0x0001},
{0x000C00, 0x0010},
{0x000C05, 0x0004},
-{0x000C0D, 0x0080},
+{0x000C0D, 0x0001},
{0x000C0E, 0x0004},
-{0x000C11, 0x0080},
+{0x000C11, 0x0001},
{0x000C12, 0x0004},
-{0x000C29, 0x0080},
+{0x000C29, 0x0001},
{0x000C2A, 0x0004},
-{0x000C3A, 0x0080},
+{0x000C3A, 0x0001},
{0x000C3C, 0x0010},
{0x000C3D, 0x0004},
{0x000C3E, 0x0010},
-{0x000C45, 0x0080},
+{0x000C45, 0x0001},
{0x000C46, 0x0010},
-{0x000C49, 0x0080},
+{0x000C49, 0x0001},
{0x000C4A, 0x0010},
-{0x000C4E, 0x0080},
+{0x000C4E, 0x0001},
{0x000C55, 0x0010},
-{0x000C57, 0x0080},
+{0x000C57, 0x0001},
{0x000C58, 0x0004},
-{0x000C5B, 0x0080},
+{0x000C5B, 0x0001},
{0x000C5D, 0x0004},
-{0x000C5E, 0x0080},
+{0x000C5E, 0x0001},
{0x000C60, 0x0004},
{0x000C62, 0x0010},
-{0x000C64, 0x0080},
+{0x000C64, 0x0001},
{0x000C66, 0x0002},
-{0x000C70, 0x0080},
+{0x000C70, 0x0001},
{0x000C77, 0x0020},
{0x000C78, 0x0002},
{0x000C7F, 0x0040},
{0x000C81, 0x0010},
{0x000C84, 0x0020},
{0x000C85, 0x0004},
-{0x000C8D, 0x0080},
+{0x000C8D, 0x0001},
{0x000C8E, 0x0004},
-{0x000C91, 0x0080},
+{0x000C91, 0x0001},
{0x000C92, 0x0004},
-{0x000CA9, 0x0080},
+{0x000CA9, 0x0001},
{0x000CAA, 0x0004},
-{0x000CB4, 0x0080},
+{0x000CB4, 0x0001},
{0x000CB5, 0x0004},
-{0x000CBA, 0x0080},
+{0x000CBA, 0x0001},
{0x000CBC, 0x0010},
{0x000CBD, 0x0004},
{0x000CBE, 0x0010},
-{0x000CC5, 0x0080},
+{0x000CC5, 0x0001},
{0x000CC6, 0x0010},
-{0x000CC9, 0x0080},
+{0x000CC9, 0x0001},
{0x000CCA, 0x0010},
-{0x000CCE, 0x0080},
+{0x000CCE, 0x0001},
{0x000CD5, 0x0010},
-{0x000CD7, 0x0080},
+{0x000CD7, 0x0001},
{0x000CDD, 0x0004},
-{0x000CDF, 0x0080},
+{0x000CDF, 0x0001},
{0x000CE0, 0x0004},
{0x000CE2, 0x0010},
-{0x000CE4, 0x0080},
+{0x000CE4, 0x0001},
{0x000CE6, 0x0002},
-{0x000CF0, 0x0080},
+{0x000CF0, 0x0001},
{0x000CF1, 0x0004},
{0x000CF3, 0x0010},
-{0x000CF4, 0x0080},
+{0x000CF4, 0x0001},
{0x000D00, 0x0010},
{0x000D04, 0x0004},
-{0x000D0D, 0x0080},
+{0x000D0D, 0x0001},
{0x000D0E, 0x0004},
-{0x000D11, 0x0080},
+{0x000D11, 0x0001},
{0x000D12, 0x0004},
{0x000D3B, 0x0010},
{0x000D3D, 0x0004},
{0x000D3E, 0x0010},
-{0x000D45, 0x0080},
+{0x000D45, 0x0001},
{0x000D46, 0x0010},
-{0x000D49, 0x0080},
+{0x000D49, 0x0001},
{0x000D4A, 0x0010},
{0x000D4E, 0x0004},
{0x000D4F, 0x0040},
-{0x000D50, 0x0080},
+{0x000D50, 0x0001},
{0x000D54, 0x0004},
{0x000D57, 0x0010},
{0x000D58, 0x0002},
{0x000D5F, 0x0004},
{0x000D62, 0x0010},
-{0x000D64, 0x0080},
+{0x000D64, 0x0001},
{0x000D66, 0x0002},
{0x000D79, 0x0040},
{0x000D7A, 0x0004},
-{0x000D80, 0x0080},
+{0x000D80, 0x0001},
{0x000D81, 0x0010},
-{0x000D84, 0x0080},
+{0x000D84, 0x0001},
{0x000D85, 0x0004},
-{0x000D97, 0x0080},
+{0x000D97, 0x0001},
{0x000D9A, 0x0004},
-{0x000DB2, 0x0080},
+{0x000DB2, 0x0001},
{0x000DB3, 0x0004},
-{0x000DBC, 0x0080},
+{0x000DBC, 0x0001},
{0x000DBD, 0x0004},
-{0x000DBE, 0x0080},
+{0x000DBE, 0x0001},
{0x000DC0, 0x0004},
-{0x000DC7, 0x0080},
+{0x000DC7, 0x0001},
{0x000DCA, 0x0010},
-{0x000DCB, 0x0080},
+{0x000DCB, 0x0001},
{0x000DCF, 0x0010},
-{0x000DD5, 0x0080},
+{0x000DD5, 0x0001},
{0x000DD6, 0x0010},
-{0x000DD7, 0x0080},
+{0x000DD7, 0x0001},
{0x000DD8, 0x0010},
-{0x000DE0, 0x0080},
+{0x000DE0, 0x0001},
{0x000DE6, 0x0002},
-{0x000DF0, 0x0080},
+{0x000DF0, 0x0001},
{0x000DF2, 0x0010},
{0x000DF4, 0x0020},
-{0x000DF5, 0x0080},
+{0x000DF5, 0x0001},
{0x000E01, 0x0004},
{0x000E31, 0x0010},
{0x000E32, 0x0004},
{0x000E34, 0x0010},
-{0x000E3B, 0x0080},
+{0x000E3B, 0x0001},
{0x000E3F, 0x0040},
{0x000E40, 0x0004},
{0x000E47, 0x0010},
{0x000E4F, 0x0020},
{0x000E50, 0x0002},
{0x000E5A, 0x0020},
-{0x000E5C, 0x0080},
+{0x000E5C, 0x0001},
{0x000E81, 0x0004},
-{0x000E83, 0x0080},
+{0x000E83, 0x0001},
{0x000E84, 0x0004},
-{0x000E85, 0x0080},
+{0x000E85, 0x0001},
{0x000E86, 0x0004},
-{0x000E8B, 0x0080},
+{0x000E8B, 0x0001},
{0x000E8C, 0x0004},
-{0x000EA4, 0x0080},
+{0x000EA4, 0x0001},
{0x000EA5, 0x0004},
-{0x000EA6, 0x0080},
+{0x000EA6, 0x0001},
{0x000EA7, 0x0004},
{0x000EB1, 0x0010},
{0x000EB2, 0x0004},
{0x000EB4, 0x0010},
{0x000EBD, 0x0004},
-{0x000EBE, 0x0080},
+{0x000EBE, 0x0001},
{0x000EC0, 0x0004},
-{0x000EC5, 0x0080},
+{0x000EC5, 0x0001},
{0x000EC6, 0x0004},
-{0x000EC7, 0x0080},
+{0x000EC7, 0x0001},
{0x000EC8, 0x0010},
-{0x000ECF, 0x0080},
+{0x000ECF, 0x0001},
{0x000ED0, 0x0002},
-{0x000EDA, 0x0080},
+{0x000EDA, 0x0001},
{0x000EDC, 0x0004},
-{0x000EE0, 0x0080},
+{0x000EE0, 0x0001},
{0x000F00, 0x0004},
{0x000F01, 0x0040},
{0x000F04, 0x0020},
{0x000F3A, 0x0020},
{0x000F3E, 0x0010},
{0x000F40, 0x0004},
-{0x000F48, 0x0080},
+{0x000F48, 0x0001},
{0x000F49, 0x0004},
-{0x000F6D, 0x0080},
+{0x000F6D, 0x0001},
{0x000F71, 0x0010},
{0x000F85, 0x0020},
{0x000F86, 0x0010},
{0x000F88, 0x0004},
{0x000F8D, 0x0010},
-{0x000F98, 0x0080},
+{0x000F98, 0x0001},
{0x000F99, 0x0010},
-{0x000FBD, 0x0080},
+{0x000FBD, 0x0001},
{0x000FBE, 0x0040},
{0x000FC6, 0x0010},
{0x000FC7, 0x0040},
-{0x000FCD, 0x0080},
+{0x000FCD, 0x0001},
{0x000FCE, 0x0040},
{0x000FD0, 0x0020},
{0x000FD5, 0x0040},
{0x000FD9, 0x0020},
-{0x000FDB, 0x0080},
+{0x000FDB, 0x0001},
{0x001000, 0x0004},
{0x00102B, 0x0010},
{0x00103F, 0x0004},
{0x00109A, 0x0010},
{0x00109E, 0x0040},
{0x0010A0, 0x0004},
-{0x0010C6, 0x0080},
+{0x0010C6, 0x0001},
{0x0010C7, 0x0004},
-{0x0010C8, 0x0080},
+{0x0010C8, 0x0001},
{0x0010CD, 0x0004},
-{0x0010CE, 0x0080},
+{0x0010CE, 0x0001},
{0x0010D0, 0x0004},
{0x0010FB, 0x0020},
{0x0010FC, 0x0004},
-{0x001249, 0x0080},
+{0x001249, 0x0001},
{0x00124A, 0x0004},
-{0x00124E, 0x0080},
+{0x00124E, 0x0001},
{0x001250, 0x0004},
-{0x001257, 0x0080},
+{0x001257, 0x0001},
{0x001258, 0x0004},
-{0x001259, 0x0080},
+{0x001259, 0x0001},
{0x00125A, 0x0004},
-{0x00125E, 0x0080},
+{0x00125E, 0x0001},
{0x001260, 0x0004},
-{0x001289, 0x0080},
+{0x001289, 0x0001},
{0x00128A, 0x0004},
-{0x00128E, 0x0080},
+{0x00128E, 0x0001},
{0x001290, 0x0004},
-{0x0012B1, 0x0080},
+{0x0012B1, 0x0001},
{0x0012B2, 0x0004},
-{0x0012B6, 0x0080},
+{0x0012B6, 0x0001},
{0x0012B8, 0x0004},
-{0x0012BF, 0x0080},
+{0x0012BF, 0x0001},
{0x0012C0, 0x0004},
-{0x0012C1, 0x0080},
+{0x0012C1, 0x0001},
{0x0012C2, 0x0004},
-{0x0012C6, 0x0080},
+{0x0012C6, 0x0001},
{0x0012C8, 0x0004},
-{0x0012D7, 0x0080},
+{0x0012D7, 0x0001},
{0x0012D8, 0x0004},
-{0x001311, 0x0080},
+{0x001311, 0x0001},
{0x001312, 0x0004},
-{0x001316, 0x0080},
+{0x001316, 0x0001},
{0x001318, 0x0004},
-{0x00135B, 0x0080},
+{0x00135B, 0x0001},
{0x00135D, 0x0010},
{0x001360, 0x0020},
{0x001369, 0x0002},
-{0x00137D, 0x0080},
+{0x00137D, 0x0001},
{0x001380, 0x0004},
{0x001390, 0x0040},
-{0x00139A, 0x0080},
+{0x00139A, 0x0001},
{0x0013A0, 0x0004},
-{0x0013F6, 0x0080},
+{0x0013F6, 0x0001},
{0x0013F8, 0x0004},
-{0x0013FE, 0x0080},
+{0x0013FE, 0x0001},
{0x001400, 0x0020},
{0x001401, 0x0004},
{0x00166D, 0x0040},
{0x001680, 0x0008},
{0x001681, 0x0004},
{0x00169B, 0x0020},
-{0x00169D, 0x0080},
+{0x00169D, 0x0001},
{0x0016A0, 0x0004},
{0x0016EB, 0x0020},
{0x0016EE, 0x0002},
{0x0016F1, 0x0004},
-{0x0016F9, 0x0080},
+{0x0016F9, 0x0001},
{0x001700, 0x0004},
{0x001712, 0x0010},
-{0x001716, 0x0080},
+{0x001716, 0x0001},
{0x00171F, 0x0004},
{0x001732, 0x0010},
{0x001735, 0x0020},
-{0x001737, 0x0080},
+{0x001737, 0x0001},
{0x001740, 0x0004},
{0x001752, 0x0010},
-{0x001754, 0x0080},
+{0x001754, 0x0001},
{0x001760, 0x0004},
-{0x00176D, 0x0080},
+{0x00176D, 0x0001},
{0x00176E, 0x0004},
-{0x001771, 0x0080},
+{0x001771, 0x0001},
{0x001772, 0x0010},
-{0x001774, 0x0080},
+{0x001774, 0x0001},
{0x001780, 0x0004},
{0x0017B4, 0x0010},
{0x0017D4, 0x0020},
{0x0017DB, 0x0040},
{0x0017DC, 0x0004},
{0x0017DD, 0x0010},
-{0x0017DE, 0x0080},
+{0x0017DE, 0x0001},
{0x0017E0, 0x0002},
-{0x0017EA, 0x0080},
+{0x0017EA, 0x0001},
{0x0017F0, 0x0002},
-{0x0017FA, 0x0080},
+{0x0017FA, 0x0001},
{0x001800, 0x0020},
{0x00180B, 0x0010},
{0x00180E, 0x0080},
{0x00180F, 0x0010},
{0x001810, 0x0002},
-{0x00181A, 0x0080},
+{0x00181A, 0x0001},
{0x001820, 0x0004},
-{0x001879, 0x0080},
+{0x001879, 0x0001},
{0x001880, 0x0004},
{0x001885, 0x0010},
{0x001887, 0x0004},
{0x0018A9, 0x0010},
{0x0018AA, 0x0004},
-{0x0018AB, 0x0080},
+{0x0018AB, 0x0001},
{0x0018B0, 0x0004},
-{0x0018F6, 0x0080},
+{0x0018F6, 0x0001},
{0x001900, 0x0004},
-{0x00191F, 0x0080},
+{0x00191F, 0x0001},
{0x001920, 0x0010},
-{0x00192C, 0x0080},
+{0x00192C, 0x0001},
{0x001930, 0x0010},
-{0x00193C, 0x0080},
+{0x00193C, 0x0001},
{0x001940, 0x0040},
-{0x001941, 0x0080},
+{0x001941, 0x0001},
{0x001944, 0x0020},
{0x001946, 0x0002},
{0x001950, 0x0004},
-{0x00196E, 0x0080},
+{0x00196E, 0x0001},
{0x001970, 0x0004},
-{0x001975, 0x0080},
+{0x001975, 0x0001},
{0x001980, 0x0004},
-{0x0019AC, 0x0080},
+{0x0019AC, 0x0001},
{0x0019B0, 0x0004},
-{0x0019CA, 0x0080},
+{0x0019CA, 0x0001},
{0x0019D0, 0x0002},
-{0x0019DB, 0x0080},
+{0x0019DB, 0x0001},
{0x0019DE, 0x0040},
{0x001A00, 0x0004},
{0x001A17, 0x0010},
-{0x001A1C, 0x0080},
+{0x001A1C, 0x0001},
{0x001A1E, 0x0020},
{0x001A20, 0x0004},
{0x001A55, 0x0010},
-{0x001A5F, 0x0080},
+{0x001A5F, 0x0001},
{0x001A60, 0x0010},
-{0x001A7D, 0x0080},
+{0x001A7D, 0x0001},
{0x001A7F, 0x0010},
{0x001A80, 0x0002},
-{0x001A8A, 0x0080},
+{0x001A8A, 0x0001},
{0x001A90, 0x0002},
-{0x001A9A, 0x0080},
+{0x001A9A, 0x0001},
{0x001AA0, 0x0020},
{0x001AA7, 0x0004},
{0x001AA8, 0x0020},
-{0x001AAE, 0x0080},
+{0x001AAE, 0x0001},
{0x001AB0, 0x0010},
-{0x001ACF, 0x0080},
+{0x001ACF, 0x0001},
{0x001B00, 0x0010},
{0x001B05, 0x0004},
{0x001B34, 0x0010},
{0x001B45, 0x0004},
-{0x001B4D, 0x0080},
+{0x001B4D, 0x0001},
{0x001B50, 0x0002},
{0x001B5A, 0x0020},
{0x001B61, 0x0040},
{0x001B6B, 0x0010},
{0x001B74, 0x0040},
{0x001B7D, 0x0020},
-{0x001B7F, 0x0080},
+{0x001B7F, 0x0001},
{0x001B80, 0x0010},
{0x001B83, 0x0004},
{0x001BA1, 0x0010},
{0x001BB0, 0x0002},
{0x001BBA, 0x0004},
{0x001BE6, 0x0010},
-{0x001BF4, 0x0080},
+{0x001BF4, 0x0001},
{0x001BFC, 0x0020},
{0x001C00, 0x0004},
{0x001C24, 0x0010},
-{0x001C38, 0x0080},
+{0x001C38, 0x0001},
{0x001C3B, 0x0020},
{0x001C40, 0x0002},
-{0x001C4A, 0x0080},
+{0x001C4A, 0x0001},
{0x001C4D, 0x0004},
{0x001C50, 0x0002},
{0x001C5A, 0x0004},
{0x001C7E, 0x0020},
{0x001C80, 0x0004},
-{0x001C89, 0x0080},
+{0x001C89, 0x0001},
{0x001C90, 0x0004},
-{0x001CBB, 0x0080},
+{0x001CBB, 0x0001},
{0x001CBD, 0x0004},
{0x001CC0, 0x0020},
-{0x001CC8, 0x0080},
+{0x001CC8, 0x0001},
{0x001CD0, 0x0010},
{0x001CD3, 0x0020},
{0x001CD4, 0x0010},
{0x001CF5, 0x0004},
{0x001CF7, 0x0010},
{0x001CFA, 0x0004},
-{0x001CFB, 0x0080},
+{0x001CFB, 0x0001},
{0x001D00, 0x0004},
{0x001DC0, 0x0010},
{0x001E00, 0x0004},
-{0x001F16, 0x0080},
+{0x001F16, 0x0001},
{0x001F18, 0x0004},
-{0x001F1E, 0x0080},
+{0x001F1E, 0x0001},
{0x001F20, 0x0004},
-{0x001F46, 0x0080},
+{0x001F46, 0x0001},
{0x001F48, 0x0004},
-{0x001F4E, 0x0080},
+{0x001F4E, 0x0001},
{0x001F50, 0x0004},
-{0x001F58, 0x0080},
+{0x001F58, 0x0001},
{0x001F59, 0x0004},
-{0x001F5A, 0x0080},
+{0x001F5A, 0x0001},
{0x001F5B, 0x0004},
-{0x001F5C, 0x0080},
+{0x001F5C, 0x0001},
{0x001F5D, 0x0004},
-{0x001F5E, 0x0080},
+{0x001F5E, 0x0001},
{0x001F5F, 0x0004},
-{0x001F7E, 0x0080},
+{0x001F7E, 0x0001},
{0x001F80, 0x0004},
-{0x001FB5, 0x0080},
+{0x001FB5, 0x0001},
{0x001FB6, 0x0004},
{0x001FBD, 0x0040},
{0x001FBE, 0x0004},
{0x001FBF, 0x0040},
{0x001FC2, 0x0004},
-{0x001FC5, 0x0080},
+{0x001FC5, 0x0001},
{0x001FC6, 0x0004},
{0x001FCD, 0x0040},
{0x001FD0, 0x0004},
-{0x001FD4, 0x0080},
+{0x001FD4, 0x0001},
{0x001FD6, 0x0004},
-{0x001FDC, 0x0080},
+{0x001FDC, 0x0001},
{0x001FDD, 0x0040},
{0x001FE0, 0x0004},
{0x001FED, 0x0040},
-{0x001FF0, 0x0080},
+{0x001FF0, 0x0001},
{0x001FF2, 0x0004},
-{0x001FF5, 0x0080},
+{0x001FF5, 0x0001},
{0x001FF6, 0x0004},
{0x001FFD, 0x0040},
-{0x001FFF, 0x0080},
+{0x001FFF, 0x0001},
{0x002000, 0x0008},
{0x00200B, 0x0080},
{0x002010, 0x0020},
{0x002053, 0x0020},
{0x00205F, 0x0008},
{0x002060, 0x0080},
+{0x002065, 0x0001},
+{0x002066, 0x0080},
{0x002070, 0x0002},
{0x002071, 0x0004},
-{0x002072, 0x0080},
+{0x002072, 0x0001},
{0x002074, 0x0002},
{0x00207A, 0x0040},
{0x00207D, 0x0020},
{0x002080, 0x0002},
{0x00208A, 0x0040},
{0x00208D, 0x0020},
-{0x00208F, 0x0080},
+{0x00208F, 0x0001},
{0x002090, 0x0004},
-{0x00209D, 0x0080},
+{0x00209D, 0x0001},
{0x0020A0, 0x0040},
-{0x0020C1, 0x0080},
+{0x0020C1, 0x0001},
{0x0020D0, 0x0010},
-{0x0020F1, 0x0080},
+{0x0020F1, 0x0001},
{0x002100, 0x0040},
{0x002102, 0x0004},
{0x002103, 0x0040},
{0x002183, 0x0004},
{0x002185, 0x0002},
{0x00218A, 0x0040},
-{0x00218C, 0x0080},
+{0x00218C, 0x0001},
{0x002190, 0x0040},
{0x002308, 0x0020},
{0x00230C, 0x0040},
{0x002329, 0x0020},
{0x00232B, 0x0040},
-{0x002427, 0x0080},
+{0x002427, 0x0001},
{0x002440, 0x0040},
-{0x00244B, 0x0080},
+{0x00244B, 0x0001},
{0x002460, 0x0002},
{0x00249C, 0x0040},
{0x0024EA, 0x0002},
{0x0029DC, 0x0040},
{0x0029FC, 0x0020},
{0x0029FE, 0x0040},
-{0x002B74, 0x0080},
+{0x002B74, 0x0001},
{0x002B76, 0x0040},
-{0x002B96, 0x0080},
+{0x002B96, 0x0001},
{0x002B97, 0x0040},
{0x002C00, 0x0004},
{0x002CE5, 0x0040},
{0x002CEB, 0x0004},
{0x002CEF, 0x0010},
{0x002CF2, 0x0004},
-{0x002CF4, 0x0080},
+{0x002CF4, 0x0001},
{0x002CF9, 0x0020},
{0x002CFD, 0x0002},
{0x002CFE, 0x0020},
{0x002D00, 0x0004},
-{0x002D26, 0x0080},
+{0x002D26, 0x0001},
{0x002D27, 0x0004},
-{0x002D28, 0x0080},
+{0x002D28, 0x0001},
{0x002D2D, 0x0004},
-{0x002D2E, 0x0080},
+{0x002D2E, 0x0001},
{0x002D30, 0x0004},
-{0x002D68, 0x0080},
+{0x002D68, 0x0001},
{0x002D6F, 0x0004},
{0x002D70, 0x0020},
-{0x002D71, 0x0080},
+{0x002D71, 0x0001},
{0x002D7F, 0x0010},
{0x002D80, 0x0004},
-{0x002D97, 0x0080},
+{0x002D97, 0x0001},
{0x002DA0, 0x0004},
-{0x002DA7, 0x0080},
+{0x002DA7, 0x0001},
{0x002DA8, 0x0004},
-{0x002DAF, 0x0080},
+{0x002DAF, 0x0001},
{0x002DB0, 0x0004},
-{0x002DB7, 0x0080},
+{0x002DB7, 0x0001},
{0x002DB8, 0x0004},
-{0x002DBF, 0x0080},
+{0x002DBF, 0x0001},
{0x002DC0, 0x0004},
-{0x002DC7, 0x0080},
+{0x002DC7, 0x0001},
{0x002DC8, 0x0004},
-{0x002DCF, 0x0080},
+{0x002DCF, 0x0001},
{0x002DD0, 0x0004},
-{0x002DD7, 0x0080},
+{0x002DD7, 0x0001},
{0x002DD8, 0x0004},
-{0x002DDF, 0x0080},
+{0x002DDF, 0x0001},
{0x002DE0, 0x0010},
{0x002E00, 0x0020},
{0x002E2F, 0x0004},
{0x002E30, 0x0020},
{0x002E50, 0x0040},
{0x002E52, 0x0020},
-{0x002E5E, 0x0080},
+{0x002E5E, 0x0001},
{0x002E80, 0x0040},
-{0x002E9A, 0x0080},
+{0x002E9A, 0x0001},
{0x002E9B, 0x0040},
-{0x002EF4, 0x0080},
+{0x002EF4, 0x0001},
{0x002F00, 0x0040},
-{0x002FD6, 0x0080},
+{0x002FD6, 0x0001},
{0x002FF0, 0x0040},
{0x003000, 0x0008},
{0x003001, 0x0020},
{0x00303B, 0x0004},
{0x00303D, 0x0020},
{0x00303E, 0x0040},
-{0x003040, 0x0080},
+{0x003040, 0x0001},
{0x003041, 0x0004},
-{0x003097, 0x0080},
+{0x003097, 0x0001},
{0x003099, 0x0010},
{0x00309B, 0x0040},
{0x00309D, 0x0004},
{0x0030A1, 0x0004},
{0x0030FB, 0x0020},
{0x0030FC, 0x0004},
-{0x003100, 0x0080},
+{0x003100, 0x0001},
{0x003105, 0x0004},
-{0x003130, 0x0080},
+{0x003130, 0x0001},
{0x003131, 0x0004},
-{0x00318F, 0x0080},
+{0x00318F, 0x0001},
{0x003190, 0x0040},
{0x003192, 0x0002},
{0x003196, 0x0040},
{0x0031A0, 0x0004},
{0x0031C0, 0x0040},
-{0x0031E4, 0x0080},
+{0x0031E4, 0x0001},
{0x0031EF, 0x0040},
{0x0031F0, 0x0004},
{0x003200, 0x0040},
-{0x00321F, 0x0080},
+{0x00321F, 0x0001},
{0x003220, 0x0002},
{0x00322A, 0x0040},
{0x003248, 0x0002},
{0x003400, 0x0004},
{0x004DC0, 0x0040},
{0x004E00, 0x0004},
-{0x00A48D, 0x0080},
+{0x00A48D, 0x0001},
{0x00A490, 0x0040},
-{0x00A4C7, 0x0080},
+{0x00A4C7, 0x0001},
{0x00A4D0, 0x0004},
{0x00A4FE, 0x0020},
{0x00A500, 0x0004},
{0x00A610, 0x0004},
{0x00A620, 0x0002},
{0x00A62A, 0x0004},
-{0x00A62C, 0x0080},
+{0x00A62C, 0x0001},
{0x00A640, 0x0004},
{0x00A66F, 0x0010},
{0x00A673, 0x0020},
{0x00A6E6, 0x0002},
{0x00A6F0, 0x0010},
{0x00A6F2, 0x0020},
-{0x00A6F8, 0x0080},
+{0x00A6F8, 0x0001},
{0x00A700, 0x0040},
{0x00A717, 0x0004},
{0x00A720, 0x0040},
{0x00A722, 0x0004},
{0x00A789, 0x0040},
{0x00A78B, 0x0004},
-{0x00A7CB, 0x0080},
+{0x00A7CB, 0x0001},
{0x00A7D0, 0x0004},
-{0x00A7D2, 0x0080},
+{0x00A7D2, 0x0001},
{0x00A7D3, 0x0004},
-{0x00A7D4, 0x0080},
+{0x00A7D4, 0x0001},
{0x00A7D5, 0x0004},
-{0x00A7DA, 0x0080},
+{0x00A7DA, 0x0001},
{0x00A7F2, 0x0004},
{0x00A802, 0x0010},
{0x00A803, 0x0004},
{0x00A823, 0x0010},
{0x00A828, 0x0040},
{0x00A82C, 0x0010},
-{0x00A82D, 0x0080},
+{0x00A82D, 0x0001},
{0x00A830, 0x0002},
{0x00A836, 0x0040},
-{0x00A83A, 0x0080},
+{0x00A83A, 0x0001},
{0x00A840, 0x0004},
{0x00A874, 0x0020},
-{0x00A878, 0x0080},
+{0x00A878, 0x0001},
{0x00A880, 0x0010},
{0x00A882, 0x0004},
{0x00A8B4, 0x0010},
-{0x00A8C6, 0x0080},
+{0x00A8C6, 0x0001},
{0x00A8CE, 0x0020},
{0x00A8D0, 0x0002},
-{0x00A8DA, 0x0080},
+{0x00A8DA, 0x0001},
{0x00A8E0, 0x0010},
{0x00A8F2, 0x0004},
{0x00A8F8, 0x0020},
{0x00A92E, 0x0020},
{0x00A930, 0x0004},
{0x00A947, 0x0010},
-{0x00A954, 0x0080},
+{0x00A954, 0x0001},
{0x00A95F, 0x0020},
{0x00A960, 0x0004},
-{0x00A97D, 0x0080},
+{0x00A97D, 0x0001},
{0x00A980, 0x0010},
{0x00A984, 0x0004},
{0x00A9B3, 0x0010},
{0x00A9C1, 0x0020},
-{0x00A9CE, 0x0080},
+{0x00A9CE, 0x0001},
{0x00A9CF, 0x0004},
{0x00A9D0, 0x0002},
-{0x00A9DA, 0x0080},
+{0x00A9DA, 0x0001},
{0x00A9DE, 0x0020},
{0x00A9E0, 0x0004},
{0x00A9E5, 0x0010},
{0x00A9E6, 0x0004},
{0x00A9F0, 0x0002},
{0x00A9FA, 0x0004},
-{0x00A9FF, 0x0080},
+{0x00A9FF, 0x0001},
{0x00AA00, 0x0004},
{0x00AA29, 0x0010},
-{0x00AA37, 0x0080},
+{0x00AA37, 0x0001},
{0x00AA40, 0x0004},
{0x00AA43, 0x0010},
{0x00AA44, 0x0004},
{0x00AA4C, 0x0010},
-{0x00AA4E, 0x0080},
+{0x00AA4E, 0x0001},
{0x00AA50, 0x0002},
-{0x00AA5A, 0x0080},
+{0x00AA5A, 0x0001},
{0x00AA5C, 0x0020},
{0x00AA60, 0x0004},
{0x00AA77, 0x0040},
{0x00AAC0, 0x0004},
{0x00AAC1, 0x0010},
{0x00AAC2, 0x0004},
-{0x00AAC3, 0x0080},
+{0x00AAC3, 0x0001},
{0x00AADB, 0x0004},
{0x00AADE, 0x0020},
{0x00AAE0, 0x0004},
{0x00AAF0, 0x0020},
{0x00AAF2, 0x0004},
{0x00AAF5, 0x0010},
-{0x00AAF7, 0x0080},
+{0x00AAF7, 0x0001},
{0x00AB01, 0x0004},
-{0x00AB07, 0x0080},
+{0x00AB07, 0x0001},
{0x00AB09, 0x0004},
-{0x00AB0F, 0x0080},
+{0x00AB0F, 0x0001},
{0x00AB11, 0x0004},
-{0x00AB17, 0x0080},
+{0x00AB17, 0x0001},
{0x00AB20, 0x0004},
-{0x00AB27, 0x0080},
+{0x00AB27, 0x0001},
{0x00AB28, 0x0004},
-{0x00AB2F, 0x0080},
+{0x00AB2F, 0x0001},
{0x00AB30, 0x0004},
{0x00AB5B, 0x0040},
{0x00AB5C, 0x0004},
{0x00AB6A, 0x0040},
-{0x00AB6C, 0x0080},
+{0x00AB6C, 0x0001},
{0x00AB70, 0x0004},
{0x00ABE3, 0x0010},
{0x00ABEB, 0x0020},
{0x00ABEC, 0x0010},
-{0x00ABEE, 0x0080},
+{0x00ABEE, 0x0001},
{0x00ABF0, 0x0002},
-{0x00ABFA, 0x0080},
+{0x00ABFA, 0x0001},
{0x00AC00, 0x0004},
-{0x00D7A4, 0x0080},
+{0x00D7A4, 0x0001},
{0x00D7B0, 0x0004},
-{0x00D7C7, 0x0080},
+{0x00D7C7, 0x0001},
{0x00D7CB, 0x0004},
-{0x00D7FC, 0x0080},
+{0x00D7FC, 0x0001},
+{0x00D800, 0x0080},
{0x00F900, 0x0004},
-{0x00FA6E, 0x0080},
+{0x00FA6E, 0x0001},
{0x00FA70, 0x0004},
-{0x00FADA, 0x0080},
+{0x00FADA, 0x0001},
{0x00FB00, 0x0004},
-{0x00FB07, 0x0080},
+{0x00FB07, 0x0001},
{0x00FB13, 0x0004},
-{0x00FB18, 0x0080},
+{0x00FB18, 0x0001},
{0x00FB1D, 0x0004},
{0x00FB1E, 0x0010},
{0x00FB1F, 0x0004},
{0x00FB29, 0x0040},
{0x00FB2A, 0x0004},
-{0x00FB37, 0x0080},
+{0x00FB37, 0x0001},
{0x00FB38, 0x0004},
-{0x00FB3D, 0x0080},
+{0x00FB3D, 0x0001},
{0x00FB3E, 0x0004},
-{0x00FB3F, 0x0080},
+{0x00FB3F, 0x0001},
{0x00FB40, 0x0004},
-{0x00FB42, 0x0080},
+{0x00FB42, 0x0001},
{0x00FB43, 0x0004},
-{0x00FB45, 0x0080},
+{0x00FB45, 0x0001},
{0x00FB46, 0x0004},
{0x00FBB2, 0x0040},
-{0x00FBC3, 0x0080},
+{0x00FBC3, 0x0001},
{0x00FBD3, 0x0004},
{0x00FD3E, 0x0020},
{0x00FD40, 0x0040},
{0x00FD50, 0x0004},
-{0x00FD90, 0x0080},
+{0x00FD90, 0x0001},
{0x00FD92, 0x0004},
-{0x00FDC8, 0x0080},
+{0x00FDC8, 0x0001},
{0x00FDCF, 0x0040},
-{0x00FDD0, 0x0080},
+{0x00FDD0, 0x0001},
{0x00FDF0, 0x0004},
{0x00FDFC, 0x0040},
{0x00FE00, 0x0010},
{0x00FE10, 0x0020},
-{0x00FE1A, 0x0080},
+{0x00FE1A, 0x0001},
{0x00FE20, 0x0010},
{0x00FE30, 0x0020},
-{0x00FE53, 0x0080},
+{0x00FE53, 0x0001},
{0x00FE54, 0x0020},
{0x00FE62, 0x0040},
{0x00FE63, 0x0020},
{0x00FE64, 0x0040},
-{0x00FE67, 0x0080},
+{0x00FE67, 0x0001},
{0x00FE68, 0x0020},
{0x00FE69, 0x0040},
{0x00FE6A, 0x0020},
-{0x00FE6C, 0x0080},
+{0x00FE6C, 0x0001},
{0x00FE70, 0x0004},
-{0x00FE75, 0x0080},
+{0x00FE75, 0x0001},
{0x00FE76, 0x0004},
-{0x00FEFD, 0x0080},
+{0x00FEFD, 0x0001},
+{0x00FEFF, 0x0080},
+{0x00FF00, 0x0001},
{0x00FF01, 0x0020},
{0x00FF04, 0x0040},
{0x00FF05, 0x0020},
{0x00FF5E, 0x0040},
{0x00FF5F, 0x0020},
{0x00FF66, 0x0004},
-{0x00FFBF, 0x0080},
+{0x00FFBF, 0x0001},
{0x00FFC2, 0x0004},
-{0x00FFC8, 0x0080},
+{0x00FFC8, 0x0001},
{0x00FFCA, 0x0004},
-{0x00FFD0, 0x0080},
+{0x00FFD0, 0x0001},
{0x00FFD2, 0x0004},
-{0x00FFD8, 0x0080},
+{0x00FFD8, 0x0001},
{0x00FFDA, 0x0004},
-{0x00FFDD, 0x0080},
+{0x00FFDD, 0x0001},
{0x00FFE0, 0x0040},
-{0x00FFE7, 0x0080},
+{0x00FFE7, 0x0001},
{0x00FFE8, 0x0040},
-{0x00FFEF, 0x0080},
+{0x00FFEF, 0x0001},
+{0x00FFF9, 0x0080},
{0x00FFFC, 0x0040},
-{0x00FFFE, 0x0080},
+{0x00FFFE, 0x0001},
{0x010000, 0x0004},
-{0x01000C, 0x0080},
+{0x01000C, 0x0001},
{0x01000D, 0x0004},
-{0x010027, 0x0080},
+{0x010027, 0x0001},
{0x010028, 0x0004},
-{0x01003B, 0x0080},
+{0x01003B, 0x0001},
{0x01003C, 0x0004},
-{0x01003E, 0x0080},
+{0x01003E, 0x0001},
{0x01003F, 0x0004},
-{0x01004E, 0x0080},
+{0x01004E, 0x0001},
{0x010050, 0x0004},
-{0x01005E, 0x0080},
+{0x01005E, 0x0001},
{0x010080, 0x0004},
-{0x0100FB, 0x0080},
+{0x0100FB, 0x0001},
{0x010100, 0x0020},
-{0x010103, 0x0080},
+{0x010103, 0x0001},
{0x010107, 0x0002},
-{0x010134, 0x0080},
+{0x010134, 0x0001},
{0x010137, 0x0040},
{0x010140, 0x0002},
{0x010179, 0x0040},
{0x01018A, 0x0002},
{0x01018C, 0x0040},
-{0x01018F, 0x0080},
+{0x01018F, 0x0001},
{0x010190, 0x0040},
-{0x01019D, 0x0080},
+{0x01019D, 0x0001},
{0x0101A0, 0x0040},
-{0x0101A1, 0x0080},
+{0x0101A1, 0x0001},
{0x0101D0, 0x0040},
{0x0101FD, 0x0010},
-{0x0101FE, 0x0080},
+{0x0101FE, 0x0001},
{0x010280, 0x0004},
-{0x01029D, 0x0080},
+{0x01029D, 0x0001},
{0x0102A0, 0x0004},
-{0x0102D1, 0x0080},
+{0x0102D1, 0x0001},
{0x0102E0, 0x0010},
{0x0102E1, 0x0002},
-{0x0102FC, 0x0080},
+{0x0102FC, 0x0001},
{0x010300, 0x0004},
{0x010320, 0x0002},
-{0x010324, 0x0080},
+{0x010324, 0x0001},
{0x01032D, 0x0004},
{0x010341, 0x0002},
{0x010342, 0x0004},
{0x01034A, 0x0002},
-{0x01034B, 0x0080},
+{0x01034B, 0x0001},
{0x010350, 0x0004},
{0x010376, 0x0010},
-{0x01037B, 0x0080},
+{0x01037B, 0x0001},
{0x010380, 0x0004},
-{0x01039E, 0x0080},
+{0x01039E, 0x0001},
{0x01039F, 0x0020},
{0x0103A0, 0x0004},
-{0x0103C4, 0x0080},
+{0x0103C4, 0x0001},
{0x0103C8, 0x0004},
{0x0103D0, 0x0020},
{0x0103D1, 0x0002},
-{0x0103D6, 0x0080},
+{0x0103D6, 0x0001},
{0x010400, 0x0004},
-{0x01049E, 0x0080},
+{0x01049E, 0x0001},
{0x0104A0, 0x0002},
-{0x0104AA, 0x0080},
+{0x0104AA, 0x0001},
{0x0104B0, 0x0004},
-{0x0104D4, 0x0080},
+{0x0104D4, 0x0001},
{0x0104D8, 0x0004},
-{0x0104FC, 0x0080},
+{0x0104FC, 0x0001},
{0x010500, 0x0004},
-{0x010528, 0x0080},
+{0x010528, 0x0001},
{0x010530, 0x0004},
-{0x010564, 0x0080},
+{0x010564, 0x0001},
{0x01056F, 0x0020},
{0x010570, 0x0004},
-{0x01057B, 0x0080},
+{0x01057B, 0x0001},
{0x01057C, 0x0004},
-{0x01058B, 0x0080},
+{0x01058B, 0x0001},
{0x01058C, 0x0004},
-{0x010593, 0x0080},
+{0x010593, 0x0001},
{0x010594, 0x0004},
-{0x010596, 0x0080},
+{0x010596, 0x0001},
{0x010597, 0x0004},
-{0x0105A2, 0x0080},
+{0x0105A2, 0x0001},
{0x0105A3, 0x0004},
-{0x0105B2, 0x0080},
+{0x0105B2, 0x0001},
{0x0105B3, 0x0004},
-{0x0105BA, 0x0080},
+{0x0105BA, 0x0001},
{0x0105BB, 0x0004},
-{0x0105BD, 0x0080},
+{0x0105BD, 0x0001},
{0x010600, 0x0004},
-{0x010737, 0x0080},
+{0x010737, 0x0001},
{0x010740, 0x0004},
-{0x010756, 0x0080},
+{0x010756, 0x0001},
{0x010760, 0x0004},
-{0x010768, 0x0080},
+{0x010768, 0x0001},
{0x010780, 0x0004},
-{0x010786, 0x0080},
+{0x010786, 0x0001},
{0x010787, 0x0004},
-{0x0107B1, 0x0080},
+{0x0107B1, 0x0001},
{0x0107B2, 0x0004},
-{0x0107BB, 0x0080},
+{0x0107BB, 0x0001},
{0x010800, 0x0004},
-{0x010806, 0x0080},
+{0x010806, 0x0001},
{0x010808, 0x0004},
-{0x010809, 0x0080},
+{0x010809, 0x0001},
{0x01080A, 0x0004},
-{0x010836, 0x0080},
+{0x010836, 0x0001},
{0x010837, 0x0004},
-{0x010839, 0x0080},
+{0x010839, 0x0001},
{0x01083C, 0x0004},
-{0x01083D, 0x0080},
+{0x01083D, 0x0001},
{0x01083F, 0x0004},
-{0x010856, 0x0080},
+{0x010856, 0x0001},
{0x010857, 0x0020},
{0x010858, 0x0002},
{0x010860, 0x0004},
{0x010877, 0x0040},
{0x010879, 0x0002},
{0x010880, 0x0004},
-{0x01089F, 0x0080},
+{0x01089F, 0x0001},
{0x0108A7, 0x0002},
-{0x0108B0, 0x0080},
+{0x0108B0, 0x0001},
{0x0108E0, 0x0004},
-{0x0108F3, 0x0080},
+{0x0108F3, 0x0001},
{0x0108F4, 0x0004},
-{0x0108F6, 0x0080},
+{0x0108F6, 0x0001},
{0x0108FB, 0x0002},
{0x010900, 0x0004},
{0x010916, 0x0002},
-{0x01091C, 0x0080},
+{0x01091C, 0x0001},
{0x01091F, 0x0020},
{0x010920, 0x0004},
-{0x01093A, 0x0080},
+{0x01093A, 0x0001},
{0x01093F, 0x0020},
-{0x010940, 0x0080},
+{0x010940, 0x0001},
{0x010980, 0x0004},
-{0x0109B8, 0x0080},
+{0x0109B8, 0x0001},
{0x0109BC, 0x0002},
{0x0109BE, 0x0004},
{0x0109C0, 0x0002},
-{0x0109D0, 0x0080},
+{0x0109D0, 0x0001},
{0x0109D2, 0x0002},
{0x010A00, 0x0004},
{0x010A01, 0x0010},
-{0x010A04, 0x0080},
+{0x010A04, 0x0001},
{0x010A05, 0x0010},
-{0x010A07, 0x0080},
+{0x010A07, 0x0001},
{0x010A0C, 0x0010},
{0x010A10, 0x0004},
-{0x010A14, 0x0080},
+{0x010A14, 0x0001},
{0x010A15, 0x0004},
-{0x010A18, 0x0080},
+{0x010A18, 0x0001},
{0x010A19, 0x0004},
-{0x010A36, 0x0080},
+{0x010A36, 0x0001},
{0x010A38, 0x0010},
-{0x010A3B, 0x0080},
+{0x010A3B, 0x0001},
{0x010A3F, 0x0010},
{0x010A40, 0x0002},
-{0x010A49, 0x0080},
+{0x010A49, 0x0001},
{0x010A50, 0x0020},
-{0x010A59, 0x0080},
+{0x010A59, 0x0001},
{0x010A60, 0x0004},
{0x010A7D, 0x0002},
{0x010A7F, 0x0020},
{0x010A80, 0x0004},
{0x010A9D, 0x0002},
-{0x010AA0, 0x0080},
+{0x010AA0, 0x0001},
{0x010AC0, 0x0004},
{0x010AC8, 0x0040},
{0x010AC9, 0x0004},
{0x010AE5, 0x0010},
-{0x010AE7, 0x0080},
+{0x010AE7, 0x0001},
{0x010AEB, 0x0002},
{0x010AF0, 0x0020},
-{0x010AF7, 0x0080},
+{0x010AF7, 0x0001},
{0x010B00, 0x0004},
-{0x010B36, 0x0080},
+{0x010B36, 0x0001},
{0x010B39, 0x0020},
{0x010B40, 0x0004},
-{0x010B56, 0x0080},
+{0x010B56, 0x0001},
{0x010B58, 0x0002},
{0x010B60, 0x0004},
-{0x010B73, 0x0080},
+{0x010B73, 0x0001},
{0x010B78, 0x0002},
{0x010B80, 0x0004},
-{0x010B92, 0x0080},
+{0x010B92, 0x0001},
{0x010B99, 0x0020},
-{0x010B9D, 0x0080},
+{0x010B9D, 0x0001},
{0x010BA9, 0x0002},
-{0x010BB0, 0x0080},
+{0x010BB0, 0x0001},
{0x010C00, 0x0004},
-{0x010C49, 0x0080},
+{0x010C49, 0x0001},
{0x010C80, 0x0004},
-{0x010CB3, 0x0080},
+{0x010CB3, 0x0001},
{0x010CC0, 0x0004},
-{0x010CF3, 0x0080},
+{0x010CF3, 0x0001},
{0x010CFA, 0x0002},
{0x010D00, 0x0004},
{0x010D24, 0x0010},
-{0x010D28, 0x0080},
+{0x010D28, 0x0001},
{0x010D30, 0x0002},
-{0x010D3A, 0x0080},
+{0x010D3A, 0x0001},
{0x010E60, 0x0002},
-{0x010E7F, 0x0080},
+{0x010E7F, 0x0001},
{0x010E80, 0x0004},
-{0x010EAA, 0x0080},
+{0x010EAA, 0x0001},
{0x010EAB, 0x0010},
{0x010EAD, 0x0020},
-{0x010EAE, 0x0080},
+{0x010EAE, 0x0001},
{0x010EB0, 0x0004},
-{0x010EB2, 0x0080},
+{0x010EB2, 0x0001},
{0x010EFD, 0x0010},
{0x010F00, 0x0004},
{0x010F1D, 0x0002},
{0x010F27, 0x0004},
-{0x010F28, 0x0080},
+{0x010F28, 0x0001},
{0x010F30, 0x0004},
{0x010F46, 0x0010},
{0x010F51, 0x0002},
{0x010F55, 0x0020},
-{0x010F5A, 0x0080},
+{0x010F5A, 0x0001},
{0x010F70, 0x0004},
{0x010F82, 0x0010},
{0x010F86, 0x0020},
-{0x010F8A, 0x0080},
+{0x010F8A, 0x0001},
{0x010FB0, 0x0004},
{0x010FC5, 0x0002},
-{0x010FCC, 0x0080},
+{0x010FCC, 0x0001},
{0x010FE0, 0x0004},
-{0x010FF7, 0x0080},
+{0x010FF7, 0x0001},
{0x011000, 0x0010},
{0x011003, 0x0004},
{0x011038, 0x0010},
{0x011047, 0x0020},
-{0x01104E, 0x0080},
+{0x01104E, 0x0001},
{0x011052, 0x0002},
{0x011070, 0x0010},
{0x011071, 0x0004},
{0x011073, 0x0010},
{0x011075, 0x0004},
-{0x011076, 0x0080},
+{0x011076, 0x0001},
{0x01107F, 0x0010},
{0x011083, 0x0004},
{0x0110B0, 0x0010},
{0x0110BD, 0x0080},
{0x0110BE, 0x0020},
{0x0110C2, 0x0010},
-{0x0110C3, 0x0080},
+{0x0110C3, 0x0001},
+{0x0110CD, 0x0080},
+{0x0110CE, 0x0001},
{0x0110D0, 0x0004},
-{0x0110E9, 0x0080},
+{0x0110E9, 0x0001},
{0x0110F0, 0x0002},
-{0x0110FA, 0x0080},
+{0x0110FA, 0x0001},
{0x011100, 0x0010},
{0x011103, 0x0004},
{0x011127, 0x0010},
-{0x011135, 0x0080},
+{0x011135, 0x0001},
{0x011136, 0x0002},
{0x011140, 0x0020},
{0x011144, 0x0004},
{0x011145, 0x0010},
{0x011147, 0x0004},
-{0x011148, 0x0080},
+{0x011148, 0x0001},
{0x011150, 0x0004},
{0x011173, 0x0010},
{0x011174, 0x0020},
{0x011176, 0x0004},
-{0x011177, 0x0080},
+{0x011177, 0x0001},
{0x011180, 0x0010},
{0x011183, 0x0004},
{0x0111B3, 0x0010},
{0x0111DB, 0x0020},
{0x0111DC, 0x0004},
{0x0111DD, 0x0020},
-{0x0111E0, 0x0080},
+{0x0111E0, 0x0001},
{0x0111E1, 0x0002},
-{0x0111F5, 0x0080},
+{0x0111F5, 0x0001},
{0x011200, 0x0004},
-{0x011212, 0x0080},
+{0x011212, 0x0001},
{0x011213, 0x0004},
{0x01122C, 0x0010},
{0x011238, 0x0020},
{0x01123E, 0x0010},
{0x01123F, 0x0004},
{0x011241, 0x0010},
-{0x011242, 0x0080},
+{0x011242, 0x0001},
{0x011280, 0x0004},
-{0x011287, 0x0080},
+{0x011287, 0x0001},
{0x011288, 0x0004},
-{0x011289, 0x0080},
+{0x011289, 0x0001},
{0x01128A, 0x0004},
-{0x01128E, 0x0080},
+{0x01128E, 0x0001},
{0x01128F, 0x0004},
-{0x01129E, 0x0080},
+{0x01129E, 0x0001},
{0x01129F, 0x0004},
{0x0112A9, 0x0020},
-{0x0112AA, 0x0080},
+{0x0112AA, 0x0001},
{0x0112B0, 0x0004},
{0x0112DF, 0x0010},
-{0x0112EB, 0x0080},
+{0x0112EB, 0x0001},
{0x0112F0, 0x0002},
-{0x0112FA, 0x0080},
+{0x0112FA, 0x0001},
{0x011300, 0x0010},
-{0x011304, 0x0080},
+{0x011304, 0x0001},
{0x011305, 0x0004},
-{0x01130D, 0x0080},
+{0x01130D, 0x0001},
{0x01130F, 0x0004},
-{0x011311, 0x0080},
+{0x011311, 0x0001},
{0x011313, 0x0004},
-{0x011329, 0x0080},
+{0x011329, 0x0001},
{0x01132A, 0x0004},
-{0x011331, 0x0080},
+{0x011331, 0x0001},
{0x011332, 0x0004},
-{0x011334, 0x0080},
+{0x011334, 0x0001},
{0x011335, 0x0004},
-{0x01133A, 0x0080},
+{0x01133A, 0x0001},
{0x01133B, 0x0010},
{0x01133D, 0x0004},
{0x01133E, 0x0010},
-{0x011345, 0x0080},
+{0x011345, 0x0001},
{0x011347, 0x0010},
-{0x011349, 0x0080},
+{0x011349, 0x0001},
{0x01134B, 0x0010},
-{0x01134E, 0x0080},
+{0x01134E, 0x0001},
{0x011350, 0x0004},
-{0x011351, 0x0080},
+{0x011351, 0x0001},
{0x011357, 0x0010},
-{0x011358, 0x0080},
+{0x011358, 0x0001},
{0x01135D, 0x0004},
{0x011362, 0x0010},
-{0x011364, 0x0080},
+{0x011364, 0x0001},
{0x011366, 0x0010},
-{0x01136D, 0x0080},
+{0x01136D, 0x0001},
{0x011370, 0x0010},
-{0x011375, 0x0080},
+{0x011375, 0x0001},
{0x011400, 0x0004},
{0x011435, 0x0010},
{0x011447, 0x0004},
{0x01144B, 0x0020},
{0x011450, 0x0002},
{0x01145A, 0x0020},
-{0x01145C, 0x0080},
+{0x01145C, 0x0001},
{0x01145D, 0x0020},
{0x01145E, 0x0010},
{0x01145F, 0x0004},
-{0x011462, 0x0080},
+{0x011462, 0x0001},
{0x011480, 0x0004},
{0x0114B0, 0x0010},
{0x0114C4, 0x0004},
{0x0114C6, 0x0020},
{0x0114C7, 0x0004},
-{0x0114C8, 0x0080},
+{0x0114C8, 0x0001},
{0x0114D0, 0x0002},
-{0x0114DA, 0x0080},
+{0x0114DA, 0x0001},
{0x011580, 0x0004},
{0x0115AF, 0x0010},
-{0x0115B6, 0x0080},
+{0x0115B6, 0x0001},
{0x0115B8, 0x0010},
{0x0115C1, 0x0020},
{0x0115D8, 0x0004},
{0x0115DC, 0x0010},
-{0x0115DE, 0x0080},
+{0x0115DE, 0x0001},
{0x011600, 0x0004},
{0x011630, 0x0010},
{0x011641, 0x0020},
{0x011644, 0x0004},
-{0x011645, 0x0080},
+{0x011645, 0x0001},
{0x011650, 0x0002},
-{0x01165A, 0x0080},
+{0x01165A, 0x0001},
{0x011660, 0x0020},
-{0x01166D, 0x0080},
+{0x01166D, 0x0001},
{0x011680, 0x0004},
{0x0116AB, 0x0010},
{0x0116B8, 0x0004},
{0x0116B9, 0x0020},
-{0x0116BA, 0x0080},
+{0x0116BA, 0x0001},
{0x0116C0, 0x0002},
-{0x0116CA, 0x0080},
+{0x0116CA, 0x0001},
{0x011700, 0x0004},
-{0x01171B, 0x0080},
+{0x01171B, 0x0001},
{0x01171D, 0x0010},
-{0x01172C, 0x0080},
+{0x01172C, 0x0001},
{0x011730, 0x0002},
{0x01173C, 0x0020},
{0x01173F, 0x0040},
{0x011740, 0x0004},
-{0x011747, 0x0080},
+{0x011747, 0x0001},
{0x011800, 0x0004},
{0x01182C, 0x0010},
{0x01183B, 0x0020},
-{0x01183C, 0x0080},
+{0x01183C, 0x0001},
{0x0118A0, 0x0004},
{0x0118E0, 0x0002},
-{0x0118F3, 0x0080},
+{0x0118F3, 0x0001},
{0x0118FF, 0x0004},
-{0x011907, 0x0080},
+{0x011907, 0x0001},
{0x011909, 0x0004},
-{0x01190A, 0x0080},
+{0x01190A, 0x0001},
{0x01190C, 0x0004},
-{0x011914, 0x0080},
+{0x011914, 0x0001},
{0x011915, 0x0004},
-{0x011917, 0x0080},
+{0x011917, 0x0001},
{0x011918, 0x0004},
{0x011930, 0x0010},
-{0x011936, 0x0080},
+{0x011936, 0x0001},
{0x011937, 0x0010},
-{0x011939, 0x0080},
+{0x011939, 0x0001},
{0x01193B, 0x0010},
{0x01193F, 0x0004},
{0x011940, 0x0010},
{0x011941, 0x0004},
{0x011942, 0x0010},
{0x011944, 0x0020},
-{0x011947, 0x0080},
+{0x011947, 0x0001},
{0x011950, 0x0002},
-{0x01195A, 0x0080},
+{0x01195A, 0x0001},
{0x0119A0, 0x0004},
-{0x0119A8, 0x0080},
+{0x0119A8, 0x0001},
{0x0119AA, 0x0004},
{0x0119D1, 0x0010},
-{0x0119D8, 0x0080},
+{0x0119D8, 0x0001},
{0x0119DA, 0x0010},
{0x0119E1, 0x0004},
{0x0119E2, 0x0020},
{0x0119E3, 0x0004},
{0x0119E4, 0x0010},
-{0x0119E5, 0x0080},
+{0x0119E5, 0x0001},
{0x011A00, 0x0004},
{0x011A01, 0x0010},
{0x011A0B, 0x0004},
{0x011A3B, 0x0010},
{0x011A3F, 0x0020},
{0x011A47, 0x0010},
-{0x011A48, 0x0080},
+{0x011A48, 0x0001},
{0x011A50, 0x0004},
{0x011A51, 0x0010},
{0x011A5C, 0x0004},
{0x011A9A, 0x0020},
{0x011A9D, 0x0004},
{0x011A9E, 0x0020},
-{0x011AA3, 0x0080},
+{0x011AA3, 0x0001},
{0x011AB0, 0x0004},
-{0x011AF9, 0x0080},
+{0x011AF9, 0x0001},
{0x011B00, 0x0020},
-{0x011B0A, 0x0080},
+{0x011B0A, 0x0001},
{0x011C00, 0x0004},
-{0x011C09, 0x0080},
+{0x011C09, 0x0001},
{0x011C0A, 0x0004},
{0x011C2F, 0x0010},
-{0x011C37, 0x0080},
+{0x011C37, 0x0001},
{0x011C38, 0x0010},
{0x011C40, 0x0004},
{0x011C41, 0x0020},
-{0x011C46, 0x0080},
+{0x011C46, 0x0001},
{0x011C50, 0x0002},
-{0x011C6D, 0x0080},
+{0x011C6D, 0x0001},
{0x011C70, 0x0020},
{0x011C72, 0x0004},
-{0x011C90, 0x0080},
+{0x011C90, 0x0001},
{0x011C92, 0x0010},
-{0x011CA8, 0x0080},
+{0x011CA8, 0x0001},
{0x011CA9, 0x0010},
-{0x011CB7, 0x0080},
+{0x011CB7, 0x0001},
{0x011D00, 0x0004},
-{0x011D07, 0x0080},
+{0x011D07, 0x0001},
{0x011D08, 0x0004},
-{0x011D0A, 0x0080},
+{0x011D0A, 0x0001},
{0x011D0B, 0x0004},
{0x011D31, 0x0010},
-{0x011D37, 0x0080},
+{0x011D37, 0x0001},
{0x011D3A, 0x0010},
-{0x011D3B, 0x0080},
+{0x011D3B, 0x0001},
{0x011D3C, 0x0010},
-{0x011D3E, 0x0080},
+{0x011D3E, 0x0001},
{0x011D3F, 0x0010},
{0x011D46, 0x0004},
{0x011D47, 0x0010},
-{0x011D48, 0x0080},
+{0x011D48, 0x0001},
{0x011D50, 0x0002},
-{0x011D5A, 0x0080},
+{0x011D5A, 0x0001},
{0x011D60, 0x0004},
-{0x011D66, 0x0080},
+{0x011D66, 0x0001},
{0x011D67, 0x0004},
-{0x011D69, 0x0080},
+{0x011D69, 0x0001},
{0x011D6A, 0x0004},
{0x011D8A, 0x0010},
-{0x011D8F, 0x0080},
+{0x011D8F, 0x0001},
{0x011D90, 0x0010},
-{0x011D92, 0x0080},
+{0x011D92, 0x0001},
{0x011D93, 0x0010},
{0x011D98, 0x0004},
-{0x011D99, 0x0080},
+{0x011D99, 0x0001},
{0x011DA0, 0x0002},
-{0x011DAA, 0x0080},
+{0x011DAA, 0x0001},
{0x011EE0, 0x0004},
{0x011EF3, 0x0010},
{0x011EF7, 0x0020},
-{0x011EF9, 0x0080},
+{0x011EF9, 0x0001},
{0x011F00, 0x0010},
{0x011F02, 0x0004},
{0x011F03, 0x0010},
{0x011F04, 0x0004},
-{0x011F11, 0x0080},
+{0x011F11, 0x0001},
{0x011F12, 0x0004},
{0x011F34, 0x0010},
-{0x011F3B, 0x0080},
+{0x011F3B, 0x0001},
{0x011F3E, 0x0010},
{0x011F43, 0x0020},
{0x011F50, 0x0002},
-{0x011F5A, 0x0080},
+{0x011F5A, 0x0001},
{0x011FB0, 0x0004},
-{0x011FB1, 0x0080},
+{0x011FB1, 0x0001},
{0x011FC0, 0x0002},
{0x011FD5, 0x0040},
-{0x011FF2, 0x0080},
+{0x011FF2, 0x0001},
{0x011FFF, 0x0020},
{0x012000, 0x0004},
-{0x01239A, 0x0080},
+{0x01239A, 0x0001},
{0x012400, 0x0002},
-{0x01246F, 0x0080},
+{0x01246F, 0x0001},
{0x012470, 0x0020},
-{0x012475, 0x0080},
+{0x012475, 0x0001},
{0x012480, 0x0004},
-{0x012544, 0x0080},
+{0x012544, 0x0001},
{0x012F90, 0x0004},
{0x012FF1, 0x0020},
-{0x012FF3, 0x0080},
+{0x012FF3, 0x0001},
{0x013000, 0x0004},
{0x013430, 0x0080},
{0x013440, 0x0010},
{0x013441, 0x0004},
{0x013447, 0x0010},
-{0x013456, 0x0080},
+{0x013456, 0x0001},
{0x014400, 0x0004},
-{0x014647, 0x0080},
+{0x014647, 0x0001},
{0x016800, 0x0004},
-{0x016A39, 0x0080},
+{0x016A39, 0x0001},
{0x016A40, 0x0004},
-{0x016A5F, 0x0080},
+{0x016A5F, 0x0001},
{0x016A60, 0x0002},
-{0x016A6A, 0x0080},
+{0x016A6A, 0x0001},
{0x016A6E, 0x0020},
{0x016A70, 0x0004},
-{0x016ABF, 0x0080},
+{0x016ABF, 0x0001},
{0x016AC0, 0x0002},
-{0x016ACA, 0x0080},
+{0x016ACA, 0x0001},
{0x016AD0, 0x0004},
-{0x016AEE, 0x0080},
+{0x016AEE, 0x0001},
{0x016AF0, 0x0010},
{0x016AF5, 0x0020},
-{0x016AF6, 0x0080},
+{0x016AF6, 0x0001},
{0x016B00, 0x0004},
{0x016B30, 0x0010},
{0x016B37, 0x0020},
{0x016B40, 0x0004},
{0x016B44, 0x0020},
{0x016B45, 0x0040},
-{0x016B46, 0x0080},
+{0x016B46, 0x0001},
{0x016B50, 0x0002},
-{0x016B5A, 0x0080},
+{0x016B5A, 0x0001},
{0x016B5B, 0x0002},
-{0x016B62, 0x0080},
+{0x016B62, 0x0001},
{0x016B63, 0x0004},
-{0x016B78, 0x0080},
+{0x016B78, 0x0001},
{0x016B7D, 0x0004},
-{0x016B90, 0x0080},
+{0x016B90, 0x0001},
{0x016E40, 0x0004},
{0x016E80, 0x0002},
{0x016E97, 0x0020},
-{0x016E9B, 0x0080},
+{0x016E9B, 0x0001},
{0x016F00, 0x0004},
-{0x016F4B, 0x0080},
+{0x016F4B, 0x0001},
{0x016F4F, 0x0010},
{0x016F50, 0x0004},
{0x016F51, 0x0010},
-{0x016F88, 0x0080},
+{0x016F88, 0x0001},
{0x016F8F, 0x0010},
{0x016F93, 0x0004},
-{0x016FA0, 0x0080},
+{0x016FA0, 0x0001},
{0x016FE0, 0x0004},
{0x016FE2, 0x0020},
{0x016FE3, 0x0004},
{0x016FE4, 0x0010},
-{0x016FE5, 0x0080},
+{0x016FE5, 0x0001},
{0x016FF0, 0x0010},
-{0x016FF2, 0x0080},
+{0x016FF2, 0x0001},
{0x017000, 0x0004},
-{0x0187F8, 0x0080},
+{0x0187F8, 0x0001},
{0x018800, 0x0004},
-{0x018CD6, 0x0080},
+{0x018CD6, 0x0001},
{0x018D00, 0x0004},
-{0x018D09, 0x0080},
+{0x018D09, 0x0001},
{0x01AFF0, 0x0004},
-{0x01AFF4, 0x0080},
+{0x01AFF4, 0x0001},
{0x01AFF5, 0x0004},
-{0x01AFFC, 0x0080},
+{0x01AFFC, 0x0001},
{0x01AFFD, 0x0004},
-{0x01AFFF, 0x0080},
+{0x01AFFF, 0x0001},
{0x01B000, 0x0004},
-{0x01B123, 0x0080},
+{0x01B123, 0x0001},
{0x01B132, 0x0004},
-{0x01B133, 0x0080},
+{0x01B133, 0x0001},
{0x01B150, 0x0004},
-{0x01B153, 0x0080},
+{0x01B153, 0x0001},
{0x01B155, 0x0004},
-{0x01B156, 0x0080},
+{0x01B156, 0x0001},
{0x01B164, 0x0004},
-{0x01B168, 0x0080},
+{0x01B168, 0x0001},
{0x01B170, 0x0004},
-{0x01B2FC, 0x0080},
+{0x01B2FC, 0x0001},
{0x01BC00, 0x0004},
-{0x01BC6B, 0x0080},
+{0x01BC6B, 0x0001},
{0x01BC70, 0x0004},
-{0x01BC7D, 0x0080},
+{0x01BC7D, 0x0001},
{0x01BC80, 0x0004},
-{0x01BC89, 0x0080},
+{0x01BC89, 0x0001},
{0x01BC90, 0x0004},
-{0x01BC9A, 0x0080},
+{0x01BC9A, 0x0001},
{0x01BC9C, 0x0040},
{0x01BC9D, 0x0010},
{0x01BC9F, 0x0020},
{0x01BCA0, 0x0080},
+{0x01BCA4, 0x0001},
{0x01CF00, 0x0010},
-{0x01CF2E, 0x0080},
+{0x01CF2E, 0x0001},
{0x01CF30, 0x0010},
-{0x01CF47, 0x0080},
+{0x01CF47, 0x0001},
{0x01CF50, 0x0040},
-{0x01CFC4, 0x0080},
+{0x01CFC4, 0x0001},
{0x01D000, 0x0040},
-{0x01D0F6, 0x0080},
+{0x01D0F6, 0x0001},
{0x01D100, 0x0040},
-{0x01D127, 0x0080},
+{0x01D127, 0x0001},
{0x01D129, 0x0040},
{0x01D165, 0x0010},
{0x01D16A, 0x0040},
{0x01D18C, 0x0040},
{0x01D1AA, 0x0010},
{0x01D1AE, 0x0040},
-{0x01D1EB, 0x0080},
+{0x01D1EB, 0x0001},
{0x01D200, 0x0040},
{0x01D242, 0x0010},
{0x01D245, 0x0040},
-{0x01D246, 0x0080},
+{0x01D246, 0x0001},
{0x01D2C0, 0x0002},
-{0x01D2D4, 0x0080},
+{0x01D2D4, 0x0001},
{0x01D2E0, 0x0002},
-{0x01D2F4, 0x0080},
+{0x01D2F4, 0x0001},
{0x01D300, 0x0040},
-{0x01D357, 0x0080},
+{0x01D357, 0x0001},
{0x01D360, 0x0002},
-{0x01D379, 0x0080},
+{0x01D379, 0x0001},
{0x01D400, 0x0004},
-{0x01D455, 0x0080},
+{0x01D455, 0x0001},
{0x01D456, 0x0004},
-{0x01D49D, 0x0080},
+{0x01D49D, 0x0001},
{0x01D49E, 0x0004},
-{0x01D4A0, 0x0080},
+{0x01D4A0, 0x0001},
{0x01D4A2, 0x0004},
-{0x01D4A3, 0x0080},
+{0x01D4A3, 0x0001},
{0x01D4A5, 0x0004},
-{0x01D4A7, 0x0080},
+{0x01D4A7, 0x0001},
{0x01D4A9, 0x0004},
-{0x01D4AD, 0x0080},
+{0x01D4AD, 0x0001},
{0x01D4AE, 0x0004},
-{0x01D4BA, 0x0080},
+{0x01D4BA, 0x0001},
{0x01D4BB, 0x0004},
-{0x01D4BC, 0x0080},
+{0x01D4BC, 0x0001},
{0x01D4BD, 0x0004},
-{0x01D4C4, 0x0080},
+{0x01D4C4, 0x0001},
{0x01D4C5, 0x0004},
-{0x01D506, 0x0080},
+{0x01D506, 0x0001},
{0x01D507, 0x0004},
-{0x01D50B, 0x0080},
+{0x01D50B, 0x0001},
{0x01D50D, 0x0004},
-{0x01D515, 0x0080},
+{0x01D515, 0x0001},
{0x01D516, 0x0004},
-{0x01D51D, 0x0080},
+{0x01D51D, 0x0001},
{0x01D51E, 0x0004},
-{0x01D53A, 0x0080},
+{0x01D53A, 0x0001},
{0x01D53B, 0x0004},
-{0x01D53F, 0x0080},
+{0x01D53F, 0x0001},
{0x01D540, 0x0004},
-{0x01D545, 0x0080},
+{0x01D545, 0x0001},
{0x01D546, 0x0004},
-{0x01D547, 0x0080},
+{0x01D547, 0x0001},
{0x01D54A, 0x0004},
-{0x01D551, 0x0080},
+{0x01D551, 0x0001},
{0x01D552, 0x0004},
-{0x01D6A6, 0x0080},
+{0x01D6A6, 0x0001},
{0x01D6A8, 0x0004},
{0x01D6C1, 0x0040},
{0x01D6C2, 0x0004},
{0x01D7AA, 0x0004},
{0x01D7C3, 0x0040},
{0x01D7C4, 0x0004},
-{0x01D7CC, 0x0080},
+{0x01D7CC, 0x0001},
{0x01D7CE, 0x0002},
{0x01D800, 0x0040},
{0x01DA00, 0x0010},
{0x01DA84, 0x0010},
{0x01DA85, 0x0040},
{0x01DA87, 0x0020},
-{0x01DA8C, 0x0080},
+{0x01DA8C, 0x0001},
{0x01DA9B, 0x0010},
-{0x01DAA0, 0x0080},
+{0x01DAA0, 0x0001},
{0x01DAA1, 0x0010},
-{0x01DAB0, 0x0080},
+{0x01DAB0, 0x0001},
{0x01DF00, 0x0004},
-{0x01DF1F, 0x0080},
+{0x01DF1F, 0x0001},
{0x01DF25, 0x0004},
-{0x01DF2B, 0x0080},
+{0x01DF2B, 0x0001},
{0x01E000, 0x0010},
-{0x01E007, 0x0080},
+{0x01E007, 0x0001},
{0x01E008, 0x0010},
-{0x01E019, 0x0080},
+{0x01E019, 0x0001},
{0x01E01B, 0x0010},
-{0x01E022, 0x0080},
+{0x01E022, 0x0001},
{0x01E023, 0x0010},
-{0x01E025, 0x0080},
+{0x01E025, 0x0001},
{0x01E026, 0x0010},
-{0x01E02B, 0x0080},
+{0x01E02B, 0x0001},
{0x01E030, 0x0004},
-{0x01E06E, 0x0080},
+{0x01E06E, 0x0001},
{0x01E08F, 0x0010},
-{0x01E090, 0x0080},
+{0x01E090, 0x0001},
{0x01E100, 0x0004},
-{0x01E12D, 0x0080},
+{0x01E12D, 0x0001},
{0x01E130, 0x0010},
{0x01E137, 0x0004},
-{0x01E13E, 0x0080},
+{0x01E13E, 0x0001},
{0x01E140, 0x0002},
-{0x01E14A, 0x0080},
+{0x01E14A, 0x0001},
{0x01E14E, 0x0004},
{0x01E14F, 0x0040},
-{0x01E150, 0x0080},
+{0x01E150, 0x0001},
{0x01E290, 0x0004},
{0x01E2AE, 0x0010},
-{0x01E2AF, 0x0080},
+{0x01E2AF, 0x0001},
{0x01E2C0, 0x0004},
{0x01E2EC, 0x0010},
{0x01E2F0, 0x0002},
-{0x01E2FA, 0x0080},
+{0x01E2FA, 0x0001},
{0x01E2FF, 0x0040},
-{0x01E300, 0x0080},
+{0x01E300, 0x0001},
{0x01E4D0, 0x0004},
{0x01E4EC, 0x0010},
{0x01E4F0, 0x0002},
-{0x01E4FA, 0x0080},
+{0x01E4FA, 0x0001},
{0x01E7E0, 0x0004},
-{0x01E7E7, 0x0080},
+{0x01E7E7, 0x0001},
{0x01E7E8, 0x0004},
-{0x01E7EC, 0x0080},
+{0x01E7EC, 0x0001},
{0x01E7ED, 0x0004},
-{0x01E7EF, 0x0080},
+{0x01E7EF, 0x0001},
{0x01E7F0, 0x0004},
-{0x01E7FF, 0x0080},
+{0x01E7FF, 0x0001},
{0x01E800, 0x0004},
-{0x01E8C5, 0x0080},
+{0x01E8C5, 0x0001},
{0x01E8C7, 0x0002},
{0x01E8D0, 0x0010},
-{0x01E8D7, 0x0080},
+{0x01E8D7, 0x0001},
{0x01E900, 0x0004},
{0x01E944, 0x0010},
{0x01E94B, 0x0004},
-{0x01E94C, 0x0080},
+{0x01E94C, 0x0001},
{0x01E950, 0x0002},
-{0x01E95A, 0x0080},
+{0x01E95A, 0x0001},
{0x01E95E, 0x0020},
-{0x01E960, 0x0080},
+{0x01E960, 0x0001},
{0x01EC71, 0x0002},
{0x01ECAC, 0x0040},
{0x01ECAD, 0x0002},
{0x01ECB0, 0x0040},
{0x01ECB1, 0x0002},
-{0x01ECB5, 0x0080},
+{0x01ECB5, 0x0001},
{0x01ED01, 0x0002},
{0x01ED2E, 0x0040},
{0x01ED2F, 0x0002},
-{0x01ED3E, 0x0080},
+{0x01ED3E, 0x0001},
{0x01EE00, 0x0004},
-{0x01EE04, 0x0080},
+{0x01EE04, 0x0001},
{0x01EE05, 0x0004},
-{0x01EE20, 0x0080},
+{0x01EE20, 0x0001},
{0x01EE21, 0x0004},
-{0x01EE23, 0x0080},
+{0x01EE23, 0x0001},
{0x01EE24, 0x0004},
-{0x01EE25, 0x0080},
+{0x01EE25, 0x0001},
{0x01EE27, 0x0004},
-{0x01EE28, 0x0080},
+{0x01EE28, 0x0001},
{0x01EE29, 0x0004},
-{0x01EE33, 0x0080},
+{0x01EE33, 0x0001},
{0x01EE34, 0x0004},
-{0x01EE38, 0x0080},
+{0x01EE38, 0x0001},
{0x01EE39, 0x0004},
-{0x01EE3A, 0x0080},
+{0x01EE3A, 0x0001},
{0x01EE3B, 0x0004},
-{0x01EE3C, 0x0080},
+{0x01EE3C, 0x0001},
{0x01EE42, 0x0004},
-{0x01EE43, 0x0080},
+{0x01EE43, 0x0001},
{0x01EE47, 0x0004},
-{0x01EE48, 0x0080},
+{0x01EE48, 0x0001},
{0x01EE49, 0x0004},
-{0x01EE4A, 0x0080},
+{0x01EE4A, 0x0001},
{0x01EE4B, 0x0004},
-{0x01EE4C, 0x0080},
+{0x01EE4C, 0x0001},
{0x01EE4D, 0x0004},
-{0x01EE50, 0x0080},
+{0x01EE50, 0x0001},
{0x01EE51, 0x0004},
-{0x01EE53, 0x0080},
+{0x01EE53, 0x0001},
{0x01EE54, 0x0004},
-{0x01EE55, 0x0080},
+{0x01EE55, 0x0001},
{0x01EE57, 0x0004},
-{0x01EE58, 0x0080},
+{0x01EE58, 0x0001},
{0x01EE59, 0x0004},
-{0x01EE5A, 0x0080},
+{0x01EE5A, 0x0001},
{0x01EE5B, 0x0004},
-{0x01EE5C, 0x0080},
+{0x01EE5C, 0x0001},
{0x01EE5D, 0x0004},
-{0x01EE5E, 0x0080},
+{0x01EE5E, 0x0001},
{0x01EE5F, 0x0004},
-{0x01EE60, 0x0080},
+{0x01EE60, 0x0001},
{0x01EE61, 0x0004},
-{0x01EE63, 0x0080},
+{0x01EE63, 0x0001},
{0x01EE64, 0x0004},
-{0x01EE65, 0x0080},
+{0x01EE65, 0x0001},
{0x01EE67, 0x0004},
-{0x01EE6B, 0x0080},
+{0x01EE6B, 0x0001},
{0x01EE6C, 0x0004},
-{0x01EE73, 0x0080},
+{0x01EE73, 0x0001},
{0x01EE74, 0x0004},
-{0x01EE78, 0x0080},
+{0x01EE78, 0x0001},
{0x01EE79, 0x0004},
-{0x01EE7D, 0x0080},
+{0x01EE7D, 0x0001},
{0x01EE7E, 0x0004},
-{0x01EE7F, 0x0080},
+{0x01EE7F, 0x0001},
{0x01EE80, 0x0004},
-{0x01EE8A, 0x0080},
+{0x01EE8A, 0x0001},
{0x01EE8B, 0x0004},
-{0x01EE9C, 0x0080},
+{0x01EE9C, 0x0001},
{0x01EEA1, 0x0004},
-{0x01EEA4, 0x0080},
+{0x01EEA4, 0x0001},
{0x01EEA5, 0x0004},
-{0x01EEAA, 0x0080},
+{0x01EEAA, 0x0001},
{0x01EEAB, 0x0004},
-{0x01EEBC, 0x0080},
+{0x01EEBC, 0x0001},
{0x01EEF0, 0x0040},
-{0x01EEF2, 0x0080},
+{0x01EEF2, 0x0001},
{0x01F000, 0x0040},
-{0x01F02C, 0x0080},
+{0x01F02C, 0x0001},
{0x01F030, 0x0040},
-{0x01F094, 0x0080},
+{0x01F094, 0x0001},
{0x01F0A0, 0x0040},
-{0x01F0AF, 0x0080},
+{0x01F0AF, 0x0001},
{0x01F0B1, 0x0040},
-{0x01F0C0, 0x0080},
+{0x01F0C0, 0x0001},
{0x01F0C1, 0x0040},
-{0x01F0D0, 0x0080},
+{0x01F0D0, 0x0001},
{0x01F0D1, 0x0040},
-{0x01F0F6, 0x0080},
+{0x01F0F6, 0x0001},
{0x01F100, 0x0002},
{0x01F10D, 0x0040},
-{0x01F1AE, 0x0080},
+{0x01F1AE, 0x0001},
{0x01F1E6, 0x0040},
-{0x01F203, 0x0080},
+{0x01F203, 0x0001},
{0x01F210, 0x0040},
-{0x01F23C, 0x0080},
+{0x01F23C, 0x0001},
{0x01F240, 0x0040},
-{0x01F249, 0x0080},
+{0x01F249, 0x0001},
{0x01F250, 0x0040},
-{0x01F252, 0x0080},
+{0x01F252, 0x0001},
{0x01F260, 0x0040},
-{0x01F266, 0x0080},
+{0x01F266, 0x0001},
{0x01F300, 0x0040},
-{0x01F6D8, 0x0080},
+{0x01F6D8, 0x0001},
{0x01F6DC, 0x0040},
-{0x01F6ED, 0x0080},
+{0x01F6ED, 0x0001},
{0x01F6F0, 0x0040},
-{0x01F6FD, 0x0080},
+{0x01F6FD, 0x0001},
{0x01F700, 0x0040},
-{0x01F777, 0x0080},
+{0x01F777, 0x0001},
{0x01F77B, 0x0040},
-{0x01F7DA, 0x0080},
+{0x01F7DA, 0x0001},
{0x01F7E0, 0x0040},
-{0x01F7EC, 0x0080},
+{0x01F7EC, 0x0001},
{0x01F7F0, 0x0040},
-{0x01F7F1, 0x0080},
+{0x01F7F1, 0x0001},
{0x01F800, 0x0040},
-{0x01F80C, 0x0080},
+{0x01F80C, 0x0001},
{0x01F810, 0x0040},
-{0x01F848, 0x0080},
+{0x01F848, 0x0001},
{0x01F850, 0x0040},
-{0x01F85A, 0x0080},
+{0x01F85A, 0x0001},
{0x01F860, 0x0040},
-{0x01F888, 0x0080},
+{0x01F888, 0x0001},
{0x01F890, 0x0040},
-{0x01F8AE, 0x0080},
+{0x01F8AE, 0x0001},
{0x01F8B0, 0x0040},
-{0x01F8B2, 0x0080},
+{0x01F8B2, 0x0001},
{0x01F900, 0x0040},
-{0x01FA54, 0x0080},
+{0x01FA54, 0x0001},
{0x01FA60, 0x0040},
-{0x01FA6E, 0x0080},
+{0x01FA6E, 0x0001},
{0x01FA70, 0x0040},
-{0x01FA7D, 0x0080},
+{0x01FA7D, 0x0001},
{0x01FA80, 0x0040},
-{0x01FA89, 0x0080},
+{0x01FA89, 0x0001},
{0x01FA90, 0x0040},
-{0x01FABE, 0x0080},
+{0x01FABE, 0x0001},
{0x01FABF, 0x0040},
-{0x01FAC6, 0x0080},
+{0x01FAC6, 0x0001},
{0x01FACE, 0x0040},
-{0x01FADC, 0x0080},
+{0x01FADC, 0x0001},
{0x01FAE0, 0x0040},
-{0x01FAE9, 0x0080},
+{0x01FAE9, 0x0001},
{0x01FAF0, 0x0040},
-{0x01FAF9, 0x0080},
+{0x01FAF9, 0x0001},
{0x01FB00, 0x0040},
-{0x01FB93, 0x0080},
+{0x01FB93, 0x0001},
{0x01FB94, 0x0040},
-{0x01FBCB, 0x0080},
+{0x01FBCB, 0x0001},
{0x01FBF0, 0x0002},
-{0x01FBFA, 0x0080},
+{0x01FBFA, 0x0001},
{0x020000, 0x0004},
-{0x02A6E0, 0x0080},
+{0x02A6E0, 0x0001},
{0x02A700, 0x0004},
-{0x02B73A, 0x0080},
+{0x02B73A, 0x0001},
{0x02B740, 0x0004},
-{0x02B81E, 0x0080},
+{0x02B81E, 0x0001},
{0x02B820, 0x0004},
-{0x02CEA2, 0x0080},
+{0x02CEA2, 0x0001},
{0x02CEB0, 0x0004},
-{0x02EBE1, 0x0080},
+{0x02EBE1, 0x0001},
{0x02EBF0, 0x0004},
-{0x02EE5E, 0x0080},
+{0x02EE5E, 0x0001},
{0x02F800, 0x0004},
-{0x02FA1E, 0x0080},
+{0x02FA1E, 0x0001},
{0x030000, 0x0004},
-{0x03134B, 0x0080},
+{0x03134B, 0x0001},
{0x031350, 0x0004},
-{0x0323B0, 0x0080},
+{0x0323B0, 0x0001},
+{0x0E0001, 0x0080},
+{0x0E0002, 0x0001},
+{0x0E0020, 0x0080},
+{0x0E0080, 0x0001},
{0x0E0100, 0x0010},
-{0x0E01F0, 0x0080},
+{0x0E01F0, 0x0001},
+{0x0F0000, 0x0080},
+{0x0FFFFE, 0x0001},
+{0x100000, 0x0080},
+{0x10FFFE, 0x0001},
{0x110000, 0x0000},
};
// Code points that this table classifies as whitespace: U+0009..U+000D,
// U+0020, U+0085, U+00A0, U+1680, U+2000..U+200A, U+2028, U+2029, U+202F,
// U+205F and U+3000.
// NOTE(review): presumably mirrors the Unicode White_Space property -- confirm
// against the script that generates this data.
const std::unordered_set<uint32_t> unicode_set_whitespace = {
-0x000009, 0x00000A, 0x00000B, 0x00000C, 0x00000D, 0x000020, 0x000085, 0x0000A0, 0x001680, 0x002000, 0x002001, 0x002002, 0x002003, 0x002004, 0x002005, 0x002006, 0x002007, 0x002008, 0x002009, 0x00200A, 0x002028, 0x002029, 0x00202F, 0x00205F, 0x003000
+0x000009,
+0x00000A,
+0x00000B,
+0x00000C,
+0x00000D,
+0x000020,
+0x000085,
+0x0000A0,
+0x001680,
+0x002000,
+0x002001,
+0x002002,
+0x002003,
+0x002004,
+0x002005,
+0x002006,
+0x002007,
+0x002008,
+0x002009,
+0x00200A,
+0x002028,
+0x002029,
+0x00202F,
+0x00205F,
+0x003000,
};
const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {
{0x002C2C, 0x002C5C},
{0x002C2D, 0x002C5D},
{0x002C2E, 0x002C5E},
+{0x002C2F, 0x002C5F},
{0x002C60, 0x002C61},
{0x002C62, 0x00026B},
{0x002C63, 0x001D7D},
{0x00A7BA, 0x00A7BB},
{0x00A7BC, 0x00A7BD},
{0x00A7BE, 0x00A7BF},
+{0x00A7C0, 0x00A7C1},
{0x00A7C2, 0x00A7C3},
{0x00A7C4, 0x00A794},
{0x00A7C5, 0x000282},
{0x00A7C6, 0x001D8E},
{0x00A7C7, 0x00A7C8},
{0x00A7C9, 0x00A7CA},
+{0x00A7D0, 0x00A7D1},
+{0x00A7D6, 0x00A7D7},
+{0x00A7D8, 0x00A7D9},
{0x00A7F5, 0x00A7F6},
{0x00FF21, 0x00FF41},
{0x00FF22, 0x00FF42},
{0x0104D1, 0x0104F9},
{0x0104D2, 0x0104FA},
{0x0104D3, 0x0104FB},
+{0x010570, 0x010597},
+{0x010571, 0x010598},
+{0x010572, 0x010599},
+{0x010573, 0x01059A},
+{0x010574, 0x01059B},
+{0x010575, 0x01059C},
+{0x010576, 0x01059D},
+{0x010577, 0x01059E},
+{0x010578, 0x01059F},
+{0x010579, 0x0105A0},
+{0x01057A, 0x0105A1},
+{0x01057C, 0x0105A3},
+{0x01057D, 0x0105A4},
+{0x01057E, 0x0105A5},
+{0x01057F, 0x0105A6},
+{0x010580, 0x0105A7},
+{0x010581, 0x0105A8},
+{0x010582, 0x0105A9},
+{0x010583, 0x0105AA},
+{0x010584, 0x0105AB},
+{0x010585, 0x0105AC},
+{0x010586, 0x0105AD},
+{0x010587, 0x0105AE},
+{0x010588, 0x0105AF},
+{0x010589, 0x0105B0},
+{0x01058A, 0x0105B1},
+{0x01058C, 0x0105B3},
+{0x01058D, 0x0105B4},
+{0x01058E, 0x0105B5},
+{0x01058F, 0x0105B6},
+{0x010590, 0x0105B7},
+{0x010591, 0x0105B8},
+{0x010592, 0x0105B9},
+{0x010594, 0x0105BB},
+{0x010595, 0x0105BC},
{0x010C80, 0x010CC0},
{0x010C81, 0x010CC1},
{0x010C82, 0x010CC2},
{0x000079, 0x000059},
{0x00007A, 0x00005A},
{0x0000B5, 0x00039C},
-{0x0000DF, 0x000053},
{0x0000E0, 0x0000C0},
{0x0000E1, 0x0000C1},
{0x0000E2, 0x0000C2},
{0x000144, 0x000143},
{0x000146, 0x000145},
{0x000148, 0x000147},
-{0x000149, 0x0002BC},
{0x00014B, 0x00014A},
{0x00014D, 0x00014C},
{0x00014F, 0x00014E},
{0x0001EB, 0x0001EA},
{0x0001ED, 0x0001EC},
{0x0001EF, 0x0001EE},
-{0x0001F0, 0x00004A},
{0x0001F2, 0x0001F1},
{0x0001F3, 0x0001F1},
{0x0001F5, 0x0001F4},
{0x00037B, 0x0003FD},
{0x00037C, 0x0003FE},
{0x00037D, 0x0003FF},
-{0x000390, 0x000399},
{0x0003AC, 0x000386},
{0x0003AD, 0x000388},
{0x0003AE, 0x000389},
{0x0003AF, 0x00038A},
-{0x0003B0, 0x0003A5},
{0x0003B1, 0x000391},
{0x0003B2, 0x000392},
{0x0003B3, 0x000393},
{0x000584, 0x000554},
{0x000585, 0x000555},
{0x000586, 0x000556},
-{0x000587, 0x000535},
{0x0010D0, 0x001C90},
{0x0010D1, 0x001C91},
{0x0010D2, 0x001C92},
{0x001E91, 0x001E90},
{0x001E93, 0x001E92},
{0x001E95, 0x001E94},
-{0x001E96, 0x000048},
-{0x001E97, 0x000054},
-{0x001E98, 0x000057},
-{0x001E99, 0x000059},
-{0x001E9A, 0x000041},
{0x001E9B, 0x001E60},
{0x001EA1, 0x001EA0},
{0x001EA3, 0x001EA2},
{0x001F43, 0x001F4B},
{0x001F44, 0x001F4C},
{0x001F45, 0x001F4D},
-{0x001F50, 0x0003A5},
{0x001F51, 0x001F59},
-{0x001F52, 0x0003A5},
{0x001F53, 0x001F5B},
-{0x001F54, 0x0003A5},
{0x001F55, 0x001F5D},
-{0x001F56, 0x0003A5},
{0x001F57, 0x001F5F},
{0x001F60, 0x001F68},
{0x001F61, 0x001F69},
{0x001F7B, 0x001FEB},
{0x001F7C, 0x001FFA},
{0x001F7D, 0x001FFB},
-{0x001F80, 0x001F08},
-{0x001F81, 0x001F09},
-{0x001F82, 0x001F0A},
-{0x001F83, 0x001F0B},
-{0x001F84, 0x001F0C},
-{0x001F85, 0x001F0D},
-{0x001F86, 0x001F0E},
-{0x001F87, 0x001F0F},
-{0x001F88, 0x001F08},
-{0x001F89, 0x001F09},
-{0x001F8A, 0x001F0A},
-{0x001F8B, 0x001F0B},
-{0x001F8C, 0x001F0C},
-{0x001F8D, 0x001F0D},
-{0x001F8E, 0x001F0E},
-{0x001F8F, 0x001F0F},
-{0x001F90, 0x001F28},
-{0x001F91, 0x001F29},
-{0x001F92, 0x001F2A},
-{0x001F93, 0x001F2B},
-{0x001F94, 0x001F2C},
-{0x001F95, 0x001F2D},
-{0x001F96, 0x001F2E},
-{0x001F97, 0x001F2F},
-{0x001F98, 0x001F28},
-{0x001F99, 0x001F29},
-{0x001F9A, 0x001F2A},
-{0x001F9B, 0x001F2B},
-{0x001F9C, 0x001F2C},
-{0x001F9D, 0x001F2D},
-{0x001F9E, 0x001F2E},
-{0x001F9F, 0x001F2F},
-{0x001FA0, 0x001F68},
-{0x001FA1, 0x001F69},
-{0x001FA2, 0x001F6A},
-{0x001FA3, 0x001F6B},
-{0x001FA4, 0x001F6C},
-{0x001FA5, 0x001F6D},
-{0x001FA6, 0x001F6E},
-{0x001FA7, 0x001F6F},
-{0x001FA8, 0x001F68},
-{0x001FA9, 0x001F69},
-{0x001FAA, 0x001F6A},
-{0x001FAB, 0x001F6B},
-{0x001FAC, 0x001F6C},
-{0x001FAD, 0x001F6D},
-{0x001FAE, 0x001F6E},
-{0x001FAF, 0x001F6F},
+{0x001F80, 0x001F88},
+{0x001F81, 0x001F89},
+{0x001F82, 0x001F8A},
+{0x001F83, 0x001F8B},
+{0x001F84, 0x001F8C},
+{0x001F85, 0x001F8D},
+{0x001F86, 0x001F8E},
+{0x001F87, 0x001F8F},
+{0x001F90, 0x001F98},
+{0x001F91, 0x001F99},
+{0x001F92, 0x001F9A},
+{0x001F93, 0x001F9B},
+{0x001F94, 0x001F9C},
+{0x001F95, 0x001F9D},
+{0x001F96, 0x001F9E},
+{0x001F97, 0x001F9F},
+{0x001FA0, 0x001FA8},
+{0x001FA1, 0x001FA9},
+{0x001FA2, 0x001FAA},
+{0x001FA3, 0x001FAB},
+{0x001FA4, 0x001FAC},
+{0x001FA5, 0x001FAD},
+{0x001FA6, 0x001FAE},
+{0x001FA7, 0x001FAF},
{0x001FB0, 0x001FB8},
{0x001FB1, 0x001FB9},
-{0x001FB2, 0x001FBA},
-{0x001FB3, 0x000391},
-{0x001FB4, 0x000386},
-{0x001FB6, 0x000391},
-{0x001FB7, 0x000391},
-{0x001FBC, 0x000391},
+{0x001FB3, 0x001FBC},
{0x001FBE, 0x000399},
-{0x001FC2, 0x001FCA},
-{0x001FC3, 0x000397},
-{0x001FC4, 0x000389},
-{0x001FC6, 0x000397},
-{0x001FC7, 0x000397},
-{0x001FCC, 0x000397},
+{0x001FC3, 0x001FCC},
{0x001FD0, 0x001FD8},
{0x001FD1, 0x001FD9},
-{0x001FD2, 0x000399},
-{0x001FD3, 0x000399},
-{0x001FD6, 0x000399},
-{0x001FD7, 0x000399},
{0x001FE0, 0x001FE8},
{0x001FE1, 0x001FE9},
-{0x001FE2, 0x0003A5},
-{0x001FE3, 0x0003A5},
-{0x001FE4, 0x0003A1},
{0x001FE5, 0x001FEC},
-{0x001FE6, 0x0003A5},
-{0x001FE7, 0x0003A5},
-{0x001FF2, 0x001FFA},
-{0x001FF3, 0x0003A9},
-{0x001FF4, 0x00038F},
-{0x001FF6, 0x0003A9},
-{0x001FF7, 0x0003A9},
-{0x001FFC, 0x0003A9},
+{0x001FF3, 0x001FFC},
{0x00214E, 0x002132},
{0x002170, 0x002160},
{0x002171, 0x002161},
{0x002C5C, 0x002C2C},
{0x002C5D, 0x002C2D},
{0x002C5E, 0x002C2E},
+{0x002C5F, 0x002C2F},
{0x002C61, 0x002C60},
{0x002C65, 0x00023A},
{0x002C66, 0x00023E},
{0x00A7BB, 0x00A7BA},
{0x00A7BD, 0x00A7BC},
{0x00A7BF, 0x00A7BE},
+{0x00A7C1, 0x00A7C0},
{0x00A7C3, 0x00A7C2},
{0x00A7C8, 0x00A7C7},
{0x00A7CA, 0x00A7C9},
+{0x00A7D1, 0x00A7D0},
+{0x00A7D7, 0x00A7D6},
+{0x00A7D9, 0x00A7D8},
{0x00A7F6, 0x00A7F5},
{0x00AB53, 0x00A7B3},
{0x00AB70, 0x0013A0},
{0x00ABBD, 0x0013ED},
{0x00ABBE, 0x0013EE},
{0x00ABBF, 0x0013EF},
-{0x00FB00, 0x000046},
-{0x00FB01, 0x000046},
-{0x00FB02, 0x000046},
-{0x00FB03, 0x000046},
-{0x00FB04, 0x000046},
-{0x00FB05, 0x000053},
-{0x00FB06, 0x000053},
-{0x00FB13, 0x000544},
-{0x00FB14, 0x000544},
-{0x00FB15, 0x000544},
-{0x00FB16, 0x00054E},
-{0x00FB17, 0x000544},
{0x00FF41, 0x00FF21},
{0x00FF42, 0x00FF22},
{0x00FF43, 0x00FF23},
{0x0104F9, 0x0104D1},
{0x0104FA, 0x0104D2},
{0x0104FB, 0x0104D3},
+{0x010597, 0x010570},
+{0x010598, 0x010571},
+{0x010599, 0x010572},
+{0x01059A, 0x010573},
+{0x01059B, 0x010574},
+{0x01059C, 0x010575},
+{0x01059D, 0x010576},
+{0x01059E, 0x010577},
+{0x01059F, 0x010578},
+{0x0105A0, 0x010579},
+{0x0105A1, 0x01057A},
+{0x0105A3, 0x01057C},
+{0x0105A4, 0x01057D},
+{0x0105A5, 0x01057E},
+{0x0105A6, 0x01057F},
+{0x0105A7, 0x010580},
+{0x0105A8, 0x010581},
+{0x0105A9, 0x010582},
+{0x0105AA, 0x010583},
+{0x0105AB, 0x010584},
+{0x0105AC, 0x010585},
+{0x0105AD, 0x010586},
+{0x0105AE, 0x010587},
+{0x0105AF, 0x010588},
+{0x0105B0, 0x010589},
+{0x0105B1, 0x01058A},
+{0x0105B3, 0x01058C},
+{0x0105B4, 0x01058D},
+{0x0105B5, 0x01058E},
+{0x0105B6, 0x01058F},
+{0x0105B7, 0x010590},
+{0x0105B8, 0x010591},
+{0x0105B9, 0x010592},
+{0x0105BB, 0x010594},
+{0x0105BC, 0x010595},
{0x010CC0, 0x010C80},
{0x010CC1, 0x010C81},
{0x010CC2, 0x010C82},
assert(offset_end <= cpts.size());
start = offset_end;
+ static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
- return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : 0;
+ return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
};
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
}
// regex: \s+(?!\S)
- if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != 0) {
+ if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
pos += num_whitespaces - 1;
_add_token(pos);
continue;
assert(offset_end <= cpts.size());
start = offset_end;
+ static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
- return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : 0;
+ return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
};
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
}
// regex: \s+(?!\S)
- if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != 0) {
+ if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
pos += num_whitespaces - 1;
_add_token(pos);
continue;
continue;
}
- const int cpt_flag = unicode_cpt_flags(cpts[i]).category_flag();
+ const auto flags = unicode_cpt_flags(cpts[i]);
- if (k_ucat_cpt.find(cpt_flag) != k_ucat_cpt.end()) {
- text_collapsed[i] = k_ucat_cpt.at(cpt_flag);
+ if (flags.is_whitespace) {
+ //NOTE: C++ std::regex \s does not match 0x85; Rust and Python regex do.
+ //text_collapsed[i] = (char) 0x85; // <Next Line> as whitespace fallback
+ text_collapsed[i] = (char) 0x0B; // <vertical tab> as whitespace fallback
+ } else if (k_ucat_cpt.find(flags.category_flag()) != k_ucat_cpt.end()) {
+ text_collapsed[i] = k_ucat_cpt.at(flags.category_flag());
} else {
text_collapsed[i] = (char) 0xD0; // fallback
}
bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets);
} else {
// no unicode category used, we can use std::wregex directly
- const std::wstring wtext = unicode_wstring_from_utf8(text);
const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr);
+ // std::wregex \s does not match non-ASCII whitespace, so use 0x0B as a fallback
+ std::wstring wtext(cpts.begin(), cpts.end());
+ for (size_t i = 0; i < wtext.size(); ++i) {
+ if (wtext[i] > 0x7F && unicode_cpt_flags(wtext[i]).is_whitespace) {
+ wtext[i] = 0x0B;
+ }
+ }
+
//printf("text: %s\n", text.c_str());
//printf("regex_expr: %s\n", regex_expr.c_str());
bpe_offsets = unicode_regex_split_stl(wtext, wregex_expr, bpe_offsets);