git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
llama : use Unicode Escape Sequence to replace encoded characters (#2814)
author: Tim Miller <redacted>
Sat, 26 Aug 2023 18:27:07 +0000 (03:27 +0900)
committer: GitHub <redacted>
Sat, 26 Aug 2023 18:27:07 +0000 (21:27 +0300)
The use of special characters within source files can break compilation on some computers with different region and language settings. Using Unicode escape sequences should allow the code to be compiled on all setups without needing to change your computer's settings or switch regions.

llama.cpp

index 62889b3edc37f1fc503634c943a635728e54af57..05c54c2135aa031326b48809850cc13e6a3d2b37 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -955,10 +955,10 @@ struct llama_vocab {
     id linefeed_id = 13;
 
     int find_bpe_rank(std::string token_left, std::string token_right) const {
-        replace_all(token_left,  " ",  "Ġ");
-        replace_all(token_left,  "\n", "Ċ");
-        replace_all(token_right, " ",  "Ġ");
-        replace_all(token_right, "\n", "Ċ");
+        replace_all(token_left,  " ",  "\u0120");
+        replace_all(token_left,  "\n", "\u010A");
+        replace_all(token_right, " ",  "\u0120");
+        replace_all(token_right, "\n", "\u010A");
 
         auto it = bpe_ranks.find(std::make_pair(token_left, token_right));
         if (it == bpe_ranks.end()) {