llama : speedup tokenization (#2831)

author Kawrakow <redacted>

Sun, 27 Aug 2023 13:50:33 +0000 (16:50 +0300)

committer GitHub <redacted>

Sun, 27 Aug 2023 13:50:33 +0000 (16:50 +0300)
author Kawrakow <redacted>
Sun, 27 Aug 2023 13:50:33 +0000 (16:50 +0300)
committer GitHub <redacted>
Sun, 27 Aug 2023 13:50:33 +0000 (16:50 +0300)
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp

index b596d062613d7cd66c57053f9c1524f3b2cd8baf..ebafa0c29f54003e539a8778450bd47f2588ffb9 100644 (file)
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -190,10 +190,14 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
      const bool is_spm = llama_vocab_type(ctx) == LLAMA_VOCAB_TYPE_SPM;
      const bool add_bos = is_spm;
  
+    auto tim1 = std::chrono::high_resolution_clock::now();
      fprintf(stderr, "%s: tokenizing the input ..\n", __func__);
  
      auto tokens = ::llama_tokenize(ctx, params.prompt, add_bos);
  
+    auto tim2 = std::chrono::high_resolution_clock::now();
+    fprintf(stderr, "%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
+
      const int n_chunk_max = tokens.size() / params.n_ctx;
  
      const int n_chunk = params.n_chunks < 0 ? n_chunk_max : std::min(params.n_chunks, n_chunk_max);
diff --git a/llama.cpp b/llama.cpp

index 0d12d9cca031f4b5b0bb15d9006e78a808781b0d..0bb8fcd6eaa0ac79e75d40c2a46e863c64f81b21 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -114,12 +114,17 @@ static size_t utf8_len(char src) {
  }
  
  void replace_all(std::string & s, const std::string & search, const std::string & replace) {
-    for (size_t pos = 0; ; pos += replace.length()) {
-        pos = s.find(search, pos);
-        if (pos == std::string::npos) break;
-        s.erase(pos, search.length());
-        s.insert(pos, replace);
+    std::string result;
+    for (size_t pos = 0; ; pos += search.length()) {
+        auto new_pos = s.find(search, pos);
+        if (new_pos == std::string::npos) {
+            result += s.substr(pos, s.size() - pos);
+            break;
+        }
+        result += s.substr(pos, new_pos - pos) + replace;
+        pos = new_pos;
      }
+    s = std::move(result);
  }
  
  static void zeros(std::ofstream & file, size_t n) {
author	Kawrakow <redacted>
	Sun, 27 Aug 2023 13:50:33 +0000 (16:50 +0300)
committer	GitHub <redacted>
	Sun, 27 Aug 2023 13:50:33 +0000 (16:50 +0300)
examples/perplexity/perplexity.cpp		patch | blob | history
llama.cpp		patch | blob | history