fix perplexity after c-api refactor (#390)

author Erik Scholz <redacted>

Wed, 22 Mar 2023 16:09:38 +0000 (17:09 +0100)

committer GitHub <redacted>

Wed, 22 Mar 2023 16:09:38 +0000 (18:09 +0200)
author Erik Scholz <redacted>
Wed, 22 Mar 2023 16:09:38 +0000 (17:09 +0100)
committer GitHub <redacted>
Wed, 22 Mar 2023 16:09:38 +0000 (18:09 +0200)
diff --git a/main.cpp b/main.cpp

index c164c102dbf4127d3568cf8637a4cc6179fbe952..fbb43a8cca15b1170e22a50e9c219f42ad0cf492 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -85,7 +85,7 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
      // Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
      // Run `./main --perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
      // Output: `perplexity: 13.5106 [114/114]`
-    auto tokens = ::llama_tokenize(ctx, params.prompt.c_str(), true);
+    auto tokens = ::llama_tokenize(ctx, params.prompt, true);
  
      int count = 0;
      double nll = 0.0;
diff --git a/utils.cpp b/utils.cpp

index 1679ae10a3af7907c409a526b8accd47e892a267..3909c974f1e1fe3b4e20ce8a4c2b0ac58c0a8fe2 100644 (file)
--- a/utils.cpp
+++ b/utils.cpp
@@ -146,8 +146,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
  
  // TODO: not great allocating this every time
  std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
-    std::vector<llama_token> res(8096);
+    // initialize to prompt number of chars, since n_tokens <= n_prompt_chars
+    std::vector<llama_token> res(text.size() + (int)add_bos);
      int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
+    assert(n >= 0);
      res.resize(n);
  
      return res;
author	Erik Scholz <redacted>
	Wed, 22 Mar 2023 16:09:38 +0000 (17:09 +0100)
committer	GitHub <redacted>
	Wed, 22 Mar 2023 16:09:38 +0000 (18:09 +0200)
main.cpp		patch \| blob \| history
utils.cpp		patch \| blob \| history