lookup: fibonacci hashing, fix crashes (#8548)

author Johannes Gäßler <redacted>

Wed, 17 Jul 2024 21:35:44 +0000 (23:35 +0200)

committer GitHub <redacted>

Wed, 17 Jul 2024 21:35:44 +0000 (23:35 +0200)
author Johannes Gäßler <redacted>
Wed, 17 Jul 2024 21:35:44 +0000 (23:35 +0200)
committer GitHub <redacted>
Wed, 17 Jul 2024 21:35:44 +0000 (23:35 +0200)
diff --git a/common/ngram-cache.h b/common/ngram-cache.h

index e4fa4cbd12f11eea481a3d22dd975f64f5c7a6f2..ab4c9b37665460281683a6bb09bcddd444eb3634 100644 (file)
--- a/common/ngram-cache.h
+++ b/common/ngram-cache.h
@@ -37,11 +37,18 @@ struct llama_ngram {
      }
  };
  
+struct llama_token_hash_function {
+    size_t operator()(const llama_token token) const {
+        // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+        return token * 11400714819323198485llu;
+    }
+};
+
  struct llama_ngram_hash_function {
      size_t operator()(const llama_ngram & ngram) const {
-        size_t hash = 0;
-        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
-            hash ^= std::hash<llama_token>{}(ngram.tokens[i]);
+        size_t hash = llama_token_hash_function{}(ngram.tokens[0]);
+        for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
+            hash ^= llama_token_hash_function{}(ngram.tokens[i]);
          }
          return hash;
      }
diff --git a/examples/lookup/lookup-stats.cpp b/examples/lookup/lookup-stats.cpp

index 0b171c87273d19d58e8197040b6443ef71f66ed2..2fe67100e6c03b7be94be899cfef868c6bbd60c2 100644 (file)
--- a/examples/lookup/lookup-stats.cpp
+++ b/examples/lookup/lookup-stats.cpp
@@ -31,7 +31,6 @@ int main(int argc, char ** argv){
  
      // load the model
      std::tie(model, ctx) = llama_init_from_gpt_params(params);
-    GGML_ASSERT(llama_n_vocab(model) < (1 << 16));
  
      // tokenize the prompt
      std::vector<llama_token> inp;
@@ -65,7 +64,7 @@ int main(int argc, char ** argv){
      }
  
      const int n_input = inp.size();
-    const int n_ctx = params.n_ctx;
+    const int n_ctx = llama_n_ctx(ctx);
  
      int n_drafted = 0;
      int n_accept  = 0;
diff --git a/examples/lookup/lookup.cpp b/examples/lookup/lookup.cpp

index 80ecd925d5962766ba80d1a1c4679ed43fa8c79e..bb571bac4d7781ff87428bc8c52abd673c479b04 100644 (file)
--- a/examples/lookup/lookup.cpp
+++ b/examples/lookup/lookup.cpp
@@ -39,7 +39,6 @@ int main(int argc, char ** argv){
  
      // load the model
      std::tie(model, ctx) = llama_init_from_gpt_params(params);
-    GGML_ASSERT(llama_n_vocab(model) < (1 << 16));
  
      // tokenize the prompt
      std::vector<llama_token> inp;
author	Johannes Gäßler <redacted>
	Wed, 17 Jul 2024 21:35:44 +0000 (23:35 +0200)
committer	GitHub <redacted>
	Wed, 17 Jul 2024 21:35:44 +0000 (23:35 +0200)
common/ngram-cache.h		patch | blob | history
examples/lookup/lookup-stats.cpp		patch | blob | history
examples/lookup/lookup.cpp		patch | blob | history